Coverage Report

Created: 2026-03-14 20:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/segment/segment_prefetcher.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <cstdint>
22
#include <roaring/roaring.hh>
23
#include <vector>
24
25
#include "common/status.h"
26
#include "storage/segment/common.h"
27
28
namespace doris {
29
namespace io {
30
class FileReader;
31
} // namespace io
32
class StorageReadOptions;
33
34
namespace segment_v2 {
35
class OrdinalIndexReader;
36
class ColumnReader;
37
38
enum class PrefetcherInitMethod : int { FROM_ROWIDS = 0, ALL_DATA_BLOCKS = 1 };
39
40
// Configuration for segment prefetcher
41
struct SegmentPrefetcherConfig {
42
    // Number of file cache blocks to prefetch ahead
43
    size_t prefetch_window_size = 4;
44
45
    // File cache block size in bytes (default 1MB)
46
    size_t block_size = 1024 * 1024;
47
48
    SegmentPrefetcherConfig(size_t window_size, size_t blk_size)
49
0
            : prefetch_window_size(window_size), block_size(blk_size) {}
50
};
51
52
// Block range representing [offset, offset + size) in the segment file
53
struct BlockRange {
54
    uint64_t offset;
55
    uint64_t size;
56
57
0
    BlockRange(uint64_t off, uint64_t sz) : offset(off), size(sz) {}
58
59
0
    bool operator==(const BlockRange& other) const {
60
0
        return offset == other.offset && size == other.size;
61
0
    }
62
};
63
64
// Represents a block with its first rowid for reading
65
struct BlockInfo {
66
    size_t block_id;
67
    rowid_t first_rowid;
68
69
0
    BlockInfo(size_t bid, rowid_t rid) : block_id(bid), first_rowid(rid) {}
70
};
71
72
struct SegmentPrefetchParams {
73
    SegmentPrefetcherConfig config;
74
    const StorageReadOptions& read_options;
75
};
76
77
// SegmentPrefetcher maintains block sequence and triggers prefetch to keep
78
// N blocks ahead of current reading position.
79
//
80
// Key design:
81
// - Monotonic reading: rowids are read in order (forward or backward)
82
// - Trigger condition: when current_rowid reaches a block boundary, prefetch next N blocks
83
// - No deduplication needed: reading is monotonic, blocks are naturally processed in order
84
class SegmentPrefetcher {
85
public:
86
0
    explicit SegmentPrefetcher(const SegmentPrefetcherConfig& config) : _config(config) {}
87
88
0
    ~SegmentPrefetcher() = default;
89
90
    Status init(std::shared_ptr<ColumnReader> column_reader,
91
                const StorageReadOptions& read_options);
92
93
    bool need_prefetch(rowid_t current_rowid, std::vector<BlockRange>* out_ranges);
94
95
    static void build_blocks_by_rowids(const roaring::Roaring& row_bitmap,
96
                                       const std::vector<SegmentPrefetcher*>& prefetchers);
97
    void begin_build_blocks_by_rowids();
98
    void add_rowids(const rowid_t* rowids, uint32_t num);
99
    void finish_build_blocks_by_rowids();
100
101
    void build_all_data_blocks();
102
103
private:
104
    // Parameters:
105
    //   row_bitmap: The complete bitmap of rowids to scan
106
    //   ordinal_index: Ordinal index reader (must be loaded)
107
    //
108
    // For forward reading: first_rowid is the first rowid we need to read in each block
109
    // For backward reading: first_rowid is the last rowid we need to read in each block
110
    //   (since we read backwards, this is the first one we'll encounter)
111
    void _build_block_sequence_from_bitmap(const roaring::Roaring& row_bitmap,
112
                                           OrdinalIndexReader* ordinal_index);
113
0
    size_t _offset_to_block_id(uint64_t offset) const { return offset / _config.block_size; }
114
115
0
    BlockRange _block_id_to_range(size_t block_id) const {
116
0
        return {block_id * _config.block_size, _config.block_size};
117
0
    }
118
119
0
    int window_size() const { return _prefetched_index - _current_block_index + 1; }
120
121
0
    std::string debug_string() const {
122
0
        return fmt::format(
123
0
                "[internal state] _is_forward={}, _prefetched_index={}, _current_block_index={}, "
124
0
                "window_size={}, block.size()={}, path={}",
125
0
                _is_forward, _prefetched_index, _current_block_index, window_size(),
126
0
                _block_sequence.size(), _path);
127
0
    }
128
129
    void reset_blocks();
130
131
private:
132
    SegmentPrefetcherConfig _config;
133
    std::string _path;
134
135
    // Sequence of blocks with their first rowid (in reading order)
136
    std::vector<BlockInfo> _block_sequence;
137
138
    bool _is_forward = true;
139
140
    int _prefetched_index = -1;
141
    int _current_block_index = 0;
142
143
    int page_idx = 0;
144
    // For each page, track the first rowid we need to read
145
    // For forward: the smallest rowid in this page
146
    // For backward: the largest rowid in this page (first one we'll encounter when reading backwards)
147
    size_t last_block_id = static_cast<size_t>(-1);
148
    rowid_t current_block_first_rowid = 0;
149
150
    OrdinalIndexReader* ordinal_index = nullptr;
151
};
152
153
} // namespace segment_v2
154
} // namespace doris