Coverage Report

Created: 2026-07-02 10:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/iterators.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <memory>
22
#include <set>
23
24
#include "common/status.h"
25
#include "core/block/block.h"
26
#include "exprs/score_runtime.h"
27
#include "exprs/vexpr.h"
28
#include "io/io_common.h"
29
#include "runtime/runtime_state.h"
30
#include "storage/index/ann/ann_topn_runtime.h"
31
#include "storage/olap_common.h"
32
#include "storage/predicate/block_column_predicate.h"
33
#include "storage/predicate/column_predicate.h"
34
#include "storage/row_cursor.h"
35
#include "storage/segment/row_ranges.h"
36
#include "storage/tablet/tablet_schema.h"
37
38
namespace doris {
39
40
class Schema;
41
class ColumnPredicate;
42
43
struct IteratorRowRef;
44
45
namespace segment_v2 {
46
struct SubstreamIterator;
47
}
48
class StorageReadOptions {
49
public:
50
    struct KeyRange {
51
        KeyRange()
52
                : lower_key(nullptr),
53
                  include_lower(false),
54
                  upper_key(nullptr),
55
0
                  include_upper(false) {}
56
57
        KeyRange(const RowCursor* lower_key_, bool include_lower_, const RowCursor* upper_key_,
58
                 bool include_upper_)
59
4.39M
                : lower_key(lower_key_),
60
4.39M
                  include_lower(include_lower_),
61
4.39M
                  upper_key(upper_key_),
62
4.39M
                  include_upper(include_upper_) {}
63
64
        // the lower bound of the range, nullptr if it does not exist
65
        const RowCursor* lower_key = nullptr;
66
        // whether `lower_key` is included in the range
67
        bool include_lower;
68
        // the upper bound of the range, nullptr if it does not exist
69
        const RowCursor* upper_key = nullptr;
70
        // whether `upper_key` is included in the range
71
        bool include_upper;
72
73
4.33M
        uint64_t get_digest(uint64_t seed) const {
74
4.33M
            if (lower_key != nullptr) {
75
4.33M
                auto key_str = lower_key->to_string();
76
4.33M
                seed = HashUtil::hash64(key_str.c_str(), key_str.size(), seed);
77
4.33M
                seed = HashUtil::hash64(&include_lower, sizeof(include_lower), seed);
78
4.33M
            }
79
80
4.33M
            if (upper_key != nullptr) {
81
4.33M
                auto key_str = upper_key->to_string();
82
4.33M
                seed = HashUtil::hash64(key_str.c_str(), key_str.size(), seed);
83
4.33M
                seed = HashUtil::hash64(&include_upper, sizeof(include_upper), seed);
84
4.33M
            }
85
86
4.33M
            return seed;
87
4.33M
        }
88
    };
89
90
    // reader's key ranges, empty if there are none.
91
    // used by short key index to filter row blocks
92
    std::vector<KeyRange> key_ranges;
93
94
    // For unique-key merge-on-write, the effect is similar to delete_conditions
95
    // that filters out rows that are deleted in realtime.
96
    // For a particular row, delete_bitmap.contains(rowid) means that the row is
97
    // marked deleted and is no longer visible to the user.
98
    // segment_id -> roaring::Roaring*
99
    std::unordered_map<uint32_t, std::shared_ptr<roaring::Roaring>> delete_bitmap;
100
101
    std::shared_ptr<AndBlockColumnPredicate> delete_condition_predicates =
102
            AndBlockColumnPredicate::create_shared();
103
    // reader's column predicates, empty if there are none
104
    // used to filter rows in row block
105
    std::vector<std::shared_ptr<ColumnPredicate>> column_predicates;
106
    std::unordered_map<int32_t, std::shared_ptr<AndBlockColumnPredicate>> col_id_to_predicates;
107
    std::unordered_map<int32_t, std::vector<std::shared_ptr<const ColumnPredicate>>>
108
            del_predicates_for_zone_map;
109
    TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE;
110
111
    // REQUIRED (null is not allowed)
112
    OlapReaderStatistics* stats = nullptr;
113
    bool use_page_cache = false;
114
    uint32_t block_row_max = 4096 - 32; // see https://github.com/apache/doris/pull/11816
115
    // Effective adaptive batch size byte budget.
116
    size_t preferred_block_size_bytes = 8388608UL;
117
118
    TabletSchemaSPtr tablet_schema = nullptr;
119
    bool enable_unique_key_merge_on_write = false;
120
    bool record_rowids = false;
121
    std::vector<int> topn_filter_source_node_ids;
122
    int topn_filter_target_node_id = -1;
123
    // used for a special optimization for queries of the form: ORDER BY key DESC LIMIT n
124
    bool read_orderby_key_reverse = false;
125
    // For rows with the same key, use ascending order (small-to-large) for tie-breakers.
126
    // For example, use lower rowset version / segment id first.
127
    bool use_insert_order_when_same = false;
128
    int binlog_lsn_idx = -1;
129
    // columns for orderby keys
130
    std::vector<uint32_t>* read_orderby_key_columns = nullptr;
131
    io::IOContext io_ctx;
132
    VExprContextSPtrs common_expr_ctxs_push_down;
133
    const std::set<int32_t>* output_columns = nullptr;
134
    // Extra storage key columns that are included only to keep the scan schema
135
    // aligned with the storage key prefix. SegmentIterator can synthesize
136
    // placeholders only after proving predicates, delete conditions, and
137
    // expressions do not need their real values.
138
    std::set<ColumnId> extra_columns;
139
    // runtime state
140
    RuntimeState* runtime_state = nullptr;
141
    RowsetId rowset_id;
142
    Version version;
143
    TsoRange commit_tso;
144
    int64_t tablet_id = 0;
145
    // slots whose cast may be eliminated in the storage layer
146
    std::map<std::string, DataTypePtr> target_cast_type_for_variants;
147
    RowRanges row_ranges;
148
149
    // Per-segment row budget pushed down from the scanner (topn or general
150
    // limit). SegmentIterator applies it after predicate/common-expr filtering;
151
    // _can_opt_limit_reads() only decides whether the pre-filter read can also
152
    // be capped. 0 disables the optimization.
153
    size_t read_limit = 0;
154
155
    std::map<ColumnId, VExprContextSPtr> virtual_column_exprs;
156
    std::shared_ptr<segment_v2::AnnTopNRuntime> ann_topn_runtime;
157
158
    std::map<int32_t, TColumnAccessPaths> all_access_paths;
159
    std::map<int32_t, TColumnAccessPaths> predicate_access_paths;
160
161
    std::shared_ptr<ScoreRuntime> score_runtime;
162
    CollectionStatisticsPtr collection_statistics;
163
164
    // Cache for sparse column data to avoid redundant reads
165
    // col_unique_id -> cached column_ptr
166
    std::unordered_map<int32_t, ColumnPtr> sparse_column_cache;
167
168
    uint64_t condition_cache_digest = 0;
169
};
170
171
struct CompactionSampleInfo {
172
    int64_t bytes = 0;
173
    int64_t rows = 0;
174
    int64_t group_data_size = 0;
175
    int64_t null_count = 0; // Number of NULL cells in this column group
176
};
177
178
struct BlockWithSameBit {
179
    Block* block;
180
    std::vector<bool>& same_bit;
181
182
201
    bool empty() const { return block->rows() == 0; }
183
};
184
185
class RowwiseIterator;
186
using RowwiseIteratorUPtr = std::unique_ptr<RowwiseIterator>;
187
class RowwiseIterator {
188
public:
189
5.32M
    RowwiseIterator() = default;
190
5.33M
    virtual ~RowwiseIterator() = default;
191
192
    // Initialize this iterator and make it ready to read with
193
    // input options.
194
    // Input options may contain the scan range for this scan.
195
    // Return Status::OK() if initialization succeeds,
196
    // return another error otherwise.
197
0
    virtual Status init(const StorageReadOptions& opts) {
198
0
        return Status::InternalError("to be implemented, current class: " +
199
0
                                     demangle(typeid(*this).name()));
200
0
    }
201
202
0
    virtual Status init(const StorageReadOptions& opts, CompactionSampleInfo* sample_info) {
203
0
        return Status::InternalError("should not reach here, current class: " +
204
0
                                     demangle(typeid(*this).name()));
205
0
    }
206
207
    // If there is any valid data, this function loads the data
208
    // into the input batch and returns Status::OK().
209
    // If there is no data to read, it returns Status::EndOfFile.
210
    // If any other error happens, the corresponding error code is returned.
211
0
    virtual Status next_batch(Block* block) {
212
0
        return Status::InternalError("should not reach here, current class: " +
213
0
                                     demangle(typeid(*this).name()));
214
0
    }
215
216
0
    virtual Status next_batch(BlockWithSameBit* block_with_same_bit) {
217
0
        return Status::InternalError("should not reach here, current class: " +
218
0
                                     demangle(typeid(*this).name()));
219
0
    }
220
221
0
    virtual Status next_batch(BlockView* block_view) {
222
0
        return Status::InternalError("should not reach here, current class: " +
223
0
                                     demangle(typeid(*this).name()));
224
0
    }
225
226
0
    virtual Status next_row(IteratorRowRef* ref) {
227
0
        return Status::InternalError("should not reach here, current class: " +
228
0
                                     demangle(typeid(*this).name()));
229
0
    }
230
0
    virtual Status unique_key_next_row(IteratorRowRef* ref) {
231
0
        return Status::InternalError("should not reach here, current class: " +
232
0
                                     demangle(typeid(*this).name()));
233
0
    }
234
235
0
    virtual bool is_merge_iterator() const { return false; }
236
237
0
    virtual Status current_block_row_locations(std::vector<RowLocation>* block_row_locations) {
238
0
        return Status::InternalError("should not reach here, current class: " +
239
0
                                     demangle(typeid(*this).name()));
240
0
    }
241
242
    // return schema for this Iterator
243
    virtual const Schema& schema() const = 0;
244
245
    // Return the data id, such as the segment id, used to keep the insert order when doing
246
    // merge sort in a priority queue
247
26.2k
    virtual uint64_t data_id() const { return 0; }
248
249
9.82k
    virtual void update_profile(RuntimeProfile* profile) {}
250
    // return the number of rows merged by this iterator
251
0
    virtual uint64_t merged_rows() const { return 0; }
252
253
    // return whether this is an empty iterator
254
1.80M
    virtual bool empty() const { return false; }
255
};
256
257
} // namespace doris