Coverage Report

Created: 2024-11-18 12:21

/root/doris/be/src/olap/iterators.h
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <cstddef>
21
#include <memory>
22
23
#include "common/status.h"
24
#include "io/io_common.h"
25
#include "olap/block_column_predicate.h"
26
#include "olap/column_predicate.h"
27
#include "olap/olap_common.h"
28
#include "olap/rowset/segment_v2/row_ranges.h"
29
#include "olap/tablet_schema.h"
30
#include "runtime/runtime_state.h"
31
#include "vec/core/block.h"
32
#include "vec/exprs/vexpr.h"
33
34
namespace doris {
35
36
class RowCursor;
37
class Schema;
38
class ColumnPredicate;
39
40
// Forward declaration only: IteratorRowRef is used in this header purely
// through pointers (RowwiseIterator::next_row / unique_key_next_row), so the
// full definition is not required here.
namespace vectorized {
struct IteratorRowRef;
} // namespace vectorized
43
44
// Forward declaration of segment_v2::StreamReader. No other line in the
// visible part of this header refers to it.
namespace segment_v2 {
45
struct StreamReader;
46
}
47
48
// Bundle of read-time options handed to RowwiseIterator::init(). All members
// are public data; most carry an in-class default initializer.
class StorageReadOptions {
49
public:
50
    // One key interval: an optional lower and upper RowCursor bound, each with
    // its own inclusive/exclusive flag. The RowCursor pointers are stored as
    // given; this struct never dereferences or frees them.
    struct KeyRange {
51
        // Unbounded range: both bounds null, both inclusion flags false.
        KeyRange()
52
                : lower_key(nullptr),
53
                  include_lower(false),
54
                  upper_key(nullptr),
55
0
                  include_upper(false) {}
56
57
        // Range with explicit bounds and flags; each argument is copied into
        // the member of the same name (without the trailing underscore).
        KeyRange(const RowCursor* lower_key_, bool include_lower_, const RowCursor* upper_key_,
58
                 bool include_upper_)
59
                : lower_key(lower_key_),
60
                  include_lower(include_lower_),
61
                  upper_key(upper_key_),
62
0
                  include_upper(include_upper_) {}
63
64
        // the lower bound of the range, nullptr if there is none
65
        const RowCursor* lower_key = nullptr;
66
        // whether `lower_key` is included in the range
        // (no in-class default; both constructors above set it)
67
        bool include_lower;
68
        // the upper bound of the range, nullptr if there is none
69
        const RowCursor* upper_key = nullptr;
70
        // whether `upper_key` is included in the range
        // (no in-class default; both constructors above set it)
71
        bool include_upper;
72
    };
73
74
    // reader's key ranges, empty if there are none.
75
    // used by short key index to filter row blocks
76
    std::vector<KeyRange> key_ranges;
77
78
    // For unique-key merge-on-write, the effect is similar to delete_conditions
79
    // in that it filters out rows that are deleted in realtime.
80
    // For a particular row, delete_bitmap.contains(rowid) means that the row is
81
    // marked deleted and is no longer visible to the user.
82
    // segment_id -> roaring::Roaring*
83
    std::unordered_map<uint32_t, std::shared_ptr<roaring::Roaring>> delete_bitmap;
84
85
    // Initialized through AndBlockColumnPredicate::create_shared() rather than
    // left as a null shared_ptr.
    std::shared_ptr<AndBlockColumnPredicate> delete_condition_predicates =
86
            AndBlockColumnPredicate::create_shared();
87
    // reader's column predicates, empty if there are none
88
    // used to filter rows in row block
89
    std::vector<ColumnPredicate*> column_predicates;
90
    // NOTE(review): keys of the two maps below are presumably column ids (per
    // the `col_id_` prefix) -- confirm against the code that fills them.
    std::unordered_map<int32_t, std::shared_ptr<AndBlockColumnPredicate>> col_id_to_predicates;
91
    std::unordered_map<int32_t, std::vector<const ColumnPredicate*>> del_predicates_for_zone_map;
92
    TPushAggOp::type push_down_agg_type_opt = TPushAggOp::NONE;
93
94
    // REQUIRED (null is not allowed)
    // The default below is still nullptr, so the caller must assign it.
95
    OlapReaderStatistics* stats = nullptr;
96
    bool use_page_cache = false;
97
    int block_row_max = 4096 - 32; // see https://github.com/apache/doris/pull/11816
98
99
    TabletSchemaSPtr tablet_schema = nullptr;
100
    bool enable_unique_key_merge_on_write = false;
101
    bool record_rowids = false;
102
    // flag for enable topn opt
103
    bool use_topn_opt = false;
104
    std::vector<int> topn_filter_source_node_ids;
105
    // used for special optimization for query : ORDER BY key DESC LIMIT n
106
    bool read_orderby_key_reverse = false;
107
    // columns for orderby keys
    // (raw pointer, nullptr by default; this class does not manage the vector)
108
    std::vector<uint32_t>* read_orderby_key_columns = nullptr;
109
    io::IOContext io_ctx;
110
    vectorized::VExpr* remaining_vconjunct_root = nullptr;
111
    std::vector<vectorized::VExprSPtr> remaining_conjunct_roots;
112
    vectorized::VExprContextSPtrs common_expr_ctxs_push_down;
113
    // raw pointer, nullptr by default
    const std::set<int32_t>* output_columns = nullptr;
114
    // runtime state
115
    RuntimeState* runtime_state = nullptr;
116
    RowsetId rowset_id;
117
    Version version;
118
    int64_t tablet_id = 0;
119
    // slots that cast may be eliminated in storage layer
120
    std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
121
    RowRanges row_ranges;
122
    size_t topn_limit = 0;
123
};
124
125
// Sampling counters handed to RowwiseIterator::init(opts, sample_info).
// Every field starts at zero so a default-constructed object is fully defined.
struct CompactionSampleInfo {
    int64_t bytes = 0;
    int64_t rows = 0;
    // Zero-initialized like its siblings; previously this member had no
    // initializer and held an indeterminate value after default construction.
    int64_t group_data_size = 0;
};
130
131
class RowwiseIterator;
132
using RowwiseIteratorUPtr = std::unique_ptr<RowwiseIterator>;
133
// Abstract iterator interface. Only schema() is pure virtual; every other
// virtual has a default body that either returns Status::NotSupported or a
// fixed neutral value, so subclasses override only what they support.
class RowwiseIterator {
134
public:
135
9.44k
    RowwiseIterator() = default;
136
9.44k
    virtual ~RowwiseIterator() = default;
137
138
    // Initialize this iterator and make it ready to read with
139
    // input options.
140
    // Input options may contain the scan range this scan is restricted to.
141
    // Return Status::OK() if init succeeds,
142
    // return another error otherwise.
    // The base implementation always returns NotSupported.
143
0
    virtual Status init(const StorageReadOptions& opts) {
144
0
        return Status::NotSupported("to be implemented");
145
0
    }
146
147
    // Overload of init() that additionally receives a CompactionSampleInfo
    // pointer. The base implementation ignores both arguments and returns
    // NotSupported.
0
    virtual Status init(const StorageReadOptions& opts, CompactionSampleInfo* sample_info) {
148
0
        return Status::NotSupported("to be implemented");
149
0
    }
150
151
    // If there is any valid data, this function will load data
152
    // into the input batch and return Status::OK().
153
    // If there is no data to read, it will return Status::EndOfFile.
154
    // If another error happens, another error code will be returned.
    // The base implementation always returns NotSupported.
155
0
    virtual Status next_batch(vectorized::Block* block) {
156
0
        return Status::NotSupported("to be implemented");
157
0
    }
158
159
    // BlockView variant of next_batch(); base implementation returns
    // NotSupported.
0
    virtual Status next_block_view(vectorized::BlockView* block_view) {
160
0
        return Status::NotSupported("to be implemented");
161
0
    }
162
163
    // Row-reference variant; base implementation returns NotSupported.
0
    virtual Status next_row(vectorized::IteratorRowRef* ref) {
164
0
        return Status::NotSupported("to be implemented");
165
0
    }
166
    // Same signature as next_row(); base implementation returns NotSupported.
    // NOTE(review): presumably the unique-key flavour of next_row() -- confirm
    // against the overriding implementations.
0
    virtual Status unique_key_next_row(vectorized::IteratorRowRef* ref) {
167
0
        return Status::NotSupported("to be implemented");
168
0
    }
169
170
    // Defaults to false in the base class.
0
    virtual bool support_return_data_by_ref() { return false; }
171
172
    // Takes an output vector of RowLocation; base implementation leaves it
    // untouched and returns NotSupported.
0
    virtual Status current_block_row_locations(std::vector<RowLocation>* block_row_locations) {
173
0
        return Status::NotSupported("to be implemented");
174
0
    }
175
176
    // return schema for this Iterator
    // (pure virtual: every concrete iterator must provide it)
177
    virtual const Schema& schema() const = 0;
178
179
    // Only used by UT. Whether lazy-materialization-read is used by this iterator or not.
180
0
    virtual bool is_lazy_materialization_read() const { return false; }
181
182
    // Return the data id such as segment id, used to keep the insert order when doing
183
    // merge sort in a priority queue. Defaults to 0.
184
1.60k
    virtual uint64_t data_id() const { return 0; }
185
186
    // Base implementation does nothing with `profile` and returns false.
0
    virtual bool update_profile(RuntimeProfile* profile) { return false; }
187
    // return rows merged count by iterator (0 in the base class)
188
0
    virtual uint64_t merged_rows() const { return 0; }
189
190
    // return if it's an empty iterator (false in the base class)
191
4.81k
    virtual bool empty() const { return false; }
192
};
193
194
} // namespace doris