Coverage Report

Created: 2026-03-26 15:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/exprs/vexpr_context.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
22
#include <algorithm>
23
#include <cstddef>
24
#include <memory>
25
#include <unordered_map>
26
#include <utility>
27
#include <vector>
28
29
#include "common/factory_creator.h"
30
#include "common/status.h"
31
#include "core/block/block.h"
32
#include "core/block/column_with_type_and_name.h"
33
#include "core/column/column.h"
34
#include "exec/runtime_filter/runtime_filter_selectivity.h"
35
#include "exprs/function_context.h"
36
#include "exprs/vexpr_fwd.h"
37
#include "runtime/runtime_state.h"
38
#include "storage/index/ann/ann_range_search_runtime.h"
39
#include "storage/index/ann/ann_search_params.h"
40
#include "storage/index/inverted/inverted_index_reader.h"
41
#include "storage/segment/column_reader.h"
42
43
namespace doris {
44
class RowDescriptor;
45
class RuntimeState;
46
} // namespace doris
47
48
// Forward declarations of storage-layer types that this header only uses
// through pointers or references.
namespace doris::segment_v2 {
class Segment;
class ColumnIterator;
} // namespace doris::segment_v2
53
54
namespace doris {
55
56
class ScoreRuntime;
57
using ScoreRuntimeSPtr = std::shared_ptr<ScoreRuntime>;
58
59
class IndexExecContext {
60
public:
61
    IndexExecContext(const std::vector<ColumnId>& col_ids,
62
                     const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& index_iterators,
63
                     const std::vector<IndexFieldNameAndTypePair>& storage_name_and_type_vec,
64
                     std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>>&
65
                             common_expr_index_status,
66
                     ScoreRuntimeSPtr score_runtime, segment_v2::Segment* segment = nullptr,
67
                     const segment_v2::ColumnIteratorOptions& column_iter_opts = {})
68
5.65k
            : _col_ids(col_ids),
69
5.65k
              _index_iterators(index_iterators),
70
5.65k
              _storage_name_and_type(storage_name_and_type_vec),
71
5.65k
              _expr_index_status(common_expr_index_status),
72
5.65k
              _score_runtime(std::move(score_runtime)),
73
5.65k
              _segment(segment),
74
5.65k
              _column_iter_opts(column_iter_opts) {}
75
76
3
    segment_v2::IndexIterator* get_inverted_index_iterator_by_column_id(int column_index) const {
77
3
        if (column_index < 0 || column_index >= _col_ids.size()) {
78
0
            return nullptr;
79
0
        }
80
3
        const auto& column_id = _col_ids[column_index];
81
3
        if (column_id >= _index_iterators.size()) {
82
0
            return nullptr;
83
0
        }
84
3
        if (!_index_iterators[column_id]) {
85
1
            return nullptr;
86
1
        }
87
2
        return _index_iterators[column_id].get();
88
3
    }
89
90
0
    segment_v2::IndexIterator* get_inverted_index_iterator_by_id(ColumnId column_id) const {
91
0
        if (column_id >= _index_iterators.size()) {
92
0
            return nullptr;
93
0
        }
94
0
        if (!_index_iterators[column_id]) {
95
0
            return nullptr;
96
0
        }
97
0
        return _index_iterators[column_id].get();
98
0
    }
99
100
    const IndexFieldNameAndTypePair* get_storage_name_and_type_by_column_id(
101
3
            int column_index) const {
102
3
        if (column_index < 0 || column_index >= _col_ids.size()) {
103
0
            return nullptr;
104
0
        }
105
3
        const auto& column_id = _col_ids[column_index];
106
3
        if (column_id >= _storage_name_and_type.size()) {
107
2
            return nullptr;
108
2
        }
109
1
        return &_storage_name_and_type[column_id];
110
3
    }
111
112
0
    const IndexFieldNameAndTypePair* get_storage_name_and_type_by_id(ColumnId column_id) const {
113
0
        if (column_id >= _storage_name_and_type.size()) {
114
0
            return nullptr;
115
0
        }
116
0
        return &_storage_name_and_type[column_id];
117
0
    }
118
119
0
    int column_index_by_id(ColumnId column_id) const {
120
0
        for (int i = 0; i < _col_ids.size(); ++i) {
121
0
            if (_col_ids[i] == column_id) {
122
0
                return i;
123
0
            }
124
0
        }
125
0
        return -1;
126
0
    }
127
128
0
    bool get_column_id(int column_index, ColumnId* column_id) const {
129
0
        if (column_id == nullptr) {
130
0
            return false;
131
0
        }
132
0
        if (column_index < 0 || column_index >= _col_ids.size()) {
133
0
            return false;
134
0
        }
135
0
        *column_id = _col_ids[column_index];
136
0
        return true;
137
0
    }
138
139
0
    segment_v2::Segment* segment() const { return _segment; }
140
141
0
    const segment_v2::ColumnIteratorOptions& column_iter_opts() const { return _column_iter_opts; }
142
143
1
    bool has_index_result_for_expr(const VExpr* expr) const {
144
1
        return _index_result_bitmap.contains(expr);
145
1
    }
146
147
    void set_index_result_for_expr(const VExpr* expr,
148
1
                                   segment_v2::InvertedIndexResultBitmap bitmap) {
149
1
        _index_result_bitmap[expr] = std::move(bitmap);
150
1
    }
151
152
    std::unordered_map<const VExpr*, segment_v2::InvertedIndexResultBitmap>&
153
0
    get_index_result_bitmap() {
154
0
        return _index_result_bitmap;
155
0
    }
156
157
2
    std::unordered_map<const VExpr*, ColumnPtr>& get_index_result_column() {
158
2
        return _index_result_column;
159
2
    }
160
161
0
    const segment_v2::InvertedIndexResultBitmap* get_index_result_for_expr(const VExpr* expr) {
162
0
        auto iter = _index_result_bitmap.find(expr);
163
0
        if (iter == _index_result_bitmap.end()) {
164
0
            return nullptr;
165
0
        }
166
0
        return &iter->second;
167
0
    }
168
169
1
    void set_index_result_column_for_expr(const VExpr* expr, ColumnPtr column) {
170
1
        _index_result_column[expr] = std::move(column);
171
1
    }
172
173
0
    void set_true_for_index_status(const VExpr* expr, int column_index) {
174
0
        if (column_index < 0 || column_index >= _col_ids.size()) {
175
0
            return;
176
0
        }
177
0
        const auto& column_id = _col_ids[column_index];
178
0
        if (_expr_index_status.contains(column_id)) {
179
0
            if (_expr_index_status[column_id].contains(expr)) {
180
0
                _expr_index_status[column_id][expr] = true;
181
0
            }
182
0
        }
183
0
    }
184
185
0
    ScoreRuntimeSPtr get_score_runtime() const { return _score_runtime; }
186
187
0
    void set_analyzer_ctx_for_expr(const VExpr* expr, InvertedIndexAnalyzerCtxSPtr analyzer_ctx) {
188
0
        if (expr == nullptr || analyzer_ctx == nullptr) {
189
0
            return;
190
0
        }
191
0
        _expr_analyzer_ctx[expr] = std::move(analyzer_ctx);
192
0
    }
193
194
0
    const InvertedIndexAnalyzerCtx* get_analyzer_ctx_for_expr(const VExpr* expr) const {
195
0
        auto iter = _expr_analyzer_ctx.find(expr);
196
0
        if (iter == _expr_analyzer_ctx.end()) {
197
0
            return nullptr;
198
0
        }
199
0
        return iter->second.get();
200
0
    }
201
202
5.63k
    void set_index_query_context(segment_v2::IndexQueryContextPtr index_query_context) {
203
5.63k
        _index_query_context = index_query_context;
204
5.63k
    }
205
206
2
    const segment_v2::IndexQueryContextPtr& get_index_query_context() const {
207
2
        return _index_query_context;
208
2
    }
209
210
0
    segment_v2::Segment* get_segment() const { return _segment; }
211
212
0
    const segment_v2::ColumnIteratorOptions& get_column_iter_opts() const {
213
0
        return _column_iter_opts;
214
0
    }
215
216
private:
217
    // A reference to a vector of column IDs for the current expression's output columns.
218
    const std::vector<ColumnId>& _col_ids;
219
220
    // A reference to a vector of unique pointers to index iterators.
221
    const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& _index_iterators;
222
223
    // A reference to a vector of storage name and type pairs related to schema.
224
    const std::vector<IndexFieldNameAndTypePair>& _storage_name_and_type;
225
226
    // A map of expressions to their corresponding inverted index result bitmaps.
227
    std::unordered_map<const VExpr*, segment_v2::InvertedIndexResultBitmap> _index_result_bitmap;
228
229
    // A map of expressions to their corresponding result columns.
230
    std::unordered_map<const VExpr*, ColumnPtr> _index_result_column;
231
232
    // Per-expression analyzer context for inverted index evaluation.
233
    std::unordered_map<const VExpr*, InvertedIndexAnalyzerCtxSPtr> _expr_analyzer_ctx;
234
235
    // A reference to a map of common expressions to their inverted index evaluation status.
236
    std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>>& _expr_index_status;
237
238
    ScoreRuntimeSPtr _score_runtime;
239
240
    segment_v2::Segment* _segment = nullptr; // Ref
241
    segment_v2::ColumnIteratorOptions _column_iter_opts;
242
    segment_v2::IndexQueryContextPtr _index_query_context;
243
};
244
245
class VExprContext {
246
    ENABLE_FACTORY_CREATOR(VExprContext);
247
248
public:
249
655k
    VExprContext(VExprSPtr expr) : _root(std::move(expr)) {}
250
    ~VExprContext();
251
    [[nodiscard]] Status prepare(RuntimeState* state, const RowDescriptor& row_desc);
252
    [[nodiscard]] Status open(RuntimeState* state);
253
    [[nodiscard]] Status clone(RuntimeState* state, VExprContextSPtr& new_ctx);
254
    [[nodiscard]] Status execute(Block* block, int* result_column_id);
255
    [[nodiscard]] Status execute(const Block* block, ColumnPtr& result_column);
256
    [[nodiscard]] Status execute(const Block* block, ColumnWithTypeAndName& result_data);
257
    [[nodiscard]] DataTypePtr execute_type(const Block* block);
258
    [[nodiscard]] const std::string& expr_name() const;
259
    [[nodiscard]] bool is_blockable() const;
260
261
    [[nodiscard]] Status execute_const_expr(ColumnWithTypeAndName& result);
262
263
    double execute_cost() const;
264
265
721k
    VExprSPtr root() { return _root; }
266
0
    void set_root(const VExprSPtr& expr) { _root = expr; }
267
20
    void set_index_context(std::shared_ptr<IndexExecContext> index_context) {
268
20
        _index_context = std::move(index_context);
269
20
    }
270
271
59
    std::shared_ptr<IndexExecContext> get_index_context() const { return _index_context; }
272
273
    /// Creates a FunctionContext, and returns the index that's passed to fn_context() to
274
    /// retrieve the created context. Exprs that need a FunctionContext should call this in
275
    /// Prepare() and save the returned index. 'varargs_buffer_size', if specified, is the
276
    /// size of the varargs buffer in the created FunctionContext (see udf-internal.h).
277
    int register_function_context(RuntimeState* state, const DataTypePtr& return_type,
278
                                  const std::vector<DataTypePtr>& arg_types);
279
280
    /// Retrieves a registered FunctionContext. 'i' is the index returned by the call to
281
    /// register_function_context(). This should only be called by VExprs.
282
223
    FunctionContext* fn_context(int i) {
283
223
        if (i < 0 || i >= _fn_contexts.size()) {
284
0
            throw Exception(ErrorCode::INTERNAL_ERROR,
285
0
                            "fn_context index invalid, index={}, _fn_contexts.size()={}", i,
286
0
                            _fn_contexts.size());
287
0
        }
288
223
        return _fn_contexts[i].get();
289
223
    }
290
291
    // execute expr with inverted index which column a, b has inverted indexes
292
    //  but some situation although column b has indexes, but apply index is not useful, we should
293
    //  skip this expr, just do not apply index anymore.
294
    [[nodiscard]] Status evaluate_inverted_index(uint32_t segment_num_rows);
295
296
    bool all_expr_inverted_index_evaluated();
297
298
    Status execute_filter(const Block* block, uint8_t* __restrict result_filter_data, size_t rows,
299
                          bool accept_null, bool* can_filter_all);
300
301
    [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block);
302
303
    [[nodiscard]] static Status filter_block(const VExprContextSPtrs& expr_contexts, Block* block,
304
                                             size_t column_to_keep);
305
306
    [[nodiscard]] static Status execute_conjuncts(const VExprContextSPtrs& ctxs,
307
                                                  const std::vector<IColumn::Filter*>* filters,
308
                                                  bool accept_null, const Block* block,
309
                                                  IColumn::Filter* result_filter,
310
                                                  bool* can_filter_all);
311
312
    [[nodiscard]] static Status execute_conjuncts(const VExprContextSPtrs& conjuncts,
313
                                                  const Block* block, ColumnUInt8& null_map,
314
                                                  IColumn::Filter& result_filter);
315
316
    static Status execute_conjuncts(const VExprContextSPtrs& ctxs,
317
                                    const std::vector<IColumn::Filter*>* filters, Block* block,
318
                                    IColumn::Filter* result_filter, bool* can_filter_all);
319
320
    [[nodiscard]] static Status execute_conjuncts_and_filter_block(
321
            const VExprContextSPtrs& ctxs, Block* block, std::vector<uint32_t>& columns_to_filter,
322
            int column_to_keep);
323
324
    static Status execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block,
325
                                                     std::vector<uint32_t>& columns_to_filter,
326
                                                     int column_to_keep, IColumn::Filter& filter);
327
328
    [[nodiscard]] static Status get_output_block_after_execute_exprs(const VExprContextSPtrs&,
329
                                                                     const Block&, Block*,
330
                                                                     bool do_projection = false);
331
332
6
    int get_last_result_column_id() const {
333
6
        DCHECK(_last_result_column_id != -1);
334
6
        return _last_result_column_id;
335
6
    }
336
337
32
    RuntimeFilterSelectivity& get_runtime_filter_selectivity() {
338
32
        if (!_rf_selectivity) {
339
0
            throw Exception(ErrorCode::INTERNAL_ERROR, "RuntimeFilterSelectivity is null");
340
0
        }
341
32
        return *_rf_selectivity;
342
32
    }
343
344
0
    FunctionContext::FunctionStateScope get_function_state_scope() const {
345
0
        return _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL;
346
0
    }
347
348
    void clone_fn_contexts(VExprContext* other);
349
350
0
    VExprContext& operator=(const VExprContext& other) {
351
0
        if (this == &other) {
352
0
            return *this;
353
0
        }
354
0
355
0
        _root = other._root;
356
0
        _is_clone = other._is_clone;
357
0
        _prepared = other._prepared;
358
0
        _opened = other._opened;
359
0
360
0
        for (const auto& fn : other._fn_contexts) {
361
0
            _fn_contexts.emplace_back(fn->clone());
362
0
        }
363
0
364
0
        _last_result_column_id = other._last_result_column_id;
365
0
        _depth_num = other._depth_num;
366
0
        return *this;
367
0
    }
368
369
0
    VExprContext& operator=(VExprContext&& other) {
370
0
        _root = other._root;
371
0
        other._root = nullptr;
372
0
        _is_clone = other._is_clone;
373
0
        _prepared = other._prepared;
374
0
        _opened = other._opened;
375
0
        _fn_contexts = std::move(other._fn_contexts);
376
0
        _last_result_column_id = other._last_result_column_id;
377
0
        _depth_num = other._depth_num;
378
0
        return *this;
379
0
    }
380
381
134
    [[nodiscard]] static size_t get_memory_usage(const VExprContextSPtrs& contexts) {
382
134
        size_t usage = 0;
383
134
        std::for_each(contexts.cbegin(), contexts.cend(),
384
134
                      [&usage](auto&& context) { usage += context->_memory_usage; });
385
134
        return usage;
386
134
    }
387
388
0
    [[nodiscard]] size_t get_memory_usage() const { return _memory_usage; }
389
390
    void prepare_ann_range_search(const doris::VectorSearchUserParams& params);
391
392
    Status evaluate_ann_range_search(
393
            const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& cid_to_index_iterators,
394
            const std::vector<ColumnId>& idx_to_cid,
395
            const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& column_iterators,
396
            const std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>&
397
                    common_expr_to_slotref_map,
398
            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& ann_index_stats);
399
400
    uint64_t get_digest(uint64_t seed) const;
401
402
private:
403
    // Close method is called in vexpr context dector, not need call expicility
404
    void close();
405
406
    static void _reset_memory_usage(const VExprContextSPtrs& contexts);
407
408
    friend class VExpr;
409
410
    /// The expr tree this context is for.
411
    VExprSPtr _root;
412
413
    /// True if this context came from a Clone() call. Used to manage FunctionStateScope.
414
    bool _is_clone = false;
415
416
    /// Variables keeping track of current state.
417
    bool _prepared = false;
418
    bool _opened = false;
419
420
    /// FunctionContexts for each registered expression. The FunctionContexts are created
421
    /// and owned by this VExprContext.
422
    std::vector<std::unique_ptr<FunctionContext>> _fn_contexts;
423
424
    int _last_result_column_id = -1;
425
426
    /// The depth of expression-tree.
427
    int _depth_num = 0;
428
429
    std::shared_ptr<IndexExecContext> _index_context;
430
    size_t _memory_usage = 0;
431
432
    segment_v2::AnnRangeSearchRuntime _ann_range_search_runtime;
433
    bool _suitable_for_ann_index = true;
434
435
    std::unique_ptr<RuntimeFilterSelectivity> _rf_selectivity =
436
            std::make_unique<RuntimeFilterSelectivity>();
437
};
438
} // namespace doris