Coverage Report

Created: 2026-05-26 02:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/vexpr_context.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
22
#include <algorithm>
23
#include <cstddef>
24
#include <memory>
25
#include <unordered_map>
26
#include <utility>
27
#include <vector>
28
29
#include "common/factory_creator.h"
30
#include "common/status.h"
31
#include "core/block/block.h"
32
#include "core/block/column_with_type_and_name.h"
33
#include "core/column/column.h"
34
#include "exec/runtime_filter/runtime_filter_selectivity.h"
35
#include "exprs/function_context.h"
36
#include "exprs/vexpr_fwd.h"
37
#include "runtime/runtime_state.h"
38
#include "storage/index/ann/ann_range_search_runtime.h"
39
#include "storage/index/ann/ann_search_params.h"
40
#include "storage/index/inverted/inverted_index_reader.h"
41
#include "storage/index/zone_map/zonemap_filter_result.h"
42
#include "storage/segment/column_reader.h"
43
44
namespace doris {
45
class RowDescriptor;
46
class RuntimeState;
47
class ZoneMapEvalContext;
48
} // namespace doris
49
50
namespace doris::segment_v2 {
51
class Segment;
52
class ColumnIterator;
53
} // namespace doris::segment_v2
54
55
namespace doris {
56
57
class ScoreRuntime;
58
using ScoreRuntimeSPtr = std::shared_ptr<ScoreRuntime>;
59
60
class IndexExecContext {
61
public:
62
    IndexExecContext(const std::vector<ColumnId>& col_ids,
63
                     const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& index_iterators,
64
                     const std::vector<IndexFieldNameAndTypePair>& storage_name_and_type_vec,
65
                     std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>>&
66
                             common_expr_index_status,
67
                     ScoreRuntimeSPtr score_runtime, segment_v2::Segment* segment,
68
                     const segment_v2::ColumnIteratorOptions& column_iter_opts)
69
2.82k
            : _col_ids(col_ids),
70
2.82k
              _index_iterators(index_iterators),
71
2.82k
              _storage_name_and_type(storage_name_and_type_vec),
72
2.82k
              _expr_index_status(common_expr_index_status),
73
2.82k
              _score_runtime(std::move(score_runtime)),
74
2.82k
              _segment(segment),
75
2.82k
              _column_iter_opts(column_iter_opts) {}
76
77
3
    segment_v2::IndexIterator* get_inverted_index_iterator_by_column_id(int column_index) const {
78
3
        if (column_index < 0 || column_index >= _col_ids.size()) {
79
0
            return nullptr;
80
0
        }
81
3
        const auto& column_id = _col_ids[column_index];
82
3
        if (column_id >= _index_iterators.size()) {
83
0
            return nullptr;
84
0
        }
85
3
        if (!_index_iterators[column_id]) {
86
1
            return nullptr;
87
1
        }
88
2
        return _index_iterators[column_id].get();
89
3
    }
90
91
0
    segment_v2::IndexIterator* get_inverted_index_iterator_by_id(ColumnId column_id) const {
92
0
        if (column_id >= _index_iterators.size()) {
93
0
            return nullptr;
94
0
        }
95
0
        if (!_index_iterators[column_id]) {
96
0
            return nullptr;
97
0
        }
98
0
        return _index_iterators[column_id].get();
99
0
    }
100
101
    const IndexFieldNameAndTypePair* get_storage_name_and_type_by_column_id(
102
3
            int column_index) const {
103
3
        if (column_index < 0 || column_index >= _col_ids.size()) {
104
0
            return nullptr;
105
0
        }
106
3
        const auto& column_id = _col_ids[column_index];
107
3
        if (column_id >= _storage_name_and_type.size()) {
108
2
            return nullptr;
109
2
        }
110
1
        return &_storage_name_and_type[column_id];
111
3
    }
112
113
0
    const IndexFieldNameAndTypePair* get_storage_name_and_type_by_id(ColumnId column_id) const {
114
0
        if (column_id >= _storage_name_and_type.size()) {
115
0
            return nullptr;
116
0
        }
117
0
        return &_storage_name_and_type[column_id];
118
0
    }
119
120
0
    int column_index_by_id(ColumnId column_id) const {
121
0
        for (int i = 0; i < _col_ids.size(); ++i) {
122
0
            if (_col_ids[i] == column_id) {
123
0
                return i;
124
0
            }
125
0
        }
126
0
        return -1;
127
0
    }
128
129
0
    bool get_column_id(int column_index, ColumnId* column_id) const {
130
0
        if (column_id == nullptr) {
131
0
            return false;
132
0
        }
133
0
        if (column_index < 0 || column_index >= _col_ids.size()) {
134
0
            return false;
135
0
        }
136
0
        *column_id = _col_ids[column_index];
137
0
        return true;
138
0
    }
139
140
0
    segment_v2::Segment* segment() const { return _segment; }
141
142
0
    const segment_v2::ColumnIteratorOptions& column_iter_opts() const { return _column_iter_opts; }
143
144
1
    bool has_index_result_for_expr(const VExpr* expr) const {
145
1
        return _index_result_bitmap.contains(expr);
146
1
    }
147
148
    void set_index_result_for_expr(const VExpr* expr,
149
1
                                   segment_v2::InvertedIndexResultBitmap bitmap) {
150
1
        _index_result_bitmap[expr] = std::move(bitmap);
151
1
    }
152
153
    std::unordered_map<const VExpr*, segment_v2::InvertedIndexResultBitmap>&
154
0
    get_index_result_bitmap() {
155
0
        return _index_result_bitmap;
156
0
    }
157
158
2
    std::unordered_map<const VExpr*, ColumnPtr>& get_index_result_column() {
159
2
        return _index_result_column;
160
2
    }
161
162
0
    const segment_v2::InvertedIndexResultBitmap* get_index_result_for_expr(const VExpr* expr) {
163
0
        auto iter = _index_result_bitmap.find(expr);
164
0
        if (iter == _index_result_bitmap.end()) {
165
0
            return nullptr;
166
0
        }
167
0
        return &iter->second;
168
0
    }
169
170
1
    void set_index_result_column_for_expr(const VExpr* expr, ColumnPtr column) {
171
1
        _index_result_column[expr] = std::move(column);
172
1
    }
173
174
0
    void set_true_for_index_status(const VExpr* expr, int column_index) {
175
0
        if (column_index < 0 || column_index >= _col_ids.size()) {
176
0
            return;
177
0
        }
178
0
        const auto& column_id = _col_ids[column_index];
179
0
        if (_expr_index_status.contains(column_id)) {
180
0
            if (_expr_index_status[column_id].contains(expr)) {
181
0
                _expr_index_status[column_id][expr] = true;
182
0
            }
183
0
        }
184
0
    }
185
186
0
    ScoreRuntimeSPtr get_score_runtime() const { return _score_runtime; }
187
188
0
    void set_analyzer_ctx_for_expr(const VExpr* expr, InvertedIndexAnalyzerCtxSPtr analyzer_ctx) {
189
0
        if (expr == nullptr || analyzer_ctx == nullptr) {
190
0
            return;
191
0
        }
192
0
        _expr_analyzer_ctx[expr] = std::move(analyzer_ctx);
193
0
    }
194
195
0
    const InvertedIndexAnalyzerCtx* get_analyzer_ctx_for_expr(const VExpr* expr) const {
196
0
        auto iter = _expr_analyzer_ctx.find(expr);
197
0
        if (iter == _expr_analyzer_ctx.end()) {
198
0
            return nullptr;
199
0
        }
200
0
        return iter->second.get();
201
0
    }
202
203
2.82k
    void set_index_query_context(segment_v2::IndexQueryContextPtr index_query_context) {
204
2.82k
        _index_query_context = index_query_context;
205
2.82k
    }
206
207
2
    const segment_v2::IndexQueryContextPtr& get_index_query_context() const {
208
2
        return _index_query_context;
209
2
    }
210
211
private:
212
    // A reference to a vector of column IDs for the current expression's output columns.
213
    const std::vector<ColumnId>& _col_ids;
214
215
    // A reference to a vector of unique pointers to index iterators.
216
    const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& _index_iterators;
217
218
    // A reference to a vector of storage name and type pairs related to schema.
219
    const std::vector<IndexFieldNameAndTypePair>& _storage_name_and_type;
220
221
    // A map of expressions to their corresponding inverted index result bitmaps.
222
    std::unordered_map<const VExpr*, segment_v2::InvertedIndexResultBitmap> _index_result_bitmap;
223
224
    // A map of expressions to their corresponding result columns.
225
    std::unordered_map<const VExpr*, ColumnPtr> _index_result_column;
226
227
    // Per-expression analyzer context for inverted index evaluation.
228
    std::unordered_map<const VExpr*, InvertedIndexAnalyzerCtxSPtr> _expr_analyzer_ctx;
229
230
    // A reference to a map of common expressions to their inverted index evaluation status.
231
    std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>>& _expr_index_status;
232
233
    ScoreRuntimeSPtr _score_runtime;
234
235
    segment_v2::Segment* _segment = nullptr; // Ref
236
    segment_v2::ColumnIteratorOptions _column_iter_opts;
237
    segment_v2::IndexQueryContextPtr _index_query_context;
238
};
239
240
class VExprContext {
241
    ENABLE_FACTORY_CREATOR(VExprContext);
242
243
public:
244
655k
    VExprContext(VExprSPtr expr) : _root(std::move(expr)) {}
245
    ~VExprContext();
246
    [[nodiscard]] Status prepare(RuntimeState* state, const RowDescriptor& row_desc);
247
    [[nodiscard]] Status open(RuntimeState* state);
248
    [[nodiscard]] Status clone(RuntimeState* state, VExprContextSPtr& new_ctx);
249
    [[nodiscard]] Status execute(Block* block, int* result_column_id);
250
    [[nodiscard]] Status execute(const Block* block, ColumnPtr& result_column);
251
    [[nodiscard]] Status execute(const Block* block, ColumnWithTypeAndName& result_data);
252
    [[nodiscard]] DataTypePtr execute_type(const Block* block);
253
    [[nodiscard]] const std::string& expr_name() const;
254
    [[nodiscard]] bool is_blockable() const;
255
256
    [[nodiscard]] Status execute_const_expr(ColumnWithTypeAndName& result);
257
258
    double execute_cost() const;
259
260
721k
    VExprSPtr root() { return _root; }
261
0
    void set_root(const VExprSPtr& expr) { _root = expr; }
262
7
    void set_index_context(std::shared_ptr<IndexExecContext> index_context) {
263
7
        _index_context = std::move(index_context);
264
7
    }
265
266
100
    std::shared_ptr<IndexExecContext> get_index_context() const { return _index_context; }
267
268
    /// Creates a FunctionContext, and returns the index that's passed to fn_context() to
269
    /// retrieve the created context. Exprs that need a FunctionContext should call this in
270
    /// Prepare() and save the returned index. 'varargs_buffer_size', if specified, is the
271
    /// size of the varargs buffer in the created FunctionContext (see udf-internal.h).
272
    int register_function_context(RuntimeState* state, const DataTypePtr& return_type,
273
                                  const std::vector<DataTypePtr>& arg_types);
274
275
    /// Retrieves a registered FunctionContext. 'i' is the index returned by the call to
276
    /// register_function_context(). This should only be called by VExprs.
277
344
    FunctionContext* fn_context(int i) {
278
344
        if (i < 0 || i >= _fn_contexts.size()) {
279
0
            throw Exception(ErrorCode::INTERNAL_ERROR,
280
0
                            "fn_context index invalid, index={}, _fn_contexts.size()={}", i,
281
0
                            _fn_contexts.size());
282
0
        }
283
344
        return _fn_contexts[i].get();
284
344
    }
285
286
    // execute expr with inverted index which column a, b has inverted indexes
287
    //  but some situation although column b has indexes, but apply index is not useful, we should
288
    //  skip this expr, just do not apply index anymore.
289
    [[nodiscard]] Status evaluate_inverted_index(uint32_t segment_num_rows);
290
291
    [[nodiscard]] static ZoneMapFilterResult evaluate_zonemap_filter(
292
            const VExprContextSPtrs& conjuncts, const ZoneMapEvalContext& ctx);
293
294
    bool all_expr_inverted_index_evaluated();
295
296
    Status execute_filter(const Block* block, uint8_t* __restrict result_filter_data, size_t rows,
297
                          bool accept_null, bool* can_filter_all);
298
299
    [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block);
300
301
    [[nodiscard]] static Status filter_block(const VExprContextSPtrs& expr_contexts, Block* block,
302
                                             size_t column_to_keep);
303
304
    [[nodiscard]] static Status execute_conjuncts(const VExprContextSPtrs& ctxs,
305
                                                  const std::vector<IColumn::Filter*>* filters,
306
                                                  bool accept_null, const Block* block,
307
                                                  IColumn::Filter* result_filter,
308
                                                  bool* can_filter_all);
309
310
    [[nodiscard]] static Status execute_conjuncts(const VExprContextSPtrs& conjuncts,
311
                                                  const Block* block, ColumnUInt8& null_map,
312
                                                  IColumn::Filter& result_filter);
313
314
    static Status execute_conjuncts(const VExprContextSPtrs& ctxs,
315
                                    const std::vector<IColumn::Filter*>* filters, Block* block,
316
                                    IColumn::Filter* result_filter, bool* can_filter_all);
317
318
    [[nodiscard]] static Status execute_conjuncts_and_filter_block(
319
            const VExprContextSPtrs& ctxs, Block* block, std::vector<uint32_t>& columns_to_filter,
320
            int column_to_keep);
321
322
    static Status execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block,
323
                                                     std::vector<uint32_t>& columns_to_filter,
324
                                                     int column_to_keep, IColumn::Filter& filter);
325
326
    [[nodiscard]] static Status get_output_block_after_execute_exprs(const VExprContextSPtrs&,
327
                                                                     const Block&, Block*,
328
                                                                     bool do_projection = false);
329
330
6
    int get_last_result_column_id() const {
331
6
        DCHECK(_last_result_column_id != -1);
332
6
        return _last_result_column_id;
333
6
    }
334
335
11
    RuntimeFilterSelectivity& get_runtime_filter_selectivity() {
336
11
        if (!_rf_selectivity) {
337
0
            throw Exception(ErrorCode::INTERNAL_ERROR, "RuntimeFilterSelectivity is null");
338
0
        }
339
11
        return *_rf_selectivity;
340
11
    }
341
342
0
    FunctionContext::FunctionStateScope get_function_state_scope() const {
343
0
        return _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL;
344
0
    }
345
346
    void clone_fn_contexts(VExprContext* other);
347
348
0
    VExprContext& operator=(const VExprContext& other) {
349
0
        if (this == &other) {
350
0
            return *this;
351
0
        }
352
0
353
0
        _root = other._root;
354
0
        _is_clone = other._is_clone;
355
0
        _prepared = other._prepared;
356
0
        _opened = other._opened;
357
0
358
0
        for (const auto& fn : other._fn_contexts) {
359
0
            _fn_contexts.emplace_back(fn->clone());
360
0
        }
361
0
362
0
        _last_result_column_id = other._last_result_column_id;
363
0
        _depth_num = other._depth_num;
364
0
        return *this;
365
0
    }
366
367
0
    VExprContext& operator=(VExprContext&& other) {
368
0
        _root = other._root;
369
0
        other._root = nullptr;
370
0
        _is_clone = other._is_clone;
371
0
        _prepared = other._prepared;
372
0
        _opened = other._opened;
373
0
        _fn_contexts = std::move(other._fn_contexts);
374
0
        _last_result_column_id = other._last_result_column_id;
375
0
        _depth_num = other._depth_num;
376
0
        return *this;
377
0
    }
378
379
136
    [[nodiscard]] static size_t get_memory_usage(const VExprContextSPtrs& contexts) {
380
136
        size_t usage = 0;
381
136
        std::for_each(contexts.cbegin(), contexts.cend(),
382
136
                      [&usage](auto&& context) { usage += context->_memory_usage; });
383
136
        return usage;
384
136
    }
385
386
0
    [[nodiscard]] size_t get_memory_usage() const { return _memory_usage; }
387
388
    void prepare_ann_range_search(const doris::VectorSearchUserParams& params);
389
390
    Status evaluate_ann_range_search(
391
            const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& cid_to_index_iterators,
392
            const std::vector<ColumnId>& idx_to_cid,
393
            const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& column_iterators,
394
            const std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>&
395
                    common_expr_to_slotref_map,
396
            roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& ann_index_stats,
397
            bool enable_result_cache);
398
399
    uint64_t get_digest(uint64_t seed) const;
400
401
private:
402
    // Close method is called in vexpr context dector, not need call expicility
403
    void close();
404
405
    static void _reset_memory_usage(const VExprContextSPtrs& contexts);
406
407
    friend class VExpr;
408
409
    /// The expr tree this context is for.
410
    VExprSPtr _root;
411
412
    /// True if this context came from a Clone() call. Used to manage FunctionStateScope.
413
    bool _is_clone = false;
414
415
    /// Variables keeping track of current state.
416
    bool _prepared = false;
417
    bool _opened = false;
418
419
    /// FunctionContexts for each registered expression. The FunctionContexts are created
420
    /// and owned by this VExprContext.
421
    std::vector<std::unique_ptr<FunctionContext>> _fn_contexts;
422
423
    int _last_result_column_id = -1;
424
425
    /// The depth of expression-tree.
426
    int _depth_num = 0;
427
428
    std::shared_ptr<IndexExecContext> _index_context;
429
    size_t _memory_usage = 0;
430
431
    segment_v2::AnnRangeSearchRuntime _ann_range_search_runtime;
432
    bool _suitable_for_ann_index = true;
433
434
    std::unique_ptr<RuntimeFilterSelectivity> _rf_selectivity =
435
            std::make_unique<RuntimeFilterSelectivity>();
436
};
437
} // namespace doris