Coverage Report

Created: 2026-03-25 20:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/exprs/vexpr_context.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/vexpr_context.h"
19
20
#include <algorithm>
21
#include <cstdint>
22
#include <ostream>
23
#include <string>
24
25
#include "common/cast_set.h"
26
#include "common/compiler_util.h" // IWYU pragma: keep
27
#include "common/exception.h"
28
#include "common/status.h"
29
#include "core/block/column_numbers.h"
30
#include "core/block/column_with_type_and_name.h"
31
#include "core/block/columns_with_type_and_name.h"
32
#include "core/column/column.h"
33
#include "core/column/column_const.h"
34
#include "exprs/function_context.h"
35
#include "exprs/vexpr.h"
36
#include "runtime/runtime_state.h"
37
#include "runtime/thread_context.h"
38
#include "storage/olap_common.h"
39
#include "storage/segment/column_reader.h"
40
#include "util/simd/bits.h"
41
42
namespace doris {
43
class RowDescriptor;
44
} // namespace doris
45
46
namespace doris {
47
#include "common/compile_check_begin.h"
48
49
655k
VExprContext::~VExprContext() {
50
    // In runtime filter, only create expr context to get expr root, will not call
51
    // prepare or open, so that it is not need to call close. And call close may core
52
    // because the function context in expr is not set.
53
655k
    if (!_prepared || !_opened) {
54
141k
        return;
55
141k
    }
56
513k
    try {
57
513k
        close();
58
513k
    } catch (const Exception& e) {
59
0
        LOG(WARNING) << "Exception occurs when expr context deconstruct: " << e.to_string();
60
0
    }
61
513k
}
62
63
189k
Status VExprContext::execute(Block* block, int* result_column_id) {
64
189k
    Status st;
65
189k
    RETURN_IF_CATCH_EXCEPTION({
66
189k
        st = _root->execute(this, block, result_column_id);
67
189k
        _last_result_column_id = *result_column_id;
68
        // We should first check the status, as some expressions might incorrectly set result_column_id, even if the st is not ok.
69
189k
        if (st.ok() && _last_result_column_id != -1) {
70
189k
            block->get_by_position(*result_column_id).column->sanity_check();
71
189k
            RETURN_IF_ERROR(
72
189k
                    block->get_by_position(*result_column_id).check_type_and_column_match());
73
189k
        }
74
189k
    });
75
189k
    return st;
76
189k
}
77
78
262
Status VExprContext::execute(const Block* block, ColumnPtr& result_column) {
79
262
    Status st;
80
262
    RETURN_IF_CATCH_EXCEPTION(
81
262
            { st = _root->execute_column(this, block, nullptr, block->rows(), result_column); });
82
262
    return st;
83
262
}
84
85
23
Status VExprContext::execute(const Block* block, ColumnWithTypeAndName& result_data) {
86
23
    Status st;
87
23
    ColumnPtr result_column;
88
23
    RETURN_IF_CATCH_EXCEPTION(
89
23
            { st = _root->execute_column(this, block, nullptr, block->rows(), result_column); });
90
23
    RETURN_IF_ERROR(st);
91
23
    result_data.column = result_column;
92
23
    result_data.type = execute_type(block);
93
23
    result_data.name = _root->expr_name();
94
23
    return Status::OK();
95
23
}
96
97
31
DataTypePtr VExprContext::execute_type(const Block* block) {
98
31
    return _root->execute_type(block);
99
31
}
100
101
30
Status VExprContext::execute_const_expr(ColumnWithTypeAndName& result) {
102
30
    Status st;
103
30
    RETURN_IF_CATCH_EXCEPTION(
104
30
            { st = _root->execute_column(this, nullptr, nullptr, 1, result.column); });
105
30
    RETURN_IF_ERROR(st);
106
30
    result.type = _root->execute_type(nullptr);
107
30
    result.name = _root->expr_name();
108
30
    return Status::OK();
109
30
}
110
111
8
[[nodiscard]] const std::string& VExprContext::expr_name() const {
112
8
    return _root->expr_name();
113
8
}
114
115
0
bool VExprContext::is_blockable() const {
116
0
    return _root->is_blockable();
117
0
}
118
119
258k
Status VExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc) {
120
258k
    _prepared = true;
121
258k
    Status st;
122
258k
    RETURN_IF_CATCH_EXCEPTION({ st = _root->prepare(state, row_desc, this); });
123
258k
    return st;
124
258k
}
125
126
258k
Status VExprContext::open(RuntimeState* state) {
127
258k
    DCHECK(_prepared);
128
258k
    if (_opened) {
129
6
        return Status::OK();
130
6
    }
131
258k
    _opened = true;
132
    // Fragment-local state is only initialized for original contexts. Clones inherit the
133
    // original's fragment state and only need to have thread-local state initialized.
134
258k
    FunctionContext::FunctionStateScope scope =
135
258k
            _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL;
136
258k
    Status st;
137
258k
    RETURN_IF_CATCH_EXCEPTION({ st = _root->open(state, this, scope); });
138
258k
    return st;
139
258k
}
140
141
513k
void VExprContext::close() {
142
    // Sometimes expr context may not have a root, then it need not call close
143
513k
    if (_root == nullptr) {
144
0
        return;
145
0
    }
146
513k
    FunctionContext::FunctionStateScope scope =
147
513k
            _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL;
148
513k
    _root->close(this, scope);
149
513k
}
150
151
254k
Status VExprContext::clone(RuntimeState* state, VExprContextSPtr& new_ctx) {
152
254k
    DCHECK(_prepared) << "expr context not prepared";
153
254k
    DCHECK(_opened);
154
254k
    DCHECK(new_ctx.get() == nullptr);
155
156
254k
    new_ctx = std::make_shared<VExprContext>(_root);
157
254k
    for (auto& _fn_context : _fn_contexts) {
158
16
        new_ctx->_fn_contexts.push_back(_fn_context->clone());
159
16
    }
160
161
254k
    new_ctx->_is_clone = true;
162
254k
    new_ctx->_prepared = true;
163
254k
    new_ctx->_opened = true;
164
    // segment_v2::AnnRangeSearchRuntime should be cloned as well.
165
    // The object of segment_v2::AnnRangeSearchRuntime is not shared by threads.
166
254k
    new_ctx->_ann_range_search_runtime = this->_ann_range_search_runtime;
167
168
254k
    return _root->open(state, new_ctx.get(), FunctionContext::THREAD_LOCAL);
169
254k
}
170
171
0
void VExprContext::clone_fn_contexts(VExprContext* other) {
172
0
    for (auto& _fn_context : _fn_contexts) {
173
0
        other->_fn_contexts.push_back(_fn_context->clone());
174
0
    }
175
0
}
176
177
int VExprContext::register_function_context(RuntimeState* state, const DataTypePtr& return_type,
178
83
                                            const std::vector<DataTypePtr>& arg_types) {
179
83
    _fn_contexts.push_back(FunctionContext::create_context(state, return_type, arg_types));
180
83
    _fn_contexts.back()->set_check_overflow_for_decimal(state->check_overflow_for_decimal());
181
83
    _fn_contexts.back()->set_enable_strict_mode(state->enable_strict_mode());
182
83
    return static_cast<int>(_fn_contexts.size()) - 1;
183
83
}
184
185
12
Status VExprContext::evaluate_inverted_index(uint32_t segment_num_rows) {
186
12
    Status st;
187
12
    RETURN_IF_CATCH_EXCEPTION({ st = _root->evaluate_inverted_index(this, segment_num_rows); });
188
12
    return st;
189
12
}
190
191
0
bool VExprContext::all_expr_inverted_index_evaluated() {
192
0
    return _index_context->has_index_result_for_expr(_root.get());
193
0
}
194
195
0
Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block) {
196
0
    if (vexpr_ctx == nullptr || block->rows() == 0) {
197
0
        return Status::OK();
198
0
    }
199
0
    ColumnPtr filter_column;
200
0
    RETURN_IF_ERROR(vexpr_ctx->execute(block, filter_column));
201
0
    size_t filter_column_id = block->columns();
202
0
    block->insert({filter_column, vexpr_ctx->execute_type(block), "filter_column"});
203
0
    vexpr_ctx->_memory_usage = filter_column->allocated_bytes();
204
0
    return Block::filter_block(block, filter_column_id, filter_column_id);
205
0
}
206
207
Status VExprContext::filter_block(const VExprContextSPtrs& expr_contexts, Block* block,
208
190
                                  size_t column_to_keep) {
209
190
    if (expr_contexts.empty() || block->rows() == 0) {
210
188
        return Status::OK();
211
188
    }
212
213
2
    ColumnNumbers columns_to_filter(column_to_keep);
214
2
    std::iota(columns_to_filter.begin(), columns_to_filter.end(), 0);
215
216
2
    return execute_conjuncts_and_filter_block(expr_contexts, block, columns_to_filter,
217
2
                                              static_cast<int>(column_to_keep));
218
190
}
219
220
Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs,
221
                                       const std::vector<IColumn::Filter*>* filters, Block* block,
222
2
                                       IColumn::Filter* result_filter, bool* can_filter_all) {
223
2
    return execute_conjuncts(ctxs, filters, false, block, result_filter, can_filter_all);
224
2
}
225
226
Status VExprContext::execute_filter(const Block* block, uint8_t* __restrict result_filter_data,
227
8
                                    size_t rows, bool accept_null, bool* can_filter_all) {
228
8
    return _root->execute_filter(this, block, result_filter_data, rows, accept_null,
229
8
                                 can_filter_all);
230
8
}
231
232
Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs,
233
                                       const std::vector<IColumn::Filter*>* filters,
234
                                       bool accept_null, const Block* block,
235
6
                                       IColumn::Filter* result_filter, bool* can_filter_all) {
236
6
    size_t rows = block->rows();
237
6
    DCHECK_EQ(result_filter->size(), rows);
238
6
    *can_filter_all = false;
239
6
    auto* __restrict result_filter_data = result_filter->data();
240
8
    for (const auto& ctx : ctxs) {
241
8
        RETURN_IF_ERROR(
242
8
                ctx->execute_filter(block, result_filter_data, rows, accept_null, can_filter_all));
243
8
        if (*can_filter_all) {
244
2
            return Status::OK();
245
2
        }
246
8
    }
247
4
    if (filters != nullptr) {
248
0
        for (auto* filter : *filters) {
249
0
            auto* __restrict filter_data = filter->data();
250
0
            const size_t size = filter->size();
251
0
            for (size_t i = 0; i < size; ++i) {
252
0
                result_filter_data[i] &= filter_data[i];
253
0
            }
254
0
            if (memchr(result_filter_data, 0x1, size) == nullptr) {
255
0
                *can_filter_all = true;
256
0
                return Status::OK();
257
0
            }
258
0
        }
259
0
    }
260
4
    return Status::OK();
261
4
}
262
263
Status VExprContext::execute_conjuncts(const VExprContextSPtrs& conjuncts, const Block* block,
264
6
                                       ColumnUInt8& null_map, IColumn::Filter& filter) {
265
6
    const auto& rows = block->rows();
266
6
    if (rows == 0) {
267
0
        return Status::OK();
268
0
    }
269
6
    if (null_map.size() != rows) {
270
0
        return Status::InternalError("null_map.size()!=rows, null_map.size()={}, rows={}",
271
0
                                     null_map.size(), rows);
272
0
    }
273
274
6
    auto* final_null_map = null_map.get_data().data();
275
6
    auto* final_filter_ptr = filter.data();
276
277
6
    for (const auto& conjunct : conjuncts) {
278
4
        ColumnPtr result_column;
279
4
        RETURN_IF_ERROR(conjunct->execute(block, result_column));
280
4
        auto [filter_column, is_const] = unpack_if_const(result_column);
281
4
        const auto* nullable_column = assert_cast<const ColumnNullable*>(filter_column.get());
282
4
        if (!is_const) {
283
4
            const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr();
284
4
            const IColumn::Filter& result =
285
4
                    assert_cast<const ColumnUInt8&>(*nested_column).get_data();
286
4
            const auto* __restrict filter_data = result.data();
287
4
            const auto* __restrict null_map_data = nullable_column->get_null_map_data().data();
288
4
            DCHECK_EQ(rows, nullable_column->size());
289
290
32
            for (size_t i = 0; i != rows; ++i) {
291
                // null and null    => null
292
                // null and true    => null
293
                // null and false   => false
294
28
                final_null_map[i] = (final_null_map[i] & (null_map_data[i] | filter_data[i])) |
295
28
                                    (null_map_data[i] & (final_null_map[i] | final_filter_ptr[i]));
296
28
                final_filter_ptr[i] = final_filter_ptr[i] & filter_data[i];
297
28
            }
298
4
        } else {
299
0
            bool filter_data = nullable_column->get_bool(0);
300
0
            bool null_map_data = nullable_column->is_null_at(0);
301
0
            for (size_t i = 0; i != rows; ++i) {
302
                // null and null    => null
303
                // null and true    => null
304
                // null and false   => false
305
0
                final_null_map[i] = (final_null_map[i] & (null_map_data | filter_data)) |
306
0
                                    (null_map_data & (final_null_map[i] | final_filter_ptr[i]));
307
0
                final_filter_ptr[i] = final_filter_ptr[i] & filter_data;
308
0
            }
309
0
        }
310
4
    }
311
6
    return Status::OK();
312
6
}
313
314
// TODO Performance Optimization
315
// need exception safety
316
Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block,
317
                                                        std::vector<uint32_t>& columns_to_filter,
318
2
                                                        int column_to_keep) {
319
2
    IColumn::Filter result_filter(block->rows(), 1);
320
2
    bool can_filter_all;
321
322
2
    _reset_memory_usage(ctxs);
323
324
2
    RETURN_IF_ERROR(
325
2
            execute_conjuncts(ctxs, nullptr, false, block, &result_filter, &can_filter_all));
326
327
    // Accumulate the usage of `result_filter` into the first context.
328
2
    if (!ctxs.empty()) {
329
2
        ctxs[0]->_memory_usage += result_filter.allocated_bytes();
330
2
    }
331
2
    if (can_filter_all) {
332
3
        for (auto& col : columns_to_filter) {
333
3
            block->get_by_position(col).column->assume_mutable()->clear();
334
3
        }
335
1
    } else {
336
1
        try {
337
1
            Block::filter_block_internal(block, columns_to_filter, result_filter);
338
1
        } catch (const Exception& e) {
339
0
            std::string str;
340
0
            for (auto ctx : ctxs) {
341
0
                if (str.length()) {
342
0
                    str += ",";
343
0
                }
344
0
                str += ctx->root()->debug_string();
345
0
            }
346
347
0
            return Status::InternalError(
348
0
                    "filter_block_internal meet exception, exprs=[{}], exception={}", str,
349
0
                    e.what());
350
0
        }
351
1
    }
352
2
    Block::erase_useless_column(block, column_to_keep);
353
2
    return Status::OK();
354
2
}
355
356
Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block,
357
                                                        std::vector<uint32_t>& columns_to_filter,
358
                                                        int column_to_keep,
359
2
                                                        IColumn::Filter& filter) {
360
2
    _reset_memory_usage(ctxs);
361
2
    filter.resize_fill(block->rows(), 1);
362
2
    bool can_filter_all;
363
2
    RETURN_IF_ERROR(execute_conjuncts(ctxs, nullptr, false, block, &filter, &can_filter_all));
364
365
    // Accumulate the usage of `result_filter` into the first context.
366
2
    if (!ctxs.empty()) {
367
2
        ctxs[0]->_memory_usage += filter.allocated_bytes();
368
2
    }
369
2
    if (can_filter_all) {
370
3
        for (auto& col : columns_to_filter) {
371
            // NOLINTNEXTLINE(performance-move-const-arg)
372
3
            std::move(*block->get_by_position(col).column).assume_mutable()->clear();
373
3
        }
374
1
    } else {
375
1
        RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, columns_to_filter, filter));
376
1
    }
377
378
2
    Block::erase_useless_column(block, column_to_keep);
379
2
    return Status::OK();
380
2
}
381
382
// do_projection: for some query(e.g. in MultiCastDataStreamerSourceOperator::get_block()),
383
// output_vexpr_ctxs will output the same column more than once, and if the output_block
384
// is mem-reused later, it will trigger DCHECK_EQ(d.column->use_count(), 1) failure when
385
// doing Block::clear_column_data, set do_projection to true to copy the column data to
386
// avoid this problem.
387
Status VExprContext::get_output_block_after_execute_exprs(
388
        const VExprContextSPtrs& output_vexpr_ctxs, const Block& input_block, Block* output_block,
389
1
        bool do_projection) {
390
1
    auto rows = input_block.rows();
391
1
    ColumnsWithTypeAndName result_columns;
392
1
    _reset_memory_usage(output_vexpr_ctxs);
393
394
8
    for (const auto& vexpr_ctx : output_vexpr_ctxs) {
395
8
        ColumnPtr result_column;
396
8
        RETURN_IF_ERROR(vexpr_ctx->execute(&input_block, result_column));
397
398
8
        auto type = vexpr_ctx->execute_type(&input_block);
399
8
        const auto& name = vexpr_ctx->expr_name();
400
401
8
        vexpr_ctx->_memory_usage += result_column->allocated_bytes();
402
8
        if (do_projection) {
403
0
            result_columns.emplace_back(result_column->clone_resized(rows), type, name);
404
405
8
        } else {
406
8
            result_columns.emplace_back(result_column, type, name);
407
8
        }
408
8
    }
409
1
    *output_block = {result_columns};
410
1
    return Status::OK();
411
1
}
412
413
5
void VExprContext::_reset_memory_usage(const VExprContextSPtrs& contexts) {
414
5
    std::for_each(contexts.begin(), contexts.end(),
415
12
                  [](auto&& context) { context->_memory_usage = 0; });
416
5
}
417
418
5
void VExprContext::prepare_ann_range_search(const doris::VectorSearchUserParams& params) {
419
5
    if (_root == nullptr) {
420
0
        return;
421
0
    }
422
423
5
    _root->prepare_ann_range_search(params, _ann_range_search_runtime, _suitable_for_ann_index);
424
5
    VLOG_DEBUG << fmt::format("Prepare ann range search result {}, _suitable_for_ann_index {}",
425
0
                              this->_ann_range_search_runtime.to_string(),
426
0
                              this->_suitable_for_ann_index);
427
5
    return;
428
5
}
429
430
Status VExprContext::evaluate_ann_range_search(
431
        const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& cid_to_index_iterators,
432
        const std::vector<ColumnId>& idx_to_cid,
433
        const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& column_iterators,
434
        const std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>&
435
                common_expr_to_slotref_map,
436
3
        roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& ann_index_stats) {
437
3
    if (_root == nullptr) {
438
0
        return Status::OK();
439
0
    }
440
441
3
    RETURN_IF_ERROR(_root->evaluate_ann_range_search(
442
3
            _ann_range_search_runtime, cid_to_index_iterators, idx_to_cid, column_iterators,
443
3
            row_bitmap, ann_index_stats));
444
445
2
    if (!_root->ann_range_search_executedd()) {
446
0
        return Status::OK();
447
0
    }
448
449
2
    if (!_root->ann_dist_is_fulfilled()) {
450
        // Do not perform index scan in this case.
451
1
        return Status::OK();
452
1
    }
453
454
1
    auto src_col_idx = _ann_range_search_runtime.src_col_idx;
455
1
    auto slot_ref_map_it = common_expr_to_slotref_map.find(this);
456
1
    if (slot_ref_map_it == common_expr_to_slotref_map.end()) {
457
1
        return Status::OK();
458
1
    }
459
0
    auto& slot_ref_map = slot_ref_map_it->second;
460
0
    ColumnId cid = idx_to_cid[src_col_idx];
461
0
    if (slot_ref_map.find(cid) == slot_ref_map.end()) {
462
0
        return Status::OK();
463
0
    }
464
0
    const VExpr* slot_ref_expr_addr = slot_ref_map.find(cid)->second;
465
0
    _index_context->set_true_for_index_status(slot_ref_expr_addr, idx_to_cid[cid]);
466
467
0
    VLOG_DEBUG << fmt::format(
468
0
            "Evaluate ann range search for expr {}, src_col_idx {}, cid {}, row_bitmap "
469
0
            "cardinality {}",
470
0
            _root->debug_string(), src_col_idx, cid, row_bitmap.cardinality());
471
0
    return Status::OK();
472
0
}
473
474
0
uint64_t VExprContext::get_digest(uint64_t seed) const {
475
0
    return _root->get_digest(seed);
476
0
}
477
478
0
double VExprContext::execute_cost() const {
479
0
    if (_root == nullptr) {
480
        // When there is no expression root, treat the cost as a base value.
481
        // This avoids null dereferences while keeping a deterministic cost.
482
0
        return 0.0;
483
0
    }
484
0
    return _root->execute_cost();
485
0
}
486
487
#include "common/compile_check_end.h"
488
} // namespace doris