Coverage Report

Created: 2026-04-10 12:12

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/vexpr_context.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/vexpr_context.h"
19
20
#include <algorithm>
21
#include <cstdint>
22
#include <string>
23
24
#include "common/compiler_util.h" // IWYU pragma: keep
25
#include "common/exception.h"
26
#include "common/status.h"
27
#include "core/block/column_numbers.h"
28
#include "core/block/column_with_type_and_name.h"
29
#include "core/block/columns_with_type_and_name.h"
30
#include "core/column/column.h"
31
#include "core/column/column_const.h"
32
#include "exprs/function_context.h"
33
#include "exprs/vexpr.h"
34
#include "runtime/runtime_state.h"
35
#include "runtime/thread_context.h"
36
#include "storage/olap_common.h"
37
#include "storage/segment/column_reader.h"
38
#include "util/simd/bits.h"
39
40
namespace doris {
41
class RowDescriptor;
42
} // namespace doris
43
44
namespace doris {
45
46
29.2M
// Closes the expression tree on destruction, but only when this context was both
// prepared and opened.
VExprContext::~VExprContext() {
    // For runtime filters an expr context may be created only to obtain the expr root.
    // Such a context is never prepared or opened, so it must not be closed either:
    // closing it may crash because the function context in the expr is not set.
    const bool needs_close = _prepared && _opened;
    if (needs_close) {
        try {
            close();
        } catch (const Exception& e) {
            // Never let the exception leave the destructor; just record it.
            LOG(WARNING) << "Exception occurs when expr context deconstruct: " << e.to_string();
        }
    }
}
59
60
1.41M
// Executes the root expression against `block` and reports the position of the result
// column through `result_column_id`. The id is also remembered in
// `_last_result_column_id`. doris::Exception thrown during evaluation is converted to a
// Status by RETURN_IF_CATCH_EXCEPTION.
Status VExprContext::execute(Block* block, int* result_column_id) {
    Status status;
    RETURN_IF_CATCH_EXCEPTION({
        status = _root->execute(this, block, result_column_id);
        _last_result_column_id = *result_column_id;
        // Check the status first: some expressions might incorrectly set result_column_id
        // even when the status is not ok.
        if (status.ok() && _last_result_column_id != -1) {
            auto& result_entry = block->get_by_position(*result_column_id);
            result_entry.column->sanity_check();
            RETURN_IF_ERROR(result_entry.check_type_and_column_match());
        }
    });
    return status;
}
74
75
2.53M
// Evaluates the root expression over all rows of `block` and stores the produced column
// in `result_column`. The input block is not modified through this overload's pointer
// (it is const). doris::Exception is converted to a Status.
Status VExprContext::execute(const Block* block, ColumnPtr& result_column) {
    Status status;
    RETURN_IF_CATCH_EXCEPTION({
        status = _root->execute_column(this, block, nullptr, block->rows(), result_column);
    });
    return status;
}
81
82
27.4k
// Evaluates the root expression over `block` and fills `result_data` with the resulting
// column, the type reported by execute_type(), and the root expression's name.
// `result_data` is left untouched when evaluation fails.
Status VExprContext::execute(const Block* block, ColumnWithTypeAndName& result_data) {
    ColumnPtr computed_column;
    Status status;
    RETURN_IF_CATCH_EXCEPTION({
        status = _root->execute_column(this, block, nullptr, block->rows(), computed_column);
    });
    RETURN_IF_ERROR(status);

    result_data.column = computed_column;
    result_data.type = execute_type(block);
    result_data.name = _root->expr_name();
    return Status::OK();
}
93
94
1.17M
// Returns the data type the root expression reports for `block`.
DataTypePtr VExprContext::execute_type(const Block* block) {
    DataTypePtr result_type = _root->execute_type(block);
    return result_type;
}
97
98
1.08M
// Evaluates the root expression without any input block (a null block and a row count
// of 1 are passed to the root) and fills `result` with the column, type and name.
// `result.column` is written directly by the root; type and name are only set on success.
Status VExprContext::execute_const_expr(ColumnWithTypeAndName& result) {
    Status status;
    RETURN_IF_CATCH_EXCEPTION({
        status = _root->execute_column(this, nullptr, nullptr, 1, result.column);
    });
    RETURN_IF_ERROR(status);

    result.type = _root->execute_type(nullptr);
    result.name = _root->expr_name();
    return Status::OK();
}
107
108
1.14M
[[nodiscard]] const std::string& VExprContext::expr_name() const {
109
1.14M
    return _root->expr_name();
110
1.14M
}
111
112
0
bool VExprContext::is_blockable() const {
113
0
    return _root->is_blockable();
114
0
}
115
116
7.32M
// Prepares the root expression for `row_desc`. The context is flagged as prepared before
// the root is invoked, so the flag stays set even when preparation fails.
// doris::Exception is converted to a Status.
Status VExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc) {
    _prepared = true;

    Status status;
    RETURN_IF_CATCH_EXCEPTION({
        status = _root->prepare(state, row_desc, this);
    });
    return status;
}
122
123
7.32M
// Opens the root expression. Calling open() on an already opened context is a no-op that
// returns OK. The context is flagged as opened before the root is invoked.
Status VExprContext::open(RuntimeState* state) {
    DCHECK(_prepared);
    if (_opened) {
        return Status::OK();
    }
    _opened = true;

    // Fragment-local state is only initialized for original contexts. Clones inherit the
    // original's fragment state and only need their thread-local state initialized.
    FunctionContext::FunctionStateScope scope = FunctionContext::FRAGMENT_LOCAL;
    if (_is_clone) {
        scope = FunctionContext::THREAD_LOCAL;
    }

    Status status;
    RETURN_IF_CATCH_EXCEPTION({
        status = _root->open(state, this, scope);
    });
    return status;
}
137
138
28.8M
void VExprContext::close() {
139
    // Sometimes expr context may not have a root, then it need not call close
140
28.8M
    if (_root == nullptr) {
141
0
        return;
142
0
    }
143
28.8M
    FunctionContext::FunctionStateScope scope =
144
28.8M
            _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL;
145
28.8M
    _root->close(this, scope);
146
28.8M
}
147
148
21.3M
// Creates a clone of this context in `new_ctx`, which must be empty on entry.
// The clone shares the same root expression, receives a clone of every function context,
// is flagged as clone/prepared/opened, and gets a copy of the ANN range search runtime.
// Finally the root is opened for the clone with THREAD_LOCAL scope.
//
// The root open call is wrapped in RETURN_IF_CATCH_EXCEPTION, matching open(), so that a
// doris::Exception raised while opening is reported as a Status instead of escaping.
Status VExprContext::clone(RuntimeState* state, VExprContextSPtr& new_ctx) {
    DCHECK(_prepared) << "expr context not prepared";
    DCHECK(_opened);
    DCHECK(new_ctx.get() == nullptr);

    new_ctx = std::make_shared<VExprContext>(_root);
    for (const auto& fn_context : _fn_contexts) {
        new_ctx->_fn_contexts.push_back(fn_context->clone());
    }

    new_ctx->_is_clone = true;
    new_ctx->_prepared = true;
    new_ctx->_opened = true;
    // segment_v2::AnnRangeSearchRuntime should be cloned as well.
    // The object of segment_v2::AnnRangeSearchRuntime is not shared by threads.
    new_ctx->_ann_range_search_runtime = this->_ann_range_search_runtime;

    Status status;
    RETURN_IF_CATCH_EXCEPTION({
        status = _root->open(state, new_ctx.get(), FunctionContext::THREAD_LOCAL);
    });
    return status;
}
167
168
0
// Appends a clone of each of this context's function contexts to `other`.
// Function contexts already held by `other` are kept.
void VExprContext::clone_fn_contexts(VExprContext* other) {
    auto& target = other->_fn_contexts;
    for (auto& source_ctx : _fn_contexts) {
        target.push_back(source_ctx->clone());
    }
}
173
174
// Creates a FunctionContext for the given return/argument types, appends it to
// `_fn_contexts`, copies the decimal-overflow and strict-mode settings from `state`
// into it, and returns its index within `_fn_contexts`.
int VExprContext::register_function_context(RuntimeState* state, const DataTypePtr& return_type,
                                            const std::vector<DataTypePtr>& arg_types) {
    _fn_contexts.push_back(FunctionContext::create_context(state, return_type, arg_types));

    auto& registered = _fn_contexts.back();
    registered->set_check_overflow_for_decimal(state->check_overflow_for_decimal());
    registered->set_enable_strict_mode(state->enable_strict_mode());

    const int count = static_cast<int>(_fn_contexts.size());
    return count - 1;
}
181
182
13.1k
// Asks the root expression to evaluate itself against the inverted index for a segment
// with `segment_num_rows` rows. doris::Exception is converted to a Status.
Status VExprContext::evaluate_inverted_index(uint32_t segment_num_rows) {
    Status status;
    RETURN_IF_CATCH_EXCEPTION({
        status = _root->evaluate_inverted_index(this, segment_num_rows);
    });
    return status;
}
187
188
12.8k
bool VExprContext::all_expr_inverted_index_evaluated() {
189
12.8k
    return _index_context->has_index_result_for_expr(_root.get());
190
12.8k
}
191
192
50
// Filters `block` with a single expression context.
// Does nothing (returns OK) when `vexpr_ctx` is null or the block has no rows.
// Otherwise the filter column is computed, appended to the block as "filter_column",
// its allocated size is recorded as the context's memory usage, and the block is
// filtered by that appended column.
Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block) {
    const bool nothing_to_do = (vexpr_ctx == nullptr) || (block->rows() == 0);
    if (nothing_to_do) {
        return Status::OK();
    }

    ColumnPtr filter_column;
    RETURN_IF_ERROR(vexpr_ctx->execute(block, filter_column));

    // The filter column is appended at the end, so its position is the old column count.
    const size_t appended_pos = block->columns();
    block->insert({filter_column, vexpr_ctx->execute_type(block), "filter_column"});
    vexpr_ctx->_memory_usage = filter_column->allocated_bytes();

    return Block::filter_block(block, appended_pos, appended_pos);
}
203
204
// Filters the first `column_to_keep` columns of `block` with all of `expr_contexts`.
// Returns OK without touching the block when there are no contexts or no rows.
Status VExprContext::filter_block(const VExprContextSPtrs& expr_contexts, Block* block,
                                  size_t column_to_keep) {
    const bool nothing_to_do = expr_contexts.empty() || block->rows() == 0;
    if (nothing_to_do) {
        return Status::OK();
    }

    // Build the list of column positions 0, 1, ..., column_to_keep - 1.
    ColumnNumbers kept_columns(column_to_keep);
    std::iota(kept_columns.begin(), kept_columns.end(), 0);

    const int keep_count = static_cast<int>(column_to_keep);
    return execute_conjuncts_and_filter_block(expr_contexts, block, kept_columns, keep_count);
}
216
217
// Convenience overload: same as the six-argument execute_conjuncts with
// `accept_null` fixed to false.
Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs,
                                       const std::vector<IColumn::Filter*>* filters, Block* block,
                                       IColumn::Filter* result_filter, bool* can_filter_all) {
    constexpr bool accept_null = false;
    return execute_conjuncts(ctxs, filters, accept_null, block, result_filter, can_filter_all);
}
222
223
// Forwards to the root expression's execute_filter with this context as the first
// argument; all other arguments are passed through unchanged.
Status VExprContext::execute_filter(const Block* block, uint8_t* __restrict result_filter_data,
                                    size_t rows, bool accept_null, bool* can_filter_all) {
    Status status = _root->execute_filter(this, block, result_filter_data, rows, accept_null,
                                          can_filter_all);
    return status;
}
228
229
// Runs every context in `ctxs` as a filter over `block`, accumulating into
// `result_filter`, then ANDs in each extra filter from `filters` (may be null).
// `*can_filter_all` is reset to false on entry. The function returns early with OK as
// soon as a context sets `*can_filter_all`, or as soon as no 0x1 byte remains in the
// accumulated filter after merging an extra filter (in which case `*can_filter_all` is
// set to true).
// `result_filter` is expected to have exactly `block->rows()` entries (debug-checked).
Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs,
                                       const std::vector<IColumn::Filter*>* filters,
                                       bool accept_null, const Block* block,
                                       IColumn::Filter* result_filter, bool* can_filter_all) {
    const size_t num_rows = block->rows();
    DCHECK_EQ(result_filter->size(), num_rows);
    *can_filter_all = false;
    auto* __restrict merged = result_filter->data();

    for (const auto& ctx : ctxs) {
        RETURN_IF_ERROR(ctx->execute_filter(block, merged, num_rows, accept_null, can_filter_all));
        if (*can_filter_all) {
            return Status::OK();
        }
    }

    if (filters == nullptr) {
        return Status::OK();
    }

    for (auto* extra_filter : *filters) {
        auto* __restrict extra = extra_filter->data();
        const size_t extra_size = extra_filter->size();
        for (size_t row = 0; row < extra_size; ++row) {
            merged[row] &= extra[row];
        }
        // Only the first `extra_size` bytes are scanned for a surviving row.
        if (memchr(merged, 0x1, extra_size) == nullptr) {
            *can_filter_all = true;
            return Status::OK();
        }
    }
    return Status::OK();
}
259
260
// Evaluates `conjuncts` over `block` with three-valued (nullable) AND semantics, merging
// each conjunct's result into the caller-provided `null_map` and `filter` in place.
//
// Returns OK immediately when the block has no rows. Returns InternalError when
// `null_map` or `filter` does not have exactly one entry per row; both buffers are
// written for every row, so a shorter buffer would be written out of bounds.
// Each conjunct's result (after unpacking a const column) is cast to ColumnNullable,
// i.e. every conjunct is expected to produce a nullable column.
Status VExprContext::execute_conjuncts(const VExprContextSPtrs& conjuncts, const Block* block,
                                       ColumnUInt8& null_map, IColumn::Filter& filter) {
    const size_t rows = block->rows();
    if (rows == 0) {
        return Status::OK();
    }
    if (null_map.size() != rows) {
        return Status::InternalError("null_map.size()!=rows, null_map.size()={}, rows={}",
                                     null_map.size(), rows);
    }
    if (filter.size() != rows) {
        return Status::InternalError("filter.size()!=rows, filter.size()={}, rows={}",
                                     filter.size(), rows);
    }

    auto* final_null_map = null_map.get_data().data();
    auto* final_filter_ptr = filter.data();

    for (const auto& conjunct : conjuncts) {
        ColumnPtr result_column;
        RETURN_IF_ERROR(conjunct->execute(block, result_column));
        auto [filter_column, is_const] = unpack_if_const(result_column);
        const auto* nullable_column = assert_cast<const ColumnNullable*>(filter_column.get());
        if (!is_const) {
            const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr();
            const IColumn::Filter& result =
                    assert_cast<const ColumnUInt8&>(*nested_column).get_data();
            const auto* __restrict filter_data = result.data();
            const auto* __restrict null_map_data = nullable_column->get_null_map_data().data();
            DCHECK_EQ(rows, nullable_column->size());

            for (size_t i = 0; i != rows; ++i) {
                // null and null    => null
                // null and true    => null
                // null and false   => false
                // The null map must be updated first: it reads the filter value from
                // before this conjunct was merged in.
                final_null_map[i] = (final_null_map[i] & (null_map_data[i] | filter_data[i])) |
                                    (null_map_data[i] & (final_null_map[i] | final_filter_ptr[i]));
                final_filter_ptr[i] = final_filter_ptr[i] & filter_data[i];
            }
        } else {
            // Const result: one value / null flag applies to every row.
            bool filter_data = nullable_column->get_bool(0);
            bool null_map_data = nullable_column->is_null_at(0);
            for (size_t i = 0; i != rows; ++i) {
                // null and null    => null
                // null and true    => null
                // null and false   => false
                final_null_map[i] = (final_null_map[i] & (null_map_data | filter_data)) |
                                    (null_map_data & (final_null_map[i] | final_filter_ptr[i]));
                final_filter_ptr[i] = final_filter_ptr[i] & filter_data;
            }
        }
    }
    return Status::OK();
}
310
311
// TODO Performance Optimization
312
// need exception safety
313
Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block,
                                                        std::vector<uint32_t>& columns_to_filter,
                                                        int column_to_keep) {
    // Evaluates all conjuncts in `ctxs` into a temporary row filter, applies it to the
    // columns listed in `columns_to_filter`, and finally calls
    // Block::erase_useless_column(block, column_to_keep).
    // An Exception thrown by Block::filter_block_internal is turned into an
    // InternalError that lists the debug strings of all conjunct roots.
    IColumn::Filter result_filter(block->rows(), 1);
    bool can_filter_all = false;

    _reset_memory_usage(ctxs);

    RETURN_IF_ERROR(
            execute_conjuncts(ctxs, nullptr, false, block, &result_filter, &can_filter_all));

    // Accumulate the usage of `result_filter` into the first context.
    if (!ctxs.empty()) {
        ctxs[0]->_memory_usage += result_filter.allocated_bytes();
    }
    if (can_filter_all) {
        // Every row is filtered out: empty the columns instead of filtering them.
        for (auto& col : columns_to_filter) {
            auto& column = block->get_by_position(col).column;
            if (column->is_exclusive()) {
                column->assume_mutable()->clear();
            } else {
                column = column->clone_empty();
            }
        }
    } else {
        try {
            Block::filter_block_internal(block, columns_to_filter, result_filter);
        } catch (const Exception& e) {
            std::string str;
            // Iterate by const reference to avoid copying a shared_ptr per conjunct.
            for (const auto& ctx : ctxs) {
                if (!str.empty()) {
                    str += ",";
                }
                str += ctx->root()->debug_string();
            }

            return Status::InternalError(
                    "filter_block_internal meet exception, exprs=[{}], exception={}", str,
                    e.what());
        }
    }
    Block::erase_useless_column(block, column_to_keep);
    return Status::OK();
}
357
358
// Variant that writes the row filter into the caller-provided `filter` (resized to the
// block's row count and filled with 1 first), applies it to `columns_to_filter`, and
// finally calls Block::erase_useless_column(block, column_to_keep).
Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block,
                                                        std::vector<uint32_t>& columns_to_filter,
                                                        int column_to_keep,
                                                        IColumn::Filter& filter) {
    _reset_memory_usage(ctxs);
    filter.resize_fill(block->rows(), 1);

    bool filters_everything = false;
    RETURN_IF_ERROR(execute_conjuncts(ctxs, nullptr, false, block, &filter, &filters_everything));

    // Accumulate the usage of `filter` into the first context.
    if (!ctxs.empty()) {
        ctxs[0]->_memory_usage += filter.allocated_bytes();
    }

    if (!filters_everything) {
        RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, columns_to_filter, filter));
    } else {
        // No row survives: empty each column in place when it is exclusively owned,
        // otherwise replace it with an empty clone.
        for (auto& position : columns_to_filter) {
            auto& target_column = block->get_by_position(position).column;
            if (target_column->is_exclusive()) {
                target_column->assume_mutable()->clear();
            } else {
                target_column = target_column->clone_empty();
            }
        }
    }

    Block::erase_useless_column(block, column_to_keep);
    return Status::OK();
}
387
388
// do_projection: for some query(e.g. in MultiCastDataStreamerSourceOperator::get_block()),
389
// output_vexpr_ctxs will output the same column more than once, and if the output_block
390
// is mem-reused later, it will trigger DCHECK_EQ(d.column->use_count(), 1) failure when
391
// doing Block::clear_column_data, set do_projection to true to copy the column data to
392
// avoid this problem.
393
Status VExprContext::get_output_block_after_execute_exprs(
        const VExprContextSPtrs& output_vexpr_ctxs, const Block& input_block, Block* output_block,
        bool do_projection) {
    // Executes every output expression against `input_block` and assembles the results,
    // in order, into `*output_block`. Each context's memory usage is reset first and then
    // increased by the allocated size of the column it produced.
    const auto rows = input_block.rows();
    ColumnsWithTypeAndName result_columns;
    // Exactly one result column is produced per output expression.
    result_columns.reserve(output_vexpr_ctxs.size());
    _reset_memory_usage(output_vexpr_ctxs);

    for (const auto& vexpr_ctx : output_vexpr_ctxs) {
        ColumnPtr result_column;
        RETURN_IF_ERROR(vexpr_ctx->execute(&input_block, result_column));

        auto type = vexpr_ctx->execute_type(&input_block);
        const auto& name = vexpr_ctx->expr_name();

        vexpr_ctx->_memory_usage += result_column->allocated_bytes();
        if (do_projection) {
            // Store a resized clone rather than the produced column itself.
            result_columns.emplace_back(result_column->clone_resized(rows), type, name);
        } else {
            result_columns.emplace_back(result_column, type, name);
        }
    }
    *output_block = {result_columns};
    return Status::OK();
}
418
419
296k
void VExprContext::_reset_memory_usage(const VExprContextSPtrs& contexts) {
420
296k
    std::for_each(contexts.begin(), contexts.end(),
421
1.25M
                  [](auto&& context) { context->_memory_usage = 0; });
422
296k
}
423
424
13.2k
// Lets the root expression fill `_ann_range_search_runtime` and
// `_suitable_for_ann_index` from the user's vector search parameters, then logs the
// outcome at debug verbosity. Does nothing when there is no root expression.
void VExprContext::prepare_ann_range_search(const doris::VectorSearchUserParams& params) {
    if (_root != nullptr) {
        _root->prepare_ann_range_search(params, _ann_range_search_runtime,
                                        _suitable_for_ann_index);
        VLOG_DEBUG << fmt::format("Prepare ann range search result {}, _suitable_for_ann_index {}",
                                  this->_ann_range_search_runtime.to_string(),
                                  this->_suitable_for_ann_index);
    }
}
435
436
// Runs the root expression's ANN range search and, when it was executed and its distance
// is fulfilled, marks the matching slot-ref expression as index-evaluated in
// `_index_context`.
//
// Returns OK without marking anything when: there is no root; the root reports that the
// range search was not executed; the distance is not fulfilled; this context has no entry
// in `common_expr_to_slotref_map`; or that entry has no slot ref for the source column.
// Errors from the root's evaluate_ann_range_search are propagated.
Status VExprContext::evaluate_ann_range_search(
        const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& cid_to_index_iterators,
        const std::vector<ColumnId>& idx_to_cid,
        const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& column_iterators,
        const std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>&
                common_expr_to_slotref_map,
        roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& ann_index_stats) {
    if (_root == nullptr) {
        return Status::OK();
    }

    RETURN_IF_ERROR(_root->evaluate_ann_range_search(
            _ann_range_search_runtime, cid_to_index_iterators, idx_to_cid, column_iterators,
            row_bitmap, ann_index_stats));

    if (!_root->ann_range_search_executedd()) {
        return Status::OK();
    }

    if (!_root->ann_dist_is_fulfilled()) {
        // Do not perform index scan in this case.
        return Status::OK();
    }

    auto src_col_idx = _ann_range_search_runtime.src_col_idx;
    auto slot_ref_map_it = common_expr_to_slotref_map.find(this);
    if (slot_ref_map_it == common_expr_to_slotref_map.end()) {
        return Status::OK();
    }
    const auto& slot_ref_map = slot_ref_map_it->second;
    ColumnId cid = idx_to_cid[src_col_idx];
    // Look the slot ref up once and reuse the iterator.
    const auto slot_ref_it = slot_ref_map.find(cid);
    if (slot_ref_it == slot_ref_map.end()) {
        return Status::OK();
    }
    const VExpr* slot_ref_expr_addr = slot_ref_it->second;
    // NOTE(review): `cid` was already obtained via idx_to_cid[src_col_idx]; indexing
    // idx_to_cid with it again looks like a double translation. Behavior is kept as-is;
    // confirm which value set_true_for_index_status expects.
    _index_context->set_true_for_index_status(slot_ref_expr_addr, idx_to_cid[cid]);

    VLOG_DEBUG << fmt::format(
            "Evaluate ann range search for expr {}, src_col_idx {}, cid {}, row_bitmap "
            "cardinality {}",
            _root->debug_string(), src_col_idx, cid, row_bitmap.cardinality());
    return Status::OK();
}
479
480
528k
uint64_t VExprContext::get_digest(uint64_t seed) const {
481
528k
    return _root->get_digest(seed);
482
528k
}
483
484
1.24M
double VExprContext::execute_cost() const {
485
1.24M
    if (_root == nullptr) {
486
        // When there is no expression root, treat the cost as a base value.
487
        // This avoids null dereferences while keeping a deterministic cost.
488
0
        return 0.0;
489
0
    }
490
1.24M
    return _root->execute_cost();
491
1.24M
}
492
493
} // namespace doris