Coverage Report

Created: 2026-06-27 16:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/vexpr_context.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/vexpr_context.h"
19
20
#include <algorithm>
21
#include <cstdint>
22
#include <string>
23
24
#include "common/compiler_util.h" // IWYU pragma: keep
25
#include "common/exception.h"
26
#include "common/status.h"
27
#include "core/block/column_numbers.h"
28
#include "core/block/column_with_type_and_name.h"
29
#include "core/block/columns_with_type_and_name.h"
30
#include "core/column/column.h"
31
#include "core/column/column_const.h"
32
#include "exec/common/util.hpp"
33
#include "exprs/function_context.h"
34
#include "exprs/vexpr.h"
35
#include "runtime/runtime_state.h"
36
#include "runtime/thread_context.h"
37
#include "storage/olap_common.h"
38
#include "storage/segment/column_reader.h"
39
#include "util/simd/bits.h"
40
41
namespace doris {
42
class RowDescriptor;
43
} // namespace doris
44
45
namespace doris {
46
47
18.9M
VExprContext::~VExprContext() {
48
    // In runtime filter, only create expr context to get expr root, will not call
49
    // prepare or open, so that it is not need to call close. And call close may core
50
    // because the function context in expr is not set.
51
18.9M
    if (!_prepared || !_opened) {
52
242k
        return;
53
242k
    }
54
18.7M
    try {
55
18.7M
        close();
56
18.7M
    } catch (const Exception& e) {
57
0
        LOG(WARNING) << "Exception occurs when expr context deconstruct: " << e.to_string();
58
0
    }
59
18.7M
}
60
61
1.07M
Status VExprContext::execute(Block* block, int* result_column_id) {
62
1.07M
    Status st;
63
1.07M
    RETURN_IF_CATCH_EXCEPTION({
64
1.07M
        st = _root->execute(this, block, result_column_id);
65
1.07M
        _last_result_column_id = *result_column_id;
66
        // We should first check the status, as some expressions might incorrectly set result_column_id, even if the st is not ok.
67
1.07M
        if (st.ok() && _last_result_column_id != -1) {
68
1.07M
            block->get_by_position(*result_column_id).column->sanity_check();
69
1.07M
            RETURN_IF_ERROR(
70
1.07M
                    block->get_by_position(*result_column_id).check_type_and_column_match());
71
1.07M
        }
72
1.07M
    });
73
1.07M
    return st;
74
1.07M
}
75
76
1.91M
Status VExprContext::execute(const Block* block, ColumnPtr& result_column) {
77
1.91M
    Status st;
78
1.91M
    RETURN_IF_CATCH_EXCEPTION(
79
1.91M
            { st = _root->execute_column(this, block, nullptr, block->rows(), result_column); });
80
1.91M
    return st;
81
1.91M
}
82
83
29.0k
Status VExprContext::execute(const Block* block, ColumnWithTypeAndName& result_data) {
84
29.0k
    Status st;
85
29.0k
    ColumnPtr result_column;
86
29.0k
    RETURN_IF_CATCH_EXCEPTION(
87
29.0k
            { st = _root->execute_column(this, block, nullptr, block->rows(), result_column); });
88
29.0k
    RETURN_IF_ERROR(st);
89
29.0k
    result_data.column = result_column;
90
29.0k
    result_data.type = execute_type(block);
91
29.0k
    result_data.name = _root->expr_name();
92
29.0k
    return Status::OK();
93
29.0k
}
94
95
786k
DataTypePtr VExprContext::execute_type(const Block* block) {
96
786k
    return _root->execute_type(block);
97
786k
}
98
99
1.03M
Status VExprContext::execute_const_expr(ColumnWithTypeAndName& result) {
100
1.03M
    Status st;
101
1.03M
    RETURN_IF_CATCH_EXCEPTION(
102
1.03M
            { st = _root->execute_column(this, nullptr, nullptr, 1, result.column); });
103
1.03M
    RETURN_IF_ERROR(st);
104
1.03M
    result.type = _root->execute_type(nullptr);
105
1.03M
    result.name = _root->expr_name();
106
1.03M
    return Status::OK();
107
1.03M
}
108
109
757k
[[nodiscard]] const std::string& VExprContext::expr_name() const {
110
757k
    return _root->expr_name();
111
757k
}
112
113
0
bool VExprContext::is_blockable() const {
114
0
    return _root->is_blockable();
115
0
}
116
117
5.02M
Status VExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc) {
118
5.02M
    _prepared = true;
119
5.02M
    Status st;
120
5.02M
    RETURN_IF_CATCH_EXCEPTION({ st = _root->prepare(state, row_desc, this); });
121
5.02M
    return st;
122
5.02M
}
123
124
5.03M
Status VExprContext::open(RuntimeState* state) {
125
5.03M
    DCHECK(_prepared);
126
5.03M
    if (_opened) {
127
47
        return Status::OK();
128
47
    }
129
5.03M
    _opened = true;
130
    // Fragment-local state is only initialized for original contexts. Clones inherit the
131
    // original's fragment state and only need to have thread-local state initialized.
132
5.03M
    FunctionContext::FunctionStateScope scope =
133
5.03M
            _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL;
134
5.03M
    Status st;
135
5.03M
    RETURN_IF_CATCH_EXCEPTION({ st = _root->open(state, this, scope); });
136
5.03M
    return st;
137
5.03M
}
138
139
18.7M
void VExprContext::close() {
140
    // Sometimes expr context may not have a root, then it need not call close
141
18.7M
    if (_root == nullptr) {
142
0
        return;
143
0
    }
144
18.7M
    FunctionContext::FunctionStateScope scope =
145
18.7M
            _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL;
146
18.7M
    _root->close(this, scope);
147
18.7M
}
148
149
13.6M
Status VExprContext::clone(RuntimeState* state, VExprContextSPtr& new_ctx) {
150
18.4E
    DCHECK(_prepared) << "expr context not prepared";
151
13.6M
    DCHECK(_opened);
152
13.6M
    DCHECK(new_ctx.get() == nullptr);
153
154
13.6M
    new_ctx = std::make_shared<VExprContext>(_root);
155
13.6M
    for (auto& _fn_context : _fn_contexts) {
156
1.19M
        new_ctx->_fn_contexts.push_back(_fn_context->clone());
157
1.19M
    }
158
159
13.6M
    new_ctx->_is_clone = true;
160
13.6M
    new_ctx->_prepared = true;
161
13.6M
    new_ctx->_opened = true;
162
    // segment_v2::AnnRangeSearchRuntime should be cloned as well.
163
    // The object of segment_v2::AnnRangeSearchRuntime is not shared by threads.
164
13.6M
    new_ctx->_ann_range_search_runtime = this->_ann_range_search_runtime;
165
166
13.6M
    return _root->open(state, new_ctx.get(), FunctionContext::THREAD_LOCAL);
167
13.6M
}
168
169
0
void VExprContext::clone_fn_contexts(VExprContext* other) {
170
0
    for (auto& _fn_context : _fn_contexts) {
171
0
        other->_fn_contexts.push_back(_fn_context->clone());
172
0
    }
173
0
}
174
175
int VExprContext::register_function_context(RuntimeState* state, const DataTypePtr& return_type,
176
603k
                                            const std::vector<DataTypePtr>& arg_types) {
177
603k
    _fn_contexts.push_back(FunctionContext::create_context(state, return_type, arg_types));
178
603k
    _fn_contexts.back()->set_check_overflow_for_decimal(state->check_overflow_for_decimal());
179
603k
    _fn_contexts.back()->set_enable_strict_mode(state->enable_strict_mode());
180
603k
    return static_cast<int>(_fn_contexts.size()) - 1;
181
603k
}
182
183
19.0k
Status VExprContext::evaluate_inverted_index(uint32_t segment_num_rows) {
184
19.0k
    Status st;
185
19.0k
    RETURN_IF_CATCH_EXCEPTION({ st = _root->evaluate_inverted_index(this, segment_num_rows); });
186
19.0k
    return st;
187
19.0k
}
188
189
18.6k
bool VExprContext::all_expr_inverted_index_evaluated() {
190
18.6k
    return _index_context->has_index_result_for_expr(_root.get());
191
18.6k
}
192
193
50
Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block) {
194
50
    if (vexpr_ctx == nullptr || block->rows() == 0) {
195
0
        return Status::OK();
196
0
    }
197
50
    ColumnPtr filter_column;
198
50
    RETURN_IF_ERROR(vexpr_ctx->execute(block, filter_column));
199
50
    size_t filter_column_id = block->columns();
200
50
    block->insert({filter_column, vexpr_ctx->execute_type(block), "filter_column"});
201
50
    vexpr_ctx->_memory_usage = filter_column->allocated_bytes();
202
50
    return Block::filter_block(block, filter_column_id, filter_column_id);
203
50
}
204
205
Status VExprContext::filter_block(const VExprContextSPtrs& expr_contexts, Block* block,
206
1.37M
                                  size_t column_to_keep) {
207
1.37M
    if (expr_contexts.empty() || block->rows() == 0) {
208
1.35M
        return Status::OK();
209
1.35M
    }
210
211
19.1k
    ColumnNumbers columns_to_filter(column_to_keep);
212
19.1k
    std::iota(columns_to_filter.begin(), columns_to_filter.end(), 0);
213
214
19.1k
    return execute_conjuncts_and_filter_block(expr_contexts, block, columns_to_filter,
215
19.1k
                                              static_cast<int>(column_to_keep));
216
1.37M
}
217
218
Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs,
219
                                       const std::vector<IColumn::Filter*>* filters, Block* block,
220
2.32k
                                       IColumn::Filter* result_filter, bool* can_filter_all) {
221
2.32k
    return execute_conjuncts(ctxs, filters, false, block, result_filter, can_filter_all);
222
2.32k
}
223
224
Status VExprContext::execute_filter(const Block* block, uint8_t* __restrict result_filter_data,
225
166k
                                    size_t rows, bool accept_null, bool* can_filter_all) {
226
166k
    return _root->execute_filter(this, block, result_filter_data, rows, accept_null,
227
166k
                                 can_filter_all);
228
166k
}
229
230
Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs,
231
                                       const std::vector<IColumn::Filter*>* filters,
232
                                       bool accept_null, const Block* block,
233
346k
                                       IColumn::Filter* result_filter, bool* can_filter_all) {
234
346k
    size_t rows = block->rows();
235
346k
    DCHECK_EQ(result_filter->size(), rows);
236
346k
    *can_filter_all = false;
237
346k
    auto* __restrict result_filter_data = result_filter->data();
238
346k
    for (const auto& ctx : ctxs) {
239
166k
        RETURN_IF_ERROR(
240
166k
                ctx->execute_filter(block, result_filter_data, rows, accept_null, can_filter_all));
241
166k
        if (*can_filter_all) {
242
29.0k
            return Status::OK();
243
29.0k
        }
244
166k
    }
245
317k
    if (filters != nullptr) {
246
28
        for (auto* filter : *filters) {
247
0
            auto* __restrict filter_data = filter->data();
248
0
            const size_t size = filter->size();
249
0
            for (size_t i = 0; i < size; ++i) {
250
0
                result_filter_data[i] &= filter_data[i];
251
0
            }
252
0
            if (memchr(result_filter_data, 0x1, size) == nullptr) {
253
0
                *can_filter_all = true;
254
0
                return Status::OK();
255
0
            }
256
0
        }
257
28
    }
258
317k
    return Status::OK();
259
317k
}
260
261
Status VExprContext::execute_conjuncts(const VExprContextSPtrs& conjuncts, const Block* block,
262
375
                                       ColumnUInt8& null_map, IColumn::Filter& filter) {
263
375
    const auto& rows = block->rows();
264
375
    if (rows == 0) {
265
0
        return Status::OK();
266
0
    }
267
375
    if (null_map.size() != rows) {
268
0
        return Status::InternalError("null_map.size()!=rows, null_map.size()={}, rows={}",
269
0
                                     null_map.size(), rows);
270
0
    }
271
272
375
    auto* final_null_map = null_map.get_data().data();
273
375
    auto* final_filter_ptr = filter.data();
274
275
375
    for (const auto& conjunct : conjuncts) {
276
73
        ColumnPtr result_column;
277
73
        RETURN_IF_ERROR(conjunct->execute(block, result_column));
278
73
        auto [filter_column, is_const] = unpack_if_const(result_column);
279
73
        const auto* nullable_column = assert_cast<const ColumnNullable*>(filter_column.get());
280
73
        if (!is_const) {
281
61
            const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr();
282
61
            const IColumn::Filter& result =
283
61
                    assert_cast<const ColumnUInt8&>(*nested_column).get_data();
284
61
            const auto* __restrict filter_data = result.data();
285
61
            const auto* __restrict null_map_data = nullable_column->get_null_map_data().data();
286
61
            DCHECK_EQ(rows, nullable_column->size());
287
288
795
            for (size_t i = 0; i != rows; ++i) {
289
                // null and null    => null
290
                // null and true    => null
291
                // null and false   => false
292
734
                final_null_map[i] = (final_null_map[i] & (null_map_data[i] | filter_data[i])) |
293
734
                                    (null_map_data[i] & (final_null_map[i] | final_filter_ptr[i]));
294
734
                final_filter_ptr[i] = final_filter_ptr[i] & filter_data[i];
295
734
            }
296
61
        } else {
297
12
            bool filter_data = nullable_column->get_bool(0);
298
12
            bool null_map_data = nullable_column->is_null_at(0);
299
68
            for (size_t i = 0; i != rows; ++i) {
300
                // null and null    => null
301
                // null and true    => null
302
                // null and false   => false
303
56
                final_null_map[i] = (final_null_map[i] & (null_map_data | filter_data)) |
304
56
                                    (null_map_data & (final_null_map[i] | final_filter_ptr[i]));
305
56
                final_filter_ptr[i] = final_filter_ptr[i] & filter_data;
306
56
            }
307
12
        }
308
73
    }
309
375
    return Status::OK();
310
375
}
311
312
// TODO Performance Optimization
313
// need exception safety
314
Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block,
315
                                                        std::vector<uint32_t>& columns_to_filter,
316
20.3k
                                                        int column_to_keep) {
317
20.3k
    IColumn::Filter result_filter(block->rows(), 1);
318
20.3k
    bool can_filter_all;
319
320
20.3k
    _reset_memory_usage(ctxs);
321
322
20.3k
    RETURN_IF_ERROR(
323
20.3k
            execute_conjuncts(ctxs, nullptr, false, block, &result_filter, &can_filter_all));
324
325
    // Accumulate the usage of `result_filter` into the first context.
326
20.3k
    if (!ctxs.empty()) {
327
20.3k
        ctxs[0]->_memory_usage += result_filter.allocated_bytes();
328
20.3k
    }
329
20.3k
    if (can_filter_all) {
330
38.8k
        for (auto& col : columns_to_filter) {
331
38.8k
            auto& column = block->get_by_position(col).column;
332
38.8k
            if (column->is_exclusive()) {
333
31.1k
                column->assert_mutable()->clear();
334
31.1k
            } else {
335
7.70k
                column = column->clone_empty();
336
7.70k
            }
337
38.8k
        }
338
10.5k
    } else {
339
10.5k
        try {
340
10.5k
            Block::filter_block_internal(block, columns_to_filter, result_filter);
341
10.5k
        } catch (const Exception& e) {
342
0
            std::string str;
343
0
            for (auto ctx : ctxs) {
344
0
                if (str.length()) {
345
0
                    str += ",";
346
0
                }
347
0
                str += ctx->root()->debug_string();
348
0
            }
349
350
0
            return Status::InternalError(
351
0
                    "filter_block_internal meet exception, exprs=[{}], exception={}", str,
352
0
                    e.what());
353
0
        }
354
10.5k
    }
355
20.3k
    Block::erase_useless_column(block, column_to_keep);
356
20.3k
    return Status::OK();
357
20.3k
}
358
359
Status VExprContext::execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block,
360
                                                        std::vector<uint32_t>& columns_to_filter,
361
                                                        int column_to_keep,
362
12.4k
                                                        IColumn::Filter& filter) {
363
12.4k
    _reset_memory_usage(ctxs);
364
12.4k
    filter.resize_fill(block->rows(), 1);
365
12.4k
    bool can_filter_all;
366
12.4k
    RETURN_IF_ERROR(execute_conjuncts(ctxs, nullptr, false, block, &filter, &can_filter_all));
367
368
    // Accumulate the usage of `result_filter` into the first context.
369
12.4k
    if (!ctxs.empty()) {
370
12.4k
        ctxs[0]->_memory_usage += filter.allocated_bytes();
371
12.4k
    }
372
12.4k
    if (can_filter_all) {
373
5.90k
        for (auto& col : columns_to_filter) {
374
5.90k
            auto& column = block->get_by_position(col).column;
375
5.90k
            if (column->is_exclusive()) {
376
5.90k
                column->assert_mutable()->clear();
377
5.90k
            } else {
378
2
                column = column->clone_empty();
379
2
            }
380
5.90k
        }
381
9.11k
    } else {
382
9.11k
        RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, columns_to_filter, filter));
383
9.11k
    }
384
385
12.4k
    Block::erase_useless_column(block, column_to_keep);
386
12.4k
    return Status::OK();
387
12.4k
}
388
389
Status VExprContext::get_output_block_after_execute_exprs(
390
162k
        const VExprContextSPtrs& output_vexpr_ctxs, const Block& input_block, Block* output_block) {
391
162k
    ColumnsWithTypeAndName result_columns;
392
162k
    _reset_memory_usage(output_vexpr_ctxs);
393
394
757k
    for (const auto& vexpr_ctx : output_vexpr_ctxs) {
395
757k
        ColumnPtr result_column;
396
757k
        RETURN_IF_ERROR(vexpr_ctx->execute(&input_block, result_column));
397
398
757k
        auto type = vexpr_ctx->execute_type(&input_block);
399
757k
        const auto& name = vexpr_ctx->expr_name();
400
401
757k
        vexpr_ctx->_memory_usage += result_column->allocated_bytes();
402
757k
        result_columns.emplace_back(result_column, type, name);
403
757k
    }
404
162k
    *output_block = {result_columns};
405
162k
    return Status::OK();
406
162k
}
407
408
194k
void VExprContext::_reset_memory_usage(const VExprContextSPtrs& contexts) {
409
194k
    std::for_each(contexts.begin(), contexts.end(),
410
801k
                  [](auto&& context) { context->_memory_usage = 0; });
411
194k
}
412
413
21.4k
void VExprContext::prepare_ann_range_search(const doris::VectorSearchUserParams& params) {
414
21.4k
    if (_root == nullptr) {
415
0
        return;
416
0
    }
417
418
21.4k
    _root->prepare_ann_range_search(params, _ann_range_search_runtime, _suitable_for_ann_index);
419
18.4E
    VLOG_DEBUG << fmt::format("Prepare ann range search result {}, _suitable_for_ann_index {}",
420
18.4E
                              this->_ann_range_search_runtime.to_string(),
421
18.4E
                              this->_suitable_for_ann_index);
422
21.4k
    return;
423
21.4k
}
424
425
Status VExprContext::evaluate_ann_range_search(
426
        const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& cid_to_index_iterators,
427
        const std::vector<ColumnId>& idx_to_cid,
428
        const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& column_iterators,
429
        const std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>&
430
                common_expr_to_slotref_map,
431
        size_t rows_of_segment, roaring::Roaring& row_bitmap,
432
        segment_v2::AnnIndexStats& ann_index_stats, bool enable_result_cache,
433
18.7k
        bool* ann_range_search_executed) {
434
18.7k
    if (ann_range_search_executed != nullptr) {
435
18.7k
        *ann_range_search_executed = false;
436
18.7k
    }
437
18.7k
    if (_root == nullptr) {
438
0
        return Status::OK();
439
0
    }
440
441
18.7k
    AnnRangeSearchEvaluationResult evaluation_result;
442
18.7k
    RETURN_IF_ERROR(_root->evaluate_ann_range_search(
443
18.7k
            _ann_range_search_runtime, cid_to_index_iterators, idx_to_cid, column_iterators,
444
18.7k
            rows_of_segment, row_bitmap, ann_index_stats, enable_result_cache, evaluation_result));
445
446
18.7k
    if (!evaluation_result.executed) {
447
18.7k
        return Status::OK();
448
18.7k
    }
449
27
    if (ann_range_search_executed != nullptr) {
450
27
        *ann_range_search_executed = true;
451
27
    }
452
453
25
    DCHECK(_index_context != nullptr);
454
25
    _index_context->set_index_result_for_expr(
455
25
            _root.get(),
456
25
            segment_v2::InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(row_bitmap),
457
25
                                                  std::make_shared<roaring::Roaring>()));
458
459
25
    if (!evaluation_result.dist_fulfilled) {
460
        // Do not perform index scan in this case.
461
2
        return Status::OK();
462
2
    }
463
464
25
    DCHECK_LT(_ann_range_search_runtime.src_col_idx, idx_to_cid.size());
465
23
    const auto src_col_idx = cast_set<int>(_ann_range_search_runtime.src_col_idx);
466
23
    const auto src_col_key = cast_set<ColumnId>(_ann_range_search_runtime.src_col_idx);
467
23
    auto slot_ref_map_it = common_expr_to_slotref_map.find(this);
468
23
    if (slot_ref_map_it == common_expr_to_slotref_map.end()) {
469
1
        return Status::OK();
470
1
    }
471
22
    auto& slot_ref_map = slot_ref_map_it->second;
472
22
    auto slot_ref_it = slot_ref_map.find(src_col_key);
473
22
    if (slot_ref_it == slot_ref_map.end()) {
474
0
        return Status::OK();
475
0
    }
476
22
    const VExpr* slot_ref_expr_addr = slot_ref_it->second;
477
22
    _index_context->set_true_for_index_status(slot_ref_expr_addr, src_col_idx);
478
479
18.4E
    VLOG_DEBUG << fmt::format(
480
18.4E
            "Evaluate ann range search for expr {}, src_col_idx {}, cid {}, row_bitmap "
481
18.4E
            "cardinality {}",
482
18.4E
            _root->debug_string(), src_col_idx, idx_to_cid[_ann_range_search_runtime.src_col_idx],
483
18.4E
            row_bitmap.cardinality());
484
22
    return Status::OK();
485
22
}
486
487
283k
uint64_t VExprContext::get_digest(uint64_t seed) const {
488
283k
    return _root->get_digest(seed);
489
283k
}
490
491
647k
double VExprContext::execute_cost() const {
492
647k
    if (_root == nullptr) {
493
        // When there is no expression root, treat the cost as a base value.
494
        // This avoids null dereferences while keeping a deterministic cost.
495
0
        return 0.0;
496
0
    }
497
647k
    return _root->execute_cost();
498
647k
}
499
500
} // namespace doris