be/src/exprs/vexpr_context.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "exprs/vexpr_context.h" |
19 | | |
20 | | #include <algorithm> |
21 | | #include <cstdint> |
22 | | #include <string> |
23 | | |
24 | | #include "common/compiler_util.h" // IWYU pragma: keep |
25 | | #include "common/exception.h" |
26 | | #include "common/status.h" |
27 | | #include "core/block/column_numbers.h" |
28 | | #include "core/block/column_with_type_and_name.h" |
29 | | #include "core/block/columns_with_type_and_name.h" |
30 | | #include "core/column/column.h" |
31 | | #include "core/column/column_const.h" |
32 | | #include "exec/common/util.hpp" |
33 | | #include "exprs/function_context.h" |
34 | | #include "exprs/vexpr.h" |
35 | | #include "runtime/runtime_state.h" |
36 | | #include "runtime/thread_context.h" |
37 | | #include "storage/olap_common.h" |
38 | | #include "storage/segment/column_reader.h" |
39 | | #include "util/simd/bits.h" |
40 | | |
41 | | namespace doris { |
42 | | class RowDescriptor; |
43 | | } // namespace doris |
44 | | |
45 | | namespace doris { |
46 | | |
47 | 28.9M | VExprContext::~VExprContext() { |
48 | | // In runtime filter, only create expr context to get expr root, will not call |
49 | | // prepare or open, so that it is not need to call close. And call close may core |
50 | | // because the function context in expr is not set. |
51 | 28.9M | if (!_prepared || !_opened) { |
52 | 246k | return; |
53 | 246k | } |
54 | 28.7M | try { |
55 | 28.7M | close(); |
56 | 28.7M | } catch (const Exception& e) { |
57 | 0 | LOG(WARNING) << "Exception occurs when expr context deconstruct: " << e.to_string(); |
58 | 0 | } |
59 | 28.7M | } |
60 | | |
61 | 1.54M | Status VExprContext::execute(Block* block, int* result_column_id) { |
62 | 1.54M | Status st; |
63 | 1.54M | RETURN_IF_CATCH_EXCEPTION({ |
64 | 1.54M | st = _root->execute(this, block, result_column_id); |
65 | 1.54M | _last_result_column_id = *result_column_id; |
66 | | // We should first check the status, as some expressions might incorrectly set result_column_id, even if the st is not ok. |
67 | 1.54M | if (st.ok() && _last_result_column_id != -1) { |
68 | 1.54M | block->get_by_position(*result_column_id).column->sanity_check(); |
69 | 1.54M | RETURN_IF_ERROR( |
70 | 1.54M | block->get_by_position(*result_column_id).check_type_and_column_match()); |
71 | 1.54M | } |
72 | 1.54M | }); |
73 | 1.53M | return st; |
74 | 1.54M | } |
75 | | |
76 | 2.70M | Status VExprContext::execute(const Block* block, ColumnPtr& result_column) { |
77 | 2.70M | Status st; |
78 | 2.70M | RETURN_IF_CATCH_EXCEPTION( |
79 | 2.70M | { st = _root->execute_column(this, block, nullptr, block->rows(), result_column); }); |
80 | 2.70M | return st; |
81 | 2.70M | } |
82 | | |
83 | 34.3k | Status VExprContext::execute(const Block* block, ColumnWithTypeAndName& result_data) { |
84 | 34.3k | Status st; |
85 | 34.3k | ColumnPtr result_column; |
86 | 34.3k | RETURN_IF_CATCH_EXCEPTION( |
87 | 34.3k | { st = _root->execute_column(this, block, nullptr, block->rows(), result_column); }); |
88 | 34.3k | RETURN_IF_ERROR(st); |
89 | 34.3k | result_data.column = result_column; |
90 | 34.3k | result_data.type = execute_type(block); |
91 | 34.3k | result_data.name = _root->expr_name(); |
92 | 34.3k | return Status::OK(); |
93 | 34.3k | } |
94 | | |
95 | 1.21M | DataTypePtr VExprContext::execute_type(const Block* block) { |
96 | 1.21M | return _root->execute_type(block); |
97 | 1.21M | } |
98 | | |
99 | 1.12M | Status VExprContext::execute_const_expr(ColumnWithTypeAndName& result) { |
100 | 1.12M | Status st; |
101 | 1.12M | RETURN_IF_CATCH_EXCEPTION( |
102 | 1.12M | { st = _root->execute_column(this, nullptr, nullptr, 1, result.column); }); |
103 | 1.12M | RETURN_IF_ERROR(st); |
104 | 1.12M | result.type = _root->execute_type(nullptr); |
105 | 1.12M | result.name = _root->expr_name(); |
106 | 1.12M | return Status::OK(); |
107 | 1.12M | } |
108 | | |
109 | 1.18M | [[nodiscard]] const std::string& VExprContext::expr_name() const { |
110 | 1.18M | return _root->expr_name(); |
111 | 1.18M | } |
112 | | |
113 | 0 | bool VExprContext::is_blockable() const { |
114 | 0 | return _root->is_blockable(); |
115 | 0 | } |
116 | | |
117 | 7.40M | Status VExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc) { |
118 | 7.40M | _prepared = true; |
119 | 7.40M | Status st; |
120 | 7.40M | RETURN_IF_CATCH_EXCEPTION({ st = _root->prepare(state, row_desc, this); }); |
121 | 7.40M | return st; |
122 | 7.40M | } |
123 | | |
124 | 7.41M | Status VExprContext::open(RuntimeState* state) { |
125 | 7.41M | DCHECK(_prepared); |
126 | 7.41M | if (_opened) { |
127 | 46 | return Status::OK(); |
128 | 46 | } |
129 | 7.41M | _opened = true; |
130 | | // Fragment-local state is only initialized for original contexts. Clones inherit the |
131 | | // original's fragment state and only need to have thread-local state initialized. |
132 | 7.41M | FunctionContext::FunctionStateScope scope = |
133 | 7.41M | _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; |
134 | 7.41M | Status st; |
135 | 7.41M | RETURN_IF_CATCH_EXCEPTION({ st = _root->open(state, this, scope); }); |
136 | 7.40M | return st; |
137 | 7.41M | } |
138 | | |
139 | 28.7M | void VExprContext::close() { |
140 | | // Sometimes expr context may not have a root, then it need not call close |
141 | 28.7M | if (_root == nullptr) { |
142 | 0 | return; |
143 | 0 | } |
144 | 28.7M | FunctionContext::FunctionStateScope scope = |
145 | 28.7M | _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; |
146 | 28.7M | _root->close(this, scope); |
147 | 28.7M | } |
148 | | |
149 | 21.1M | Status VExprContext::clone(RuntimeState* state, VExprContextSPtr& new_ctx) { |
150 | 18.4E | DCHECK(_prepared) << "expr context not prepared"; |
151 | 21.1M | DCHECK(_opened); |
152 | 21.1M | DCHECK(new_ctx.get() == nullptr); |
153 | | |
154 | 21.1M | new_ctx = std::make_shared<VExprContext>(_root); |
155 | 21.1M | for (auto& _fn_context : _fn_contexts) { |
156 | 1.56M | new_ctx->_fn_contexts.push_back(_fn_context->clone()); |
157 | 1.56M | } |
158 | | |
159 | 21.1M | new_ctx->_is_clone = true; |
160 | 21.1M | new_ctx->_prepared = true; |
161 | 21.1M | new_ctx->_opened = true; |
162 | | // segment_v2::AnnRangeSearchRuntime should be cloned as well. |
163 | | // The object of segment_v2::AnnRangeSearchRuntime is not shared by threads. |
164 | 21.1M | new_ctx->_ann_range_search_runtime = this->_ann_range_search_runtime; |
165 | 21.1M | new_ctx->_scan_filter_handle = _scan_filter_handle; |
166 | | |
167 | 21.1M | return _root->open(state, new_ctx.get(), FunctionContext::THREAD_LOCAL); |
168 | 21.1M | } |
169 | | |
170 | 0 | void VExprContext::clone_fn_contexts(VExprContext* other) { |
171 | 0 | for (auto& _fn_context : _fn_contexts) { |
172 | 0 | other->_fn_contexts.push_back(_fn_context->clone()); |
173 | 0 | } |
174 | 0 | } |
175 | | |
176 | | int VExprContext::register_function_context(RuntimeState* state, const DataTypePtr& return_type, |
177 | 885k | const std::vector<DataTypePtr>& arg_types) { |
178 | 885k | _fn_contexts.push_back(FunctionContext::create_context(state, return_type, arg_types)); |
179 | 885k | _fn_contexts.back()->set_check_overflow_for_decimal(state->check_overflow_for_decimal()); |
180 | 885k | _fn_contexts.back()->set_enable_strict_mode(state->enable_strict_mode()); |
181 | 885k | return static_cast<int>(_fn_contexts.size()) - 1; |
182 | 885k | } |
183 | | |
184 | 18.7k | Status VExprContext::evaluate_inverted_index(uint32_t segment_num_rows) { |
185 | 18.7k | Status st; |
186 | 18.7k | RETURN_IF_CATCH_EXCEPTION({ st = _root->evaluate_inverted_index(this, segment_num_rows); }); |
187 | 18.7k | return st; |
188 | 18.7k | } |
189 | | |
190 | 18.3k | bool VExprContext::all_expr_inverted_index_evaluated() { |
191 | 18.3k | return _index_context->has_index_result_for_expr(_root.get()); |
192 | 18.3k | } |
193 | | |
194 | 50 | Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block) { |
195 | 50 | if (vexpr_ctx == nullptr || block->rows() == 0) { |
196 | 0 | return Status::OK(); |
197 | 0 | } |
198 | 50 | ColumnPtr filter_column; |
199 | 50 | RETURN_IF_ERROR(vexpr_ctx->execute(block, filter_column)); |
200 | 50 | size_t filter_column_id = block->columns(); |
201 | 50 | block->insert({filter_column, vexpr_ctx->execute_type(block), "filter_column"}); |
202 | 50 | vexpr_ctx->_memory_usage = filter_column->allocated_bytes(); |
203 | 50 | return Block::filter_block(block, filter_column_id, filter_column_id); |
204 | 50 | } |
205 | | |
206 | | Status VExprContext::filter_block(const VExprContextSPtrs& expr_contexts, Block* block, |
207 | | size_t column_to_keep, |
208 | 1.86M | std::optional<ScanFilterStage> scan_filter_stage) { |
209 | 1.86M | if (expr_contexts.empty() || block->rows() == 0) { |
210 | 1.78M | return Status::OK(); |
211 | 1.78M | } |
212 | | |
213 | 75.7k | ColumnNumbers columns_to_filter(column_to_keep); |
214 | 75.7k | std::iota(columns_to_filter.begin(), columns_to_filter.end(), 0); |
215 | | |
216 | 75.7k | return execute_conjuncts_and_filter_block(expr_contexts, block, columns_to_filter, |
217 | 75.7k | static_cast<int>(column_to_keep), scan_filter_stage); |
218 | 1.86M | } |
219 | | |
220 | | Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, |
221 | | const std::vector<IColumn::Filter*>* filters, Block* block, |
222 | | IColumn::Filter* result_filter, bool* can_filter_all, |
223 | 262k | std::optional<ScanFilterStage> scan_filter_stage) { |
224 | 262k | return execute_conjuncts(ctxs, filters, false, block, result_filter, can_filter_all, |
225 | 262k | scan_filter_stage); |
226 | 262k | } |
227 | | |
228 | | Status VExprContext::execute_filter(const Block* block, uint8_t* __restrict result_filter_data, |
229 | 729k | size_t rows, bool accept_null, bool* can_filter_all) { |
230 | 729k | return _root->execute_filter(this, block, result_filter_data, rows, accept_null, |
231 | 729k | can_filter_all); |
232 | 729k | } |
233 | | |
234 | | Status VExprContext::execute_conjuncts(const VExprContextSPtrs& ctxs, |
235 | | const std::vector<IColumn::Filter*>* filters, |
236 | | bool accept_null, const Block* block, |
237 | | IColumn::Filter* result_filter, bool* can_filter_all, |
238 | 678k | std::optional<ScanFilterStage> scan_filter_stage) { |
239 | 678k | size_t rows = block->rows(); |
240 | 678k | DCHECK_EQ(result_filter->size(), rows); |
241 | 678k | *can_filter_all = false; |
242 | 678k | auto* __restrict result_filter_data = result_filter->data(); |
243 | 729k | for (const auto& ctx : ctxs) { |
244 | 729k | const bool collect_scan_filter_stats = |
245 | 729k | scan_filter_stage.has_value() && ctx->scan_filter_handle(); |
246 | 729k | const int64_t input_rows = |
247 | 729k | collect_scan_filter_stats |
248 | 729k | ? std::count(result_filter_data, result_filter_data + rows, 1) |
249 | 729k | : 0; |
250 | 729k | RETURN_IF_ERROR( |
251 | 729k | ctx->execute_filter(block, result_filter_data, rows, accept_null, can_filter_all)); |
252 | 729k | if (collect_scan_filter_stats) { |
253 | 1.63k | const int64_t output_rows = |
254 | 1.63k | *can_filter_all ? 0 |
255 | 1.63k | : std::count(result_filter_data, result_filter_data + rows, 1); |
256 | 1.63k | ctx->scan_filter_handle().stats->record(*scan_filter_stage, input_rows, output_rows); |
257 | 1.63k | } |
258 | 729k | if (*can_filter_all) { |
259 | 220k | return Status::OK(); |
260 | 220k | } |
261 | 729k | } |
262 | 458k | if (filters != nullptr) { |
263 | 72.1k | for (auto* filter : *filters) { |
264 | 19.0k | auto* __restrict filter_data = filter->data(); |
265 | 19.0k | const size_t size = filter->size(); |
266 | 827k | for (size_t i = 0; i < size; ++i) { |
267 | 808k | result_filter_data[i] &= filter_data[i]; |
268 | 808k | } |
269 | 19.0k | if (memchr(result_filter_data, 0x1, size) == nullptr) { |
270 | 18.1k | *can_filter_all = true; |
271 | 18.1k | return Status::OK(); |
272 | 18.1k | } |
273 | 19.0k | } |
274 | 72.1k | } |
275 | 439k | return Status::OK(); |
276 | 458k | } |
277 | | |
278 | | Status VExprContext::execute_conjuncts(const VExprContextSPtrs& conjuncts, const Block* block, |
279 | 378 | ColumnUInt8& null_map, IColumn::Filter& filter) { |
280 | 378 | const auto& rows = block->rows(); |
281 | 378 | if (rows == 0) { |
282 | 0 | return Status::OK(); |
283 | 0 | } |
284 | 378 | if (null_map.size() != rows) { |
285 | 0 | return Status::InternalError("null_map.size()!=rows, null_map.size()={}, rows={}", |
286 | 0 | null_map.size(), rows); |
287 | 0 | } |
288 | | |
289 | 378 | auto* final_null_map = null_map.get_data().data(); |
290 | 378 | auto* final_filter_ptr = filter.data(); |
291 | | |
292 | 378 | for (const auto& conjunct : conjuncts) { |
293 | 66 | ColumnPtr result_column; |
294 | 66 | RETURN_IF_ERROR(conjunct->execute(block, result_column)); |
295 | 66 | auto [filter_column, is_const] = unpack_if_const(result_column); |
296 | 66 | const auto* nullable_column = assert_cast<const ColumnNullable*>(filter_column.get()); |
297 | 66 | if (!is_const) { |
298 | 57 | const ColumnPtr& nested_column = nullable_column->get_nested_column_ptr(); |
299 | 57 | const IColumn::Filter& result = |
300 | 57 | assert_cast<const ColumnUInt8&>(*nested_column).get_data(); |
301 | 57 | const auto* __restrict filter_data = result.data(); |
302 | 57 | const auto* __restrict null_map_data = nullable_column->get_null_map_data().data(); |
303 | 57 | DCHECK_EQ(rows, nullable_column->size()); |
304 | | |
305 | 634 | for (size_t i = 0; i != rows; ++i) { |
306 | | // null and null => null |
307 | | // null and true => null |
308 | | // null and false => false |
309 | 577 | final_null_map[i] = (final_null_map[i] & (null_map_data[i] | filter_data[i])) | |
310 | 577 | (null_map_data[i] & (final_null_map[i] | final_filter_ptr[i])); |
311 | 577 | final_filter_ptr[i] = final_filter_ptr[i] & filter_data[i]; |
312 | 577 | } |
313 | 57 | } else { |
314 | 9 | bool filter_data = nullable_column->get_bool(0); |
315 | 9 | bool null_map_data = nullable_column->is_null_at(0); |
316 | 28 | for (size_t i = 0; i != rows; ++i) { |
317 | | // null and null => null |
318 | | // null and true => null |
319 | | // null and false => false |
320 | 19 | final_null_map[i] = (final_null_map[i] & (null_map_data | filter_data)) | |
321 | 19 | (null_map_data & (final_null_map[i] | final_filter_ptr[i])); |
322 | 19 | final_filter_ptr[i] = final_filter_ptr[i] & filter_data; |
323 | 19 | } |
324 | 9 | } |
325 | 66 | } |
326 | 378 | return Status::OK(); |
327 | 378 | } |
328 | | |
329 | | // TODO Performance Optimization |
330 | | // need exception safety |
331 | | Status VExprContext::execute_conjuncts_and_filter_block( |
332 | | const VExprContextSPtrs& ctxs, Block* block, std::vector<uint32_t>& columns_to_filter, |
333 | 77.7k | int column_to_keep, std::optional<ScanFilterStage> scan_filter_stage) { |
334 | 77.7k | IColumn::Filter result_filter(block->rows(), 1); |
335 | 77.7k | bool can_filter_all; |
336 | | |
337 | 77.7k | _reset_memory_usage(ctxs); |
338 | | |
339 | 77.7k | RETURN_IF_ERROR(execute_conjuncts(ctxs, nullptr, false, block, &result_filter, &can_filter_all, |
340 | 77.7k | scan_filter_stage)); |
341 | | |
342 | | // Accumulate the usage of `result_filter` into the first context. |
343 | 77.7k | if (!ctxs.empty()) { |
344 | 77.7k | ctxs[0]->_memory_usage += result_filter.allocated_bytes(); |
345 | 77.7k | } |
346 | 77.7k | if (can_filter_all) { |
347 | 38.9k | for (auto& col : columns_to_filter) { |
348 | 38.9k | auto& column = block->get_by_position(col).column; |
349 | 38.9k | if (column->is_exclusive()) { |
350 | 35.8k | column->assert_mutable()->clear(); |
351 | 35.8k | } else { |
352 | 3.10k | column = column->clone_empty(); |
353 | 3.10k | } |
354 | 38.9k | } |
355 | 68.3k | } else { |
356 | 68.3k | try { |
357 | 68.3k | Block::filter_block_internal(block, columns_to_filter, result_filter); |
358 | 68.3k | } catch (const Exception& e) { |
359 | 0 | std::string str; |
360 | 0 | for (auto ctx : ctxs) { |
361 | 0 | if (str.length()) { |
362 | 0 | str += ","; |
363 | 0 | } |
364 | 0 | str += ctx->root()->debug_string(); |
365 | 0 | } |
366 | |
|
367 | 0 | return Status::InternalError( |
368 | 0 | "filter_block_internal meet exception, exprs=[{}], exception={}", str, |
369 | 0 | e.what()); |
370 | 0 | } |
371 | 68.3k | } |
372 | 77.7k | Block::erase_useless_column(block, column_to_keep); |
373 | 77.7k | return Status::OK(); |
374 | 77.7k | } |
375 | | |
376 | | Status VExprContext::execute_conjuncts_and_filter_block( |
377 | | const VExprContextSPtrs& ctxs, Block* block, std::vector<uint32_t>& columns_to_filter, |
378 | | int column_to_keep, IColumn::Filter& filter, |
379 | 13.6k | std::optional<ScanFilterStage> scan_filter_stage) { |
380 | 13.6k | _reset_memory_usage(ctxs); |
381 | 13.6k | filter.resize_fill(block->rows(), 1); |
382 | 13.6k | bool can_filter_all; |
383 | 13.6k | RETURN_IF_ERROR(execute_conjuncts(ctxs, nullptr, false, block, &filter, &can_filter_all, |
384 | 13.6k | scan_filter_stage)); |
385 | | |
386 | | // Accumulate the usage of `result_filter` into the first context. |
387 | 13.6k | if (!ctxs.empty()) { |
388 | 13.1k | ctxs[0]->_memory_usage += filter.allocated_bytes(); |
389 | 13.1k | } |
390 | 13.6k | if (can_filter_all) { |
391 | 6.89k | for (auto& col : columns_to_filter) { |
392 | 6.89k | auto& column = block->get_by_position(col).column; |
393 | 6.89k | if (column->is_exclusive()) { |
394 | 6.89k | column->assert_mutable()->clear(); |
395 | 6.89k | } else { |
396 | 1 | column = column->clone_empty(); |
397 | 1 | } |
398 | 6.89k | } |
399 | 9.87k | } else { |
400 | 9.87k | RETURN_IF_CATCH_EXCEPTION(Block::filter_block_internal(block, columns_to_filter, filter)); |
401 | 9.87k | } |
402 | | |
403 | 13.6k | Block::erase_useless_column(block, column_to_keep); |
404 | 13.6k | return Status::OK(); |
405 | 13.6k | } |
406 | | |
407 | | // do_projection: for some query(e.g. in MultiCastDataStreamerSourceOperator::get_block()), |
408 | | // output_vexpr_ctxs will output the same column more than once, and if the output_block |
409 | | // is mem-reused later, it will trigger DCHECK_EQ(d.column->use_count(), 1) failure when |
410 | | // doing Block::clear_column_data, set do_projection to true to copy the column data to |
411 | | // avoid this problem. |
412 | | Status VExprContext::get_output_block_after_execute_exprs( |
413 | | const VExprContextSPtrs& output_vexpr_ctxs, const Block& input_block, Block* output_block, |
414 | 237k | bool do_projection) { |
415 | 237k | auto rows = input_block.rows(); |
416 | 237k | ColumnsWithTypeAndName result_columns; |
417 | 237k | _reset_memory_usage(output_vexpr_ctxs); |
418 | | |
419 | 1.17M | for (const auto& vexpr_ctx : output_vexpr_ctxs) { |
420 | 1.17M | ColumnPtr result_column; |
421 | 1.17M | RETURN_IF_ERROR(vexpr_ctx->execute(&input_block, result_column)); |
422 | | |
423 | 1.17M | auto type = vexpr_ctx->execute_type(&input_block); |
424 | 1.17M | const auto& name = vexpr_ctx->expr_name(); |
425 | | |
426 | 1.17M | vexpr_ctx->_memory_usage += result_column->allocated_bytes(); |
427 | 1.17M | if (do_projection) { |
428 | 28.7k | result_columns.emplace_back(result_column->clone_resized(rows), type, name); |
429 | | |
430 | 1.14M | } else { |
431 | 1.14M | result_columns.emplace_back(result_column, type, name); |
432 | 1.14M | } |
433 | 1.17M | } |
434 | 237k | *output_block = {result_columns}; |
435 | 237k | return Status::OK(); |
436 | 237k | } |
437 | | |
438 | 329k | void VExprContext::_reset_memory_usage(const VExprContextSPtrs& contexts) { |
439 | 329k | std::for_each(contexts.begin(), contexts.end(), |
440 | 1.32M | [](auto&& context) { context->_memory_usage = 0; }); |
441 | 329k | } |
442 | | |
443 | 22.2k | void VExprContext::prepare_ann_range_search(const doris::VectorSearchUserParams& params) { |
444 | 22.2k | if (_root == nullptr) { |
445 | 0 | return; |
446 | 0 | } |
447 | | |
448 | 22.2k | _root->prepare_ann_range_search(params, _ann_range_search_runtime, _suitable_for_ann_index); |
449 | 18.4E | VLOG_DEBUG << fmt::format("Prepare ann range search result {}, _suitable_for_ann_index {}", |
450 | 18.4E | this->_ann_range_search_runtime.to_string(), |
451 | 18.4E | this->_suitable_for_ann_index); |
452 | 22.2k | return; |
453 | 22.2k | } |
454 | | |
455 | | Status VExprContext::evaluate_ann_range_search( |
456 | | const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& cid_to_index_iterators, |
457 | | const std::vector<ColumnId>& idx_to_cid, |
458 | | const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& column_iterators, |
459 | | const std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>& |
460 | | common_expr_to_slotref_map, |
461 | | roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& ann_index_stats, |
462 | 18.4k | bool enable_result_cache, bool* ann_range_search_executed) { |
463 | 18.4k | if (ann_range_search_executed != nullptr) { |
464 | 18.4k | *ann_range_search_executed = false; |
465 | 18.4k | } |
466 | 18.4k | if (_root == nullptr) { |
467 | 0 | return Status::OK(); |
468 | 0 | } |
469 | | |
470 | 18.4k | AnnRangeSearchEvaluationResult evaluation_result; |
471 | 18.4k | RETURN_IF_ERROR(_root->evaluate_ann_range_search( |
472 | 18.4k | _ann_range_search_runtime, cid_to_index_iterators, idx_to_cid, column_iterators, |
473 | 18.4k | row_bitmap, ann_index_stats, enable_result_cache, evaluation_result)); |
474 | | |
475 | 18.4k | if (!evaluation_result.executed) { |
476 | 18.4k | return Status::OK(); |
477 | 18.4k | } |
478 | 33 | if (ann_range_search_executed != nullptr) { |
479 | 23 | *ann_range_search_executed = true; |
480 | 23 | } |
481 | | |
482 | 33 | DCHECK(_index_context != nullptr); |
483 | 33 | _index_context->set_index_result_for_expr( |
484 | 33 | _root.get(), |
485 | 33 | segment_v2::InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(row_bitmap), |
486 | 33 | std::make_shared<roaring::Roaring>())); |
487 | | |
488 | 33 | if (!evaluation_result.dist_fulfilled) { |
489 | | // Do not perform index scan in this case. |
490 | 0 | return Status::OK(); |
491 | 0 | } |
492 | | |
493 | 33 | DCHECK_LT(_ann_range_search_runtime.src_col_idx, idx_to_cid.size()); |
494 | 33 | const auto src_col_idx = cast_set<int>(_ann_range_search_runtime.src_col_idx); |
495 | 33 | const auto src_col_key = cast_set<ColumnId>(_ann_range_search_runtime.src_col_idx); |
496 | 33 | auto slot_ref_map_it = common_expr_to_slotref_map.find(this); |
497 | 33 | if (slot_ref_map_it == common_expr_to_slotref_map.end()) { |
498 | 0 | return Status::OK(); |
499 | 0 | } |
500 | 33 | auto& slot_ref_map = slot_ref_map_it->second; |
501 | 33 | auto slot_ref_it = slot_ref_map.find(src_col_key); |
502 | 33 | if (slot_ref_it == slot_ref_map.end()) { |
503 | 0 | return Status::OK(); |
504 | 0 | } |
505 | 33 | const VExpr* slot_ref_expr_addr = slot_ref_it->second; |
506 | 33 | _index_context->set_true_for_index_status(slot_ref_expr_addr, src_col_idx); |
507 | | |
508 | 33 | VLOG_DEBUG << fmt::format( |
509 | 10 | "Evaluate ann range search for expr {}, src_col_idx {}, cid {}, row_bitmap " |
510 | 10 | "cardinality {}", |
511 | 10 | _root->debug_string(), src_col_idx, idx_to_cid[_ann_range_search_runtime.src_col_idx], |
512 | 10 | row_bitmap.cardinality()); |
513 | 33 | return Status::OK(); |
514 | 33 | } |
515 | | |
516 | 499k | uint64_t VExprContext::get_digest(uint64_t seed) const { |
517 | 499k | return _root->get_digest(seed); |
518 | 499k | } |
519 | | |
520 | 1.25M | double VExprContext::execute_cost() const { |
521 | 1.25M | if (_root == nullptr) { |
522 | | // When there is no expression root, treat the cost as a base value. |
523 | | // This avoids null dereferences while keeping a deterministic cost. |
524 | 0 | return 0.0; |
525 | 0 | } |
526 | 1.25M | return _root->execute_cost(); |
527 | 1.25M | } |
528 | | |
529 | | } // namespace doris |