Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <glog/logging.h> |
21 | | |
22 | | #include <algorithm> |
23 | | #include <cstddef> |
24 | | #include <memory> |
25 | | #include <unordered_map> |
26 | | #include <utility> |
27 | | #include <vector> |
28 | | |
29 | | #include "common/factory_creator.h" |
30 | | #include "common/status.h" |
31 | | #include "core/block/block.h" |
32 | | #include "core/block/column_with_type_and_name.h" |
33 | | #include "core/column/column.h" |
34 | | #include "exec/runtime_filter/runtime_filter_selectivity.h" |
35 | | #include "exprs/function_context.h" |
36 | | #include "exprs/vexpr_fwd.h" |
37 | | #include "runtime/runtime_state.h" |
38 | | #include "storage/index/ann/ann_range_search_runtime.h" |
39 | | #include "storage/index/ann/ann_search_params.h" |
40 | | #include "storage/index/inverted/inverted_index_reader.h" |
41 | | #include "storage/index/zone_map/zonemap_filter_result.h" |
42 | | #include "storage/segment/column_reader.h" |
43 | | |
44 | | namespace doris { |
45 | | class RowDescriptor; |
46 | | class RuntimeState; |
47 | | class ZoneMapEvalContext; |
48 | | } // namespace doris |
49 | | |
50 | | namespace doris::segment_v2 { |
51 | | class Segment; |
52 | | class ColumnIterator; |
53 | | } // namespace doris::segment_v2 |
54 | | |
55 | | namespace doris { |
56 | | |
57 | | class ScoreRuntime; |
58 | | using ScoreRuntimeSPtr = std::shared_ptr<ScoreRuntime>; |
59 | | |
60 | | class IndexExecContext { |
61 | | public: |
62 | | IndexExecContext(const std::vector<ColumnId>& col_ids, |
63 | | const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& index_iterators, |
64 | | const std::vector<IndexFieldNameAndTypePair>& storage_name_and_type_vec, |
65 | | std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>>& |
66 | | common_expr_index_status, |
67 | | ScoreRuntimeSPtr score_runtime, segment_v2::Segment* segment, |
68 | | const segment_v2::ColumnIteratorOptions& column_iter_opts) |
69 | 2.82k | : _col_ids(col_ids), |
70 | 2.82k | _index_iterators(index_iterators), |
71 | 2.82k | _storage_name_and_type(storage_name_and_type_vec), |
72 | 2.82k | _expr_index_status(common_expr_index_status), |
73 | 2.82k | _score_runtime(std::move(score_runtime)), |
74 | 2.82k | _segment(segment), |
75 | 2.82k | _column_iter_opts(column_iter_opts) {} |
76 | | |
77 | 3 | segment_v2::IndexIterator* get_inverted_index_iterator_by_column_id(int column_index) const { |
78 | 3 | if (column_index < 0 || column_index >= _col_ids.size()) { |
79 | 0 | return nullptr; |
80 | 0 | } |
81 | 3 | const auto& column_id = _col_ids[column_index]; |
82 | 3 | if (column_id >= _index_iterators.size()) { |
83 | 0 | return nullptr; |
84 | 0 | } |
85 | 3 | if (!_index_iterators[column_id]) { |
86 | 1 | return nullptr; |
87 | 1 | } |
88 | 2 | return _index_iterators[column_id].get(); |
89 | 3 | } |
90 | | |
91 | 0 | segment_v2::IndexIterator* get_inverted_index_iterator_by_id(ColumnId column_id) const { |
92 | 0 | if (column_id >= _index_iterators.size()) { |
93 | 0 | return nullptr; |
94 | 0 | } |
95 | 0 | if (!_index_iterators[column_id]) { |
96 | 0 | return nullptr; |
97 | 0 | } |
98 | 0 | return _index_iterators[column_id].get(); |
99 | 0 | } |
100 | | |
101 | | const IndexFieldNameAndTypePair* get_storage_name_and_type_by_column_id( |
102 | 3 | int column_index) const { |
103 | 3 | if (column_index < 0 || column_index >= _col_ids.size()) { |
104 | 0 | return nullptr; |
105 | 0 | } |
106 | 3 | const auto& column_id = _col_ids[column_index]; |
107 | 3 | if (column_id >= _storage_name_and_type.size()) { |
108 | 2 | return nullptr; |
109 | 2 | } |
110 | 1 | return &_storage_name_and_type[column_id]; |
111 | 3 | } |
112 | | |
113 | 0 | const IndexFieldNameAndTypePair* get_storage_name_and_type_by_id(ColumnId column_id) const { |
114 | 0 | if (column_id >= _storage_name_and_type.size()) { |
115 | 0 | return nullptr; |
116 | 0 | } |
117 | 0 | return &_storage_name_and_type[column_id]; |
118 | 0 | } |
119 | | |
120 | 0 | int column_index_by_id(ColumnId column_id) const { |
121 | 0 | for (int i = 0; i < _col_ids.size(); ++i) { |
122 | 0 | if (_col_ids[i] == column_id) { |
123 | 0 | return i; |
124 | 0 | } |
125 | 0 | } |
126 | 0 | return -1; |
127 | 0 | } |
128 | | |
129 | 0 | bool get_column_id(int column_index, ColumnId* column_id) const { |
130 | 0 | if (column_id == nullptr) { |
131 | 0 | return false; |
132 | 0 | } |
133 | 0 | if (column_index < 0 || column_index >= _col_ids.size()) { |
134 | 0 | return false; |
135 | 0 | } |
136 | 0 | *column_id = _col_ids[column_index]; |
137 | 0 | return true; |
138 | 0 | } |
139 | | |
140 | 0 | segment_v2::Segment* segment() const { return _segment; } |
141 | | |
142 | 0 | const segment_v2::ColumnIteratorOptions& column_iter_opts() const { return _column_iter_opts; } |
143 | | |
144 | 1 | bool has_index_result_for_expr(const VExpr* expr) const { |
145 | 1 | return _index_result_bitmap.contains(expr); |
146 | 1 | } |
147 | | |
148 | | void set_index_result_for_expr(const VExpr* expr, |
149 | 1 | segment_v2::InvertedIndexResultBitmap bitmap) { |
150 | 1 | _index_result_bitmap[expr] = std::move(bitmap); |
151 | 1 | } |
152 | | |
153 | | std::unordered_map<const VExpr*, segment_v2::InvertedIndexResultBitmap>& |
154 | 0 | get_index_result_bitmap() { |
155 | 0 | return _index_result_bitmap; |
156 | 0 | } |
157 | | |
158 | 2 | std::unordered_map<const VExpr*, ColumnPtr>& get_index_result_column() { |
159 | 2 | return _index_result_column; |
160 | 2 | } |
161 | | |
162 | 0 | const segment_v2::InvertedIndexResultBitmap* get_index_result_for_expr(const VExpr* expr) { |
163 | 0 | auto iter = _index_result_bitmap.find(expr); |
164 | 0 | if (iter == _index_result_bitmap.end()) { |
165 | 0 | return nullptr; |
166 | 0 | } |
167 | 0 | return &iter->second; |
168 | 0 | } |
169 | | |
170 | 1 | void set_index_result_column_for_expr(const VExpr* expr, ColumnPtr column) { |
171 | 1 | _index_result_column[expr] = std::move(column); |
172 | 1 | } |
173 | | |
174 | 0 | void set_true_for_index_status(const VExpr* expr, int column_index) { |
175 | 0 | if (column_index < 0 || column_index >= _col_ids.size()) { |
176 | 0 | return; |
177 | 0 | } |
178 | 0 | const auto& column_id = _col_ids[column_index]; |
179 | 0 | if (_expr_index_status.contains(column_id)) { |
180 | 0 | if (_expr_index_status[column_id].contains(expr)) { |
181 | 0 | _expr_index_status[column_id][expr] = true; |
182 | 0 | } |
183 | 0 | } |
184 | 0 | } |
185 | | |
186 | 0 | ScoreRuntimeSPtr get_score_runtime() const { return _score_runtime; } |
187 | | |
188 | 0 | void set_analyzer_ctx_for_expr(const VExpr* expr, InvertedIndexAnalyzerCtxSPtr analyzer_ctx) { |
189 | 0 | if (expr == nullptr || analyzer_ctx == nullptr) { |
190 | 0 | return; |
191 | 0 | } |
192 | 0 | _expr_analyzer_ctx[expr] = std::move(analyzer_ctx); |
193 | 0 | } |
194 | | |
195 | 0 | const InvertedIndexAnalyzerCtx* get_analyzer_ctx_for_expr(const VExpr* expr) const { |
196 | 0 | auto iter = _expr_analyzer_ctx.find(expr); |
197 | 0 | if (iter == _expr_analyzer_ctx.end()) { |
198 | 0 | return nullptr; |
199 | 0 | } |
200 | 0 | return iter->second.get(); |
201 | 0 | } |
202 | | |
203 | 2.82k | void set_index_query_context(segment_v2::IndexQueryContextPtr index_query_context) { |
204 | 2.82k | _index_query_context = index_query_context; |
205 | 2.82k | } |
206 | | |
207 | 2 | const segment_v2::IndexQueryContextPtr& get_index_query_context() const { |
208 | 2 | return _index_query_context; |
209 | 2 | } |
210 | | |
211 | | private: |
212 | | // A reference to a vector of column IDs for the current expression's output columns. |
213 | | const std::vector<ColumnId>& _col_ids; |
214 | | |
215 | | // A reference to a vector of unique pointers to index iterators. |
216 | | const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& _index_iterators; |
217 | | |
218 | | // A reference to a vector of storage name and type pairs related to schema. |
219 | | const std::vector<IndexFieldNameAndTypePair>& _storage_name_and_type; |
220 | | |
221 | | // A map of expressions to their corresponding inverted index result bitmaps. |
222 | | std::unordered_map<const VExpr*, segment_v2::InvertedIndexResultBitmap> _index_result_bitmap; |
223 | | |
224 | | // A map of expressions to their corresponding result columns. |
225 | | std::unordered_map<const VExpr*, ColumnPtr> _index_result_column; |
226 | | |
227 | | // Per-expression analyzer context for inverted index evaluation. |
228 | | std::unordered_map<const VExpr*, InvertedIndexAnalyzerCtxSPtr> _expr_analyzer_ctx; |
229 | | |
230 | | // A reference to a map of common expressions to their inverted index evaluation status. |
231 | | std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>>& _expr_index_status; |
232 | | |
233 | | ScoreRuntimeSPtr _score_runtime; |
234 | | |
235 | | segment_v2::Segment* _segment = nullptr; // Ref |
236 | | segment_v2::ColumnIteratorOptions _column_iter_opts; |
237 | | segment_v2::IndexQueryContextPtr _index_query_context; |
238 | | }; |
239 | | |
240 | | class VExprContext { |
241 | | ENABLE_FACTORY_CREATOR(VExprContext); |
242 | | |
243 | | public: |
244 | 655k | VExprContext(VExprSPtr expr) : _root(std::move(expr)) {} |
245 | | ~VExprContext(); |
246 | | [[nodiscard]] Status prepare(RuntimeState* state, const RowDescriptor& row_desc); |
247 | | [[nodiscard]] Status open(RuntimeState* state); |
248 | | [[nodiscard]] Status clone(RuntimeState* state, VExprContextSPtr& new_ctx); |
249 | | [[nodiscard]] Status execute(Block* block, int* result_column_id); |
250 | | [[nodiscard]] Status execute(const Block* block, ColumnPtr& result_column); |
251 | | [[nodiscard]] Status execute(const Block* block, ColumnWithTypeAndName& result_data); |
252 | | [[nodiscard]] DataTypePtr execute_type(const Block* block); |
253 | | [[nodiscard]] const std::string& expr_name() const; |
254 | | [[nodiscard]] bool is_blockable() const; |
255 | | |
256 | | [[nodiscard]] Status execute_const_expr(ColumnWithTypeAndName& result); |
257 | | |
258 | | double execute_cost() const; |
259 | | |
260 | 721k | VExprSPtr root() { return _root; } |
261 | 0 | void set_root(const VExprSPtr& expr) { _root = expr; } |
262 | 7 | void set_index_context(std::shared_ptr<IndexExecContext> index_context) { |
263 | 7 | _index_context = std::move(index_context); |
264 | 7 | } |
265 | | |
266 | 100 | std::shared_ptr<IndexExecContext> get_index_context() const { return _index_context; } |
267 | | |
268 | | /// Creates a FunctionContext, and returns the index that's passed to fn_context() to |
269 | | /// retrieve the created context. Exprs that need a FunctionContext should call this in |
270 | | /// Prepare() and save the returned index. 'varargs_buffer_size', if specified, is the |
271 | | /// size of the varargs buffer in the created FunctionContext (see udf-internal.h). |
272 | | int register_function_context(RuntimeState* state, const DataTypePtr& return_type, |
273 | | const std::vector<DataTypePtr>& arg_types); |
274 | | |
275 | | /// Retrieves a registered FunctionContext. 'i' is the index returned by the call to |
276 | | /// register_function_context(). This should only be called by VExprs. |
277 | 344 | FunctionContext* fn_context(int i) { |
278 | 344 | if (i < 0 || i >= _fn_contexts.size()) { |
279 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, |
280 | 0 | "fn_context index invalid, index={}, _fn_contexts.size()={}", i, |
281 | 0 | _fn_contexts.size()); |
282 | 0 | } |
283 | 344 | return _fn_contexts[i].get(); |
284 | 344 | } |
285 | | |
286 | | // execute expr with inverted index which column a, b has inverted indexes |
287 | | // but some situation although column b has indexes, but apply index is not useful, we should |
288 | | // skip this expr, just do not apply index anymore. |
289 | | [[nodiscard]] Status evaluate_inverted_index(uint32_t segment_num_rows); |
290 | | |
291 | | [[nodiscard]] static ZoneMapFilterResult evaluate_zonemap_filter( |
292 | | const VExprContextSPtrs& conjuncts, const ZoneMapEvalContext& ctx); |
293 | | |
294 | | bool all_expr_inverted_index_evaluated(); |
295 | | |
296 | | Status execute_filter(const Block* block, uint8_t* __restrict result_filter_data, size_t rows, |
297 | | bool accept_null, bool* can_filter_all); |
298 | | |
299 | | [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block); |
300 | | |
301 | | [[nodiscard]] static Status filter_block(const VExprContextSPtrs& expr_contexts, Block* block, |
302 | | size_t column_to_keep); |
303 | | |
304 | | [[nodiscard]] static Status execute_conjuncts(const VExprContextSPtrs& ctxs, |
305 | | const std::vector<IColumn::Filter*>* filters, |
306 | | bool accept_null, const Block* block, |
307 | | IColumn::Filter* result_filter, |
308 | | bool* can_filter_all); |
309 | | |
310 | | [[nodiscard]] static Status execute_conjuncts(const VExprContextSPtrs& conjuncts, |
311 | | const Block* block, ColumnUInt8& null_map, |
312 | | IColumn::Filter& result_filter); |
313 | | |
314 | | static Status execute_conjuncts(const VExprContextSPtrs& ctxs, |
315 | | const std::vector<IColumn::Filter*>* filters, Block* block, |
316 | | IColumn::Filter* result_filter, bool* can_filter_all); |
317 | | |
318 | | [[nodiscard]] static Status execute_conjuncts_and_filter_block( |
319 | | const VExprContextSPtrs& ctxs, Block* block, std::vector<uint32_t>& columns_to_filter, |
320 | | int column_to_keep); |
321 | | |
322 | | static Status execute_conjuncts_and_filter_block(const VExprContextSPtrs& ctxs, Block* block, |
323 | | std::vector<uint32_t>& columns_to_filter, |
324 | | int column_to_keep, IColumn::Filter& filter); |
325 | | |
326 | | [[nodiscard]] static Status get_output_block_after_execute_exprs(const VExprContextSPtrs&, |
327 | | const Block&, Block*, |
328 | | bool do_projection = false); |
329 | | |
330 | 6 | int get_last_result_column_id() const { |
331 | 6 | DCHECK(_last_result_column_id != -1); |
332 | 6 | return _last_result_column_id; |
333 | 6 | } |
334 | | |
335 | 11 | RuntimeFilterSelectivity& get_runtime_filter_selectivity() { |
336 | 11 | if (!_rf_selectivity) { |
337 | 0 | throw Exception(ErrorCode::INTERNAL_ERROR, "RuntimeFilterSelectivity is null"); |
338 | 0 | } |
339 | 11 | return *_rf_selectivity; |
340 | 11 | } |
341 | | |
342 | 0 | FunctionContext::FunctionStateScope get_function_state_scope() const { |
343 | 0 | return _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; |
344 | 0 | } |
345 | | |
346 | | void clone_fn_contexts(VExprContext* other); |
347 | | |
348 | 0 | VExprContext& operator=(const VExprContext& other) { |
349 | 0 | if (this == &other) { |
350 | 0 | return *this; |
351 | 0 | } |
352 | 0 |
|
353 | 0 | _root = other._root; |
354 | 0 | _is_clone = other._is_clone; |
355 | 0 | _prepared = other._prepared; |
356 | 0 | _opened = other._opened; |
357 | 0 |
|
358 | 0 | for (const auto& fn : other._fn_contexts) { |
359 | 0 | _fn_contexts.emplace_back(fn->clone()); |
360 | 0 | } |
361 | 0 |
|
362 | 0 | _last_result_column_id = other._last_result_column_id; |
363 | 0 | _depth_num = other._depth_num; |
364 | 0 | return *this; |
365 | 0 | } |
366 | | |
367 | 0 | VExprContext& operator=(VExprContext&& other) { |
368 | 0 | _root = other._root; |
369 | 0 | other._root = nullptr; |
370 | 0 | _is_clone = other._is_clone; |
371 | 0 | _prepared = other._prepared; |
372 | 0 | _opened = other._opened; |
373 | 0 | _fn_contexts = std::move(other._fn_contexts); |
374 | 0 | _last_result_column_id = other._last_result_column_id; |
375 | 0 | _depth_num = other._depth_num; |
376 | 0 | return *this; |
377 | 0 | } |
378 | | |
379 | 136 | [[nodiscard]] static size_t get_memory_usage(const VExprContextSPtrs& contexts) { |
380 | 136 | size_t usage = 0; |
381 | 136 | std::for_each(contexts.cbegin(), contexts.cend(), |
382 | 136 | [&usage](auto&& context) { usage += context->_memory_usage; }); |
383 | 136 | return usage; |
384 | 136 | } |
385 | | |
386 | 0 | [[nodiscard]] size_t get_memory_usage() const { return _memory_usage; } |
387 | | |
388 | | void prepare_ann_range_search(const doris::VectorSearchUserParams& params); |
389 | | |
390 | | Status evaluate_ann_range_search( |
391 | | const std::vector<std::unique_ptr<segment_v2::IndexIterator>>& cid_to_index_iterators, |
392 | | const std::vector<ColumnId>& idx_to_cid, |
393 | | const std::vector<std::unique_ptr<segment_v2::ColumnIterator>>& column_iterators, |
394 | | const std::unordered_map<VExprContext*, std::unordered_map<ColumnId, VExpr*>>& |
395 | | common_expr_to_slotref_map, |
396 | | roaring::Roaring& row_bitmap, segment_v2::AnnIndexStats& ann_index_stats, |
397 | | bool enable_result_cache); |
398 | | |
399 | | uint64_t get_digest(uint64_t seed) const; |
400 | | |
401 | | private: |
402 | | // Close method is called in vexpr context dector, not need call expicility |
403 | | void close(); |
404 | | |
405 | | static void _reset_memory_usage(const VExprContextSPtrs& contexts); |
406 | | |
407 | | friend class VExpr; |
408 | | |
409 | | /// The expr tree this context is for. |
410 | | VExprSPtr _root; |
411 | | |
412 | | /// True if this context came from a Clone() call. Used to manage FunctionStateScope. |
413 | | bool _is_clone = false; |
414 | | |
415 | | /// Variables keeping track of current state. |
416 | | bool _prepared = false; |
417 | | bool _opened = false; |
418 | | |
419 | | /// FunctionContexts for each registered expression. The FunctionContexts are created |
420 | | /// and owned by this VExprContext. |
421 | | std::vector<std::unique_ptr<FunctionContext>> _fn_contexts; |
422 | | |
423 | | int _last_result_column_id = -1; |
424 | | |
425 | | /// The depth of expression-tree. |
426 | | int _depth_num = 0; |
427 | | |
428 | | std::shared_ptr<IndexExecContext> _index_context; |
429 | | size_t _memory_usage = 0; |
430 | | |
431 | | segment_v2::AnnRangeSearchRuntime _ann_range_search_runtime; |
432 | | bool _suitable_for_ann_index = true; |
433 | | |
434 | | std::unique_ptr<RuntimeFilterSelectivity> _rf_selectivity = |
435 | | std::make_unique<RuntimeFilterSelectivity>(); |
436 | | }; |
437 | | } // namespace doris |