be/src/storage/tablet/tablet_reader.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/tablet/tablet_reader.h" |
19 | | |
20 | | #include <gen_cpp/olap_file.pb.h> |
21 | | #include <gen_cpp/segment_v2.pb.h> |
22 | | #include <thrift/protocol/TDebugProtocol.h> |
23 | | |
24 | | #include <algorithm> |
25 | | #include <functional> |
26 | | #include <iterator> |
27 | | #include <memory> |
28 | | #include <numeric> |
29 | | #include <ostream> |
30 | | #include <set> |
31 | | #include <shared_mutex> |
32 | | #include <unordered_map> |
33 | | |
34 | | #include "common/compiler_util.h" // IWYU pragma: keep |
35 | | #include "common/config.h" |
36 | | #include "common/exception.h" |
37 | | #include "common/logging.h" |
38 | | #include "common/status.h" |
39 | | #include "core/arena.h" |
40 | | #include "core/block/block.h" |
41 | | #include "exec/common/variant_util.h" |
42 | | #include "exprs/bloom_filter_func.h" |
43 | | #include "exprs/create_predicate_function.h" |
44 | | #include "exprs/hybrid_set.h" |
45 | | #include "runtime/query_context.h" |
46 | | #include "runtime/runtime_predicate.h" |
47 | | #include "runtime/runtime_state.h" |
48 | | #include "storage/delete/delete_handler.h" |
49 | | #include "storage/index/bloom_filter/bloom_filter.h" |
50 | | #include "storage/itoken_extractor.h" |
51 | | #include "storage/olap_common.h" |
52 | | #include "storage/olap_define.h" |
53 | | #include "storage/predicate/block_column_predicate.h" |
54 | | #include "storage/predicate/column_predicate.h" |
55 | | #include "storage/predicate/like_column_predicate.h" |
56 | | #include "storage/predicate/predicate_creator.h" |
57 | | #include "storage/row_cursor.h" |
58 | | #include "storage/schema.h" |
59 | | #include "storage/tablet/tablet.h" |
60 | | #include "storage/tablet/tablet_meta.h" |
61 | | #include "storage/tablet/tablet_schema.h" |
62 | | |
63 | | namespace doris { |
64 | | using namespace ErrorCode; |
65 | | |
66 | 1.26M | void TabletReader::ReaderParams::check_validation() const { |
67 | 1.26M | if (UNLIKELY(version.first == -1 && is_segcompaction == false)) { |
68 | 0 | throw Exception(Status::FatalError("version is not set. tablet={}", tablet->tablet_id())); |
69 | 0 | } |
70 | 1.26M | } |
71 | | |
72 | 1.26M | Status TabletReader::init(const ReaderParams& read_params) { |
73 | 1.26M | Status res = _init_params(read_params); |
74 | 1.26M | if (!res.ok()) { |
75 | 0 | LOG(WARNING) << "fail to init reader when init params. res:" << res |
76 | 0 | << ", tablet_id:" << read_params.tablet->tablet_id() |
77 | 0 | << ", schema_hash:" << read_params.tablet->schema_hash() |
78 | 0 | << ", reader type:" << int(read_params.reader_type) |
79 | 0 | << ", version:" << read_params.version; |
80 | 0 | } |
81 | 1.26M | return res; |
82 | 1.26M | } |
83 | | |
84 | | void TabletReader::remove_delete_columns_from_access_paths( |
85 | | const DeleteHandler& delete_handler, const TabletSchema& tablet_schema, |
86 | 83 | std::map<int32_t, TColumnAccessPaths>& all_access_paths) { |
87 | 83 | auto delete_predicates = AndBlockColumnPredicate::create_shared(); |
88 | 83 | std::unordered_map<int32_t, std::vector<std::shared_ptr<const ColumnPredicate>>> |
89 | 83 | del_predicates_for_zone_map; |
90 | 83 | delete_handler.get_delete_conditions_after_version(0, delete_predicates.get(), |
91 | 83 | &del_predicates_for_zone_map); |
92 | 83 | std::set<ColumnId> delete_column_ids; |
93 | 83 | delete_predicates->get_all_column_ids(delete_column_ids); |
94 | 122 | for (auto cid : delete_column_ids) { |
95 | 122 | all_access_paths.erase(tablet_schema.column(cid).unique_id()); |
96 | 122 | } |
97 | 83 | } |
98 | | |
99 | 1.26M | Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { |
100 | 1.26M | SCOPED_RAW_TIMER(&_stats.tablet_reader_capture_rs_readers_timer_ns); |
101 | 1.26M | if (read_params.rs_splits.empty()) { |
102 | 0 | return Status::InternalError("fail to acquire data sources. tablet={}", |
103 | 0 | _tablet->tablet_id()); |
104 | 0 | } |
105 | | |
106 | 1.26M | bool eof = false; |
107 | 1.26M | bool is_lower_key_included = _keys_param.start_key_include; |
108 | 1.26M | bool is_upper_key_included = _keys_param.end_key_include; |
109 | | |
110 | 2.93M | for (int i = 0; i < _keys_param.start_keys.size(); ++i) { |
111 | | // lower bound |
112 | 1.67M | RowCursor& start_key = _keys_param.start_keys[i]; |
113 | 1.67M | RowCursor& end_key = _keys_param.end_keys[i]; |
114 | | |
115 | 1.67M | if (!is_lower_key_included) { |
116 | 699 | if (compare_row_key(start_key, end_key) >= 0) { |
117 | 0 | VLOG_NOTICE << "return EOF when lower key not include" |
118 | 0 | << ", start_key=" << start_key.to_string() |
119 | 0 | << ", end_key=" << end_key.to_string(); |
120 | 0 | eof = true; |
121 | 0 | break; |
122 | 0 | } |
123 | 1.67M | } else { |
124 | 1.67M | if (compare_row_key(start_key, end_key) > 0) { |
125 | 0 | VLOG_NOTICE << "return EOF when lower key include=" |
126 | 0 | << ", start_key=" << start_key.to_string() |
127 | 0 | << ", end_key=" << end_key.to_string(); |
128 | 0 | eof = true; |
129 | 0 | break; |
130 | 0 | } |
131 | 1.67M | } |
132 | | |
133 | 1.67M | _is_lower_keys_included.push_back(is_lower_key_included); |
134 | 1.67M | _is_upper_keys_included.push_back(is_upper_key_included); |
135 | 1.67M | } |
136 | | |
137 | 1.26M | if (eof) { |
138 | 0 | return Status::EndOfFile("reach end of scan range. tablet={}", _tablet->tablet_id()); |
139 | 0 | } |
140 | | |
141 | 1.26M | bool need_ordered_result = true; |
142 | 1.26M | if (read_params.reader_type == ReaderType::READER_QUERY || |
143 | 1.26M | read_params.reader_type == ReaderType::READER_BINLOG) { |
144 | 1.23M | if (_tablet_schema->keys_type() == DUP_KEYS) { |
145 | | // duplicated keys are allowed, no need to merge sort keys in rowset |
146 | 378k | need_ordered_result = false; |
147 | 378k | } |
148 | 1.23M | if (_tablet_schema->keys_type() == UNIQUE_KEYS && |
149 | 1.23M | _tablet->enable_unique_key_merge_on_write()) { |
150 | | // unique keys with merge on write, no need to merge sort keys in rowset |
151 | 809k | need_ordered_result = false; |
152 | 809k | } |
153 | 1.23M | if (_aggregation) { |
154 | | // compute engine will aggregate rows with the same key, |
155 | | // it's ok for rowset to return unordered result |
156 | 1.19M | need_ordered_result = false; |
157 | 1.19M | } |
158 | | |
159 | 1.23M | if (_direct_mode) { |
160 | | // direct mode indicates that the storage layer does not need to merge, |
161 | | // it's ok for rowset to return unordered result |
162 | 1.22M | need_ordered_result = false; |
163 | 1.22M | } |
164 | | |
165 | 1.23M | if (read_params.read_orderby_key) { |
166 | 905 | need_ordered_result = true; |
167 | 905 | } |
168 | 1.23M | } |
169 | | |
170 | 1.26M | _reader_context.reader_type = read_params.reader_type; |
171 | 1.26M | _reader_context.version = read_params.version; |
172 | 1.26M | _reader_context.tablet_schema = _tablet_schema; |
173 | 1.26M | _reader_context.need_ordered_result = need_ordered_result; |
174 | 1.26M | _reader_context.topn_filter_source_node_ids = read_params.topn_filter_source_node_ids; |
175 | 1.26M | _reader_context.topn_filter_target_node_id = read_params.topn_filter_target_node_id; |
176 | 1.26M | _reader_context.read_orderby_key_reverse = read_params.read_orderby_key_reverse; |
177 | 1.26M | _reader_context.use_insert_order_when_same = |
178 | 1.26M | read_params.use_insert_order_when_same || |
179 | 1.26M | read_params.reader_type == ReaderType::READER_BINLOG || |
180 | 1.26M | read_params.reader_type == ReaderType::READER_BINLOG_COMPACTION; |
181 | 1.26M | _reader_context.force_key_ordered_read = read_params.force_key_ordered_read; |
182 | 1.26M | _reader_context.read_orderby_key_limit = read_params.read_orderby_key_limit; |
183 | 1.26M | _reader_context.return_columns = &_return_columns; |
184 | 1.26M | _reader_context.read_orderby_key_columns = |
185 | 1.26M | !_orderby_key_columns.empty() ? &_orderby_key_columns : nullptr; |
186 | 1.26M | _reader_context.predicates = &_col_predicates; |
187 | 1.26M | _reader_context.value_predicates = &_value_col_predicates; |
188 | 1.26M | _reader_context.lower_bound_keys = &_keys_param.start_keys; |
189 | 1.26M | _reader_context.is_lower_keys_included = &_is_lower_keys_included; |
190 | 1.26M | _reader_context.upper_bound_keys = &_keys_param.end_keys; |
191 | 1.26M | _reader_context.is_upper_keys_included = &_is_upper_keys_included; |
192 | 1.26M | _reader_context.delete_handler = &_delete_handler; |
193 | 1.26M | _reader_context.stats = &_stats; |
194 | 1.26M | _reader_context.use_page_cache = read_params.use_page_cache; |
195 | 1.26M | _reader_context.sequence_id_idx = _sequence_col_idx; |
196 | 1.26M | _reader_context.is_unique = tablet()->keys_type() == UNIQUE_KEYS; |
197 | 1.26M | _reader_context.merged_rows = &_merged_rows; |
198 | 1.26M | _reader_context.delete_bitmap = read_params.delete_bitmap; |
199 | 1.26M | _reader_context.enable_unique_key_merge_on_write = tablet()->enable_unique_key_merge_on_write(); |
200 | 1.26M | _reader_context.enable_mor_value_predicate_pushdown = |
201 | 1.26M | read_params.enable_mor_value_predicate_pushdown; |
202 | 1.26M | _reader_context.record_rowids = read_params.record_rowids; |
203 | 1.26M | _reader_context.rowid_conversion = read_params.rowid_conversion; |
204 | 1.26M | _reader_context.is_key_column_group = read_params.is_key_column_group; |
205 | 1.26M | _reader_context.common_expr_ctxs_push_down = read_params.common_expr_ctxs_push_down; |
206 | 1.26M | _reader_context.output_columns = &read_params.output_columns; |
207 | 1.26M | _reader_context.push_down_agg_type_opt = read_params.push_down_agg_type_opt; |
208 | 1.26M | _reader_context.ttl_seconds = _tablet->ttl_seconds(); |
209 | 1.26M | _reader_context.score_runtime = read_params.score_runtime; |
210 | 1.26M | _reader_context.collection_statistics = read_params.collection_statistics; |
211 | | |
212 | 1.26M | _reader_context.virtual_column_exprs = read_params.virtual_column_exprs; |
213 | 1.26M | _reader_context.vir_cid_to_idx_in_block = read_params.vir_cid_to_idx_in_block; |
214 | 1.26M | _reader_context.vir_col_idx_to_type = read_params.vir_col_idx_to_type; |
215 | 1.26M | _reader_context.ann_topn_runtime = read_params.ann_topn_runtime; |
216 | | |
217 | 1.26M | _reader_context.condition_cache_digest = read_params.condition_cache_digest; |
218 | 1.26M | _reader_context.all_access_paths = read_params.all_access_paths; |
219 | 1.26M | _reader_context.predicate_access_paths = read_params.predicate_access_paths; |
220 | | |
221 | | // Force a full read of delete-condition columns: the FE can't see storage deletes and may |
222 | | // mark them meta-only (OFFSET/NULL), whose content-less read makes the delete predicate |
223 | | // match nothing and leak deleted rows. |
224 | 1.26M | if (!_delete_handler.empty() && !_reader_context.all_access_paths.empty()) { |
225 | 82 | remove_delete_columns_from_access_paths(_delete_handler, *_tablet_schema, |
226 | 82 | _reader_context.all_access_paths); |
227 | 82 | } |
228 | | |
229 | | // Propagate general read limit for DUP_KEYS and UNIQUE_KEYS with MOW |
230 | 1.26M | _reader_context.general_read_limit = read_params.general_read_limit; |
231 | | |
232 | | // Multi-stage predicate lazy materialization (experimental) |
233 | 1.26M | _reader_context.enable_multi_stage_predicate_lazy_materialization = |
234 | 1.26M | read_params.enable_multi_stage_predicate_lazy_materialization; |
235 | 1.26M | _reader_context.predicate_lm_stage1_column_ids = read_params.predicate_lm_stage1_column_ids; |
236 | 1.26M | _reader_context.predicate_lm_stage1_survival_ratio_threshold = |
237 | 1.26M | read_params.predicate_lm_stage1_survival_ratio_threshold; |
238 | | |
239 | | // Preserve the original requested output layout so BlockReader can map expanded storage |
240 | | // columns (for non-direct AGG/UNIQUE paths) back to the final output block. |
241 | 1.26M | _reader_context.origin_return_columns = read_params.origin_return_columns; |
242 | | |
243 | 1.26M | return Status::OK(); |
244 | 1.26M | } |
245 | | |
246 | 514 | TabletColumn TabletReader::materialize_column(const TabletColumn& orig) { |
247 | 514 | if (!orig.is_variant_type()) { |
248 | 494 | return orig; |
249 | 494 | } |
250 | 20 | TabletColumn column_with_cast_type = orig; |
251 | 20 | auto cast_type = _reader_context.target_cast_type_for_variants.at(orig.name()); |
252 | 20 | return variant_util::get_column_by_type(cast_type, orig.name(), |
253 | 20 | { |
254 | 20 | .unique_id = orig.unique_id(), |
255 | 20 | .parent_unique_id = orig.parent_unique_id(), |
256 | 20 | .path_info = *orig.path_info_ptr(), |
257 | 20 | }); |
258 | 514 | } |
259 | | |
260 | 1.26M | Status TabletReader::_init_params(const ReaderParams& read_params) { |
261 | 1.26M | read_params.check_validation(); |
262 | | |
263 | 1.26M | _direct_mode = read_params.direct_mode; |
264 | 1.26M | _aggregation = read_params.aggregation; |
265 | 1.26M | _reader_type = read_params.reader_type; |
266 | 1.26M | _tablet = read_params.tablet; |
267 | 1.26M | _tablet_schema = read_params.tablet_schema; |
268 | 1.26M | _reader_context.runtime_state = read_params.runtime_state; |
269 | 1.26M | _reader_context.target_cast_type_for_variants = read_params.target_cast_type_for_variants; |
270 | | |
271 | 1.26M | RETURN_IF_ERROR(_init_conditions_param(read_params)); |
272 | | |
273 | 1.26M | Status res = _init_delete_condition(read_params); |
274 | 1.26M | if (!res.ok()) { |
275 | 0 | LOG(WARNING) << "fail to init delete param. res = " << res; |
276 | 0 | return res; |
277 | 0 | } |
278 | | |
279 | 1.26M | res = _init_return_columns(read_params); |
280 | 1.26M | if (!res.ok()) { |
281 | 0 | LOG(WARNING) << "fail to init return columns. res = " << res; |
282 | 0 | return res; |
283 | 0 | } |
284 | | |
285 | 1.26M | res = _init_keys_param(read_params); |
286 | 1.26M | if (!res.ok()) { |
287 | 0 | LOG(WARNING) << "fail to init keys param. res=" << res; |
288 | 0 | return res; |
289 | 0 | } |
290 | 1.26M | res = _init_orderby_keys_param(read_params); |
291 | 1.26M | if (!res.ok()) { |
292 | 0 | LOG(WARNING) << "fail to init orderby keys param. res=" << res; |
293 | 0 | return res; |
294 | 0 | } |
295 | 1.26M | if (_tablet_schema->has_sequence_col()) { |
296 | 4.50k | auto sequence_col_idx = _tablet_schema->sequence_col_idx(); |
297 | 4.50k | DCHECK_NE(sequence_col_idx, -1); |
298 | 17.2k | for (auto col : _return_columns) { |
299 | | // query has sequence col |
300 | 17.2k | if (col == sequence_col_idx) { |
301 | 386 | _sequence_col_idx = sequence_col_idx; |
302 | 386 | break; |
303 | 386 | } |
304 | 17.2k | } |
305 | 4.50k | } |
306 | | |
307 | 1.26M | return res; |
308 | 1.26M | } |
309 | | |
310 | 1.26M | Status TabletReader::_init_return_columns(const ReaderParams& read_params) { |
311 | 1.26M | SCOPED_RAW_TIMER(&_stats.tablet_reader_init_return_columns_timer_ns); |
312 | 1.26M | if (read_params.reader_type == ReaderType::READER_QUERY || |
313 | 1.26M | read_params.reader_type == ReaderType::READER_BINLOG) { |
314 | 1.23M | _return_columns = read_params.return_columns; |
315 | 1.23M | _tablet_columns_convert_to_null_set = read_params.tablet_columns_convert_to_null_set; |
316 | 13.0M | for (auto id : read_params.return_columns) { |
317 | 13.0M | if (_tablet_schema->column(id).is_key()) { |
318 | 5.65M | _key_cids.push_back(id); |
319 | 7.38M | } else { |
320 | 7.38M | _value_cids.push_back(id); |
321 | 7.38M | } |
322 | 13.0M | } |
323 | 1.23M | } else if (read_params.return_columns.empty()) { |
324 | 0 | for (uint32_t i = 0; i < _tablet_schema->num_columns(); ++i) { |
325 | 0 | _return_columns.push_back(i); |
326 | 0 | if (_tablet_schema->column(i).is_key()) { |
327 | 0 | _key_cids.push_back(i); |
328 | 0 | } else { |
329 | 0 | _value_cids.push_back(i); |
330 | 0 | } |
331 | 0 | } |
332 | 0 | VLOG_NOTICE << "return column is empty, using full column as default."; |
333 | 28.9k | } else if ((read_params.reader_type == ReaderType::READER_CUMULATIVE_COMPACTION || |
334 | 28.9k | read_params.reader_type == ReaderType::READER_SEGMENT_COMPACTION || |
335 | 28.9k | read_params.reader_type == ReaderType::READER_BASE_COMPACTION || |
336 | 28.9k | read_params.reader_type == ReaderType::READER_FULL_COMPACTION || |
337 | 28.9k | read_params.reader_type == ReaderType::READER_BINLOG_COMPACTION || |
338 | 28.9k | read_params.reader_type == ReaderType::READER_COLD_DATA_COMPACTION || |
339 | 28.9k | read_params.reader_type == ReaderType::READER_ALTER_TABLE) && |
340 | 31.0k | !read_params.return_columns.empty()) { |
341 | 31.0k | _return_columns = read_params.return_columns; |
342 | 121k | for (auto id : read_params.return_columns) { |
343 | 121k | if (_tablet_schema->column(id).is_key()) { |
344 | 42.9k | _key_cids.push_back(id); |
345 | 78.6k | } else { |
346 | 78.6k | _value_cids.push_back(id); |
347 | 78.6k | } |
348 | 121k | } |
349 | 18.4E | } else if (read_params.reader_type == ReaderType::READER_CHECKSUM) { |
350 | 0 | _return_columns = read_params.return_columns; |
351 | 0 | for (auto id : read_params.return_columns) { |
352 | 0 | if (_tablet_schema->column(id).is_key()) { |
353 | 0 | _key_cids.push_back(id); |
354 | 0 | } else { |
355 | 0 | _value_cids.push_back(id); |
356 | 0 | } |
357 | 0 | } |
358 | 18.4E | } else { |
359 | 18.4E | return Status::Error<INVALID_ARGUMENT>( |
360 | 18.4E | "fail to init return columns. reader_type={}, return_columns_size={}", |
361 | 18.4E | int(read_params.reader_type), read_params.return_columns.size()); |
362 | 18.4E | } |
363 | | |
364 | 1.26M | std::sort(_key_cids.begin(), _key_cids.end(), std::greater<>()); |
365 | | |
366 | 1.26M | return Status::OK(); |
367 | 1.26M | } |
368 | | |
369 | 1.26M | Status TabletReader::_init_keys_param(const ReaderParams& read_params) { |
370 | 1.26M | SCOPED_RAW_TIMER(&_stats.tablet_reader_init_keys_param_timer_ns); |
371 | 1.26M | if (read_params.start_key.empty()) { |
372 | 244k | return Status::OK(); |
373 | 244k | } |
374 | | |
375 | 1.01M | _keys_param.start_key_include = read_params.start_key_include; |
376 | 1.01M | _keys_param.end_key_include = read_params.end_key_include; |
377 | | |
378 | 1.01M | size_t start_key_size = read_params.start_key.size(); |
379 | | //_keys_param.start_keys.resize(start_key_size); |
380 | 1.01M | std::vector<RowCursor>(start_key_size).swap(_keys_param.start_keys); |
381 | | |
382 | 1.01M | size_t scan_key_size = read_params.start_key.front().size(); |
383 | 1.01M | if (scan_key_size > _tablet_schema->num_columns()) { |
384 | 0 | return Status::Error<INVALID_ARGUMENT>( |
385 | 0 | "Input param are invalid. Column count is bigger than num_columns of schema. " |
386 | 0 | "column_count={}, schema.num_columns={}", |
387 | 0 | scan_key_size, _tablet_schema->num_columns()); |
388 | 0 | } |
389 | | |
390 | 2.69M | for (size_t i = 0; i < start_key_size; ++i) { |
391 | 1.67M | if (read_params.start_key[i].size() != scan_key_size) { |
392 | 0 | return Status::Error<INVALID_ARGUMENT>( |
393 | 0 | "The start_key.at({}).size={}, not equals the scan_key_size={}", i, |
394 | 0 | read_params.start_key[i].size(), scan_key_size); |
395 | 0 | } |
396 | | |
397 | 1.67M | Status res = _keys_param.start_keys[i].init(_tablet_schema, read_params.start_key[i]); |
398 | 1.67M | if (!res.ok()) { |
399 | 0 | LOG(WARNING) << "fail to init row cursor. res = " << res; |
400 | 0 | return res; |
401 | 0 | } |
402 | 1.67M | } |
403 | | |
404 | 1.01M | size_t end_key_size = read_params.end_key.size(); |
405 | | //_keys_param.end_keys.resize(end_key_size); |
406 | 1.01M | std::vector<RowCursor>(end_key_size).swap(_keys_param.end_keys); |
407 | 2.69M | for (size_t i = 0; i < end_key_size; ++i) { |
408 | 1.67M | if (read_params.end_key[i].size() != scan_key_size) { |
409 | 0 | return Status::Error<INVALID_ARGUMENT>( |
410 | 0 | "The end_key.at({}).size={}, not equals the scan_key_size={}", i, |
411 | 0 | read_params.end_key[i].size(), scan_key_size); |
412 | 0 | } |
413 | | |
414 | 1.67M | Status res = _keys_param.end_keys[i].init(_tablet_schema, read_params.end_key[i]); |
415 | 1.67M | if (!res.ok()) { |
416 | 0 | LOG(WARNING) << "fail to init row cursor. res = " << res; |
417 | 0 | return res; |
418 | 0 | } |
419 | 1.67M | } |
420 | | |
421 | | //TODO:check the valid of start_key and end_key.(eg. start_key <= end_key) |
422 | | |
423 | 1.01M | return Status::OK(); |
424 | 1.01M | } |
425 | | |
426 | 1.26M | Status TabletReader::_init_orderby_keys_param(const ReaderParams& read_params) { |
427 | 1.26M | SCOPED_RAW_TIMER(&_stats.tablet_reader_init_orderby_keys_param_timer_ns); |
428 | | // UNIQUE_KEYS will compare all keys as before |
429 | 1.26M | if (_tablet_schema->keys_type() == DUP_KEYS || (_tablet_schema->keys_type() == UNIQUE_KEYS && |
430 | 1.21M | _tablet->enable_unique_key_merge_on_write())) { |
431 | 1.21M | if (!_tablet_schema->cluster_key_uids().empty()) { |
432 | 5.89k | if (read_params.read_orderby_key_num_prefix_columns > |
433 | 5.89k | _tablet_schema->cluster_key_uids().size()) { |
434 | 0 | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
435 | 0 | "read_orderby_key_num_prefix_columns={} > cluster_keys.size()={}", |
436 | 0 | read_params.read_orderby_key_num_prefix_columns, |
437 | 0 | _tablet_schema->cluster_key_uids().size()); |
438 | 0 | } |
439 | 5.89k | for (uint32_t i = 0; i < read_params.read_orderby_key_num_prefix_columns; i++) { |
440 | 0 | auto cid = _tablet_schema->cluster_key_uids()[i]; |
441 | 0 | auto index = _tablet_schema->field_index(cid); |
442 | 0 | if (index < 0) { |
443 | 0 | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
444 | 0 | "could not find cluster key column with unique_id=" + |
445 | 0 | std::to_string(cid) + |
446 | 0 | " in tablet schema, tablet_id=" + std::to_string(_tablet->tablet_id())); |
447 | 0 | } |
448 | 0 | for (uint32_t idx = 0; idx < _return_columns.size(); idx++) { |
449 | 0 | if (_return_columns[idx] == index) { |
450 | 0 | _orderby_key_columns.push_back(idx); |
451 | 0 | break; |
452 | 0 | } |
453 | 0 | } |
454 | 0 | } |
455 | 1.21M | } else { |
456 | | // find index in vector _return_columns |
457 | | // for the read_orderby_key_num_prefix_columns orderby keys |
458 | 1.21M | for (uint32_t i = 0; i < read_params.read_orderby_key_num_prefix_columns; i++) { |
459 | 1.38k | for (uint32_t idx = 0; idx < _return_columns.size(); idx++) { |
460 | 1.38k | if (_return_columns[idx] == i) { |
461 | 1.10k | _orderby_key_columns.push_back(idx); |
462 | 1.10k | break; |
463 | 1.10k | } |
464 | 1.38k | } |
465 | 1.10k | } |
466 | 1.21M | } |
467 | 1.21M | if (read_params.read_orderby_key_num_prefix_columns != _orderby_key_columns.size()) { |
468 | 0 | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
469 | 0 | "read_orderby_key_num_prefix_columns != _orderby_key_columns.size, " |
470 | 0 | "read_params.read_orderby_key_num_prefix_columns={}, " |
471 | 0 | "_orderby_key_columns.size()={}", |
472 | 0 | read_params.read_orderby_key_num_prefix_columns, _orderby_key_columns.size()); |
473 | 0 | } |
474 | 1.21M | } |
475 | | |
476 | 1.26M | return Status::OK(); |
477 | 1.26M | } |
478 | | |
479 | 1.26M | Status TabletReader::_init_conditions_param(const ReaderParams& read_params) { |
480 | 1.26M | SCOPED_RAW_TIMER(&_stats.tablet_reader_init_conditions_param_timer_ns); |
481 | 1.26M | std::vector<std::shared_ptr<ColumnPredicate>> predicates; |
482 | 1.26M | std::copy(read_params.predicates.cbegin(), read_params.predicates.cend(), |
483 | 1.26M | std::inserter(predicates, predicates.begin())); |
484 | | // Function filter push down to storage engine |
485 | 1.26M | auto is_like_predicate = [](std::shared_ptr<ColumnPredicate> _pred) { |
486 | 512 | return dynamic_cast<LikeColumnPredicate*>(_pred.get()) != nullptr; |
487 | 512 | }; |
488 | | |
489 | 1.26M | for (const auto& filter : read_params.function_filters) { |
490 | 513 | predicates.emplace_back(_parse_to_predicate(filter)); |
491 | 513 | auto pred = predicates.back(); |
492 | | |
493 | 513 | const auto& col = _tablet_schema->column(pred->column_id()); |
494 | 513 | const auto* tablet_index = _tablet_schema->get_ngram_bf_index(col.unique_id()); |
495 | 513 | if (is_like_predicate(pred) && tablet_index && config::enable_query_like_bloom_filter) { |
496 | 16 | std::unique_ptr<segment_v2::BloomFilter> ng_bf; |
497 | 16 | std::string pattern = pred->get_search_str(); |
498 | 16 | auto gram_bf_size = tablet_index->get_gram_bf_size(); |
499 | 16 | auto gram_size = tablet_index->get_gram_size(); |
500 | | |
501 | 16 | RETURN_IF_ERROR(segment_v2::BloomFilter::create(segment_v2::NGRAM_BLOOM_FILTER, &ng_bf, |
502 | 16 | gram_bf_size)); |
503 | 16 | NgramTokenExtractor _token_extractor(gram_size); |
504 | | |
505 | 16 | if (_token_extractor.string_like_to_bloom_filter(pattern.data(), pattern.length(), |
506 | 16 | *ng_bf)) { |
507 | 16 | pred->set_page_ng_bf(std::move(ng_bf)); |
508 | 16 | } |
509 | 16 | } |
510 | 513 | } |
511 | | |
512 | 1.26M | int32_t delete_sign_idx = _tablet_schema->delete_sign_idx(); |
513 | 1.26M | for (auto predicate : predicates) { |
514 | 1.00M | auto column = _tablet_schema->column(predicate->column_id()); |
515 | 1.00M | if (column.aggregation() != FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE) { |
516 | | // When MOR value predicate pushdown is enabled, drop __DORIS_DELETE_SIGN__ |
517 | | // from storage-layer predicates entirely. Delete sign must only be evaluated |
518 | | // post-merge via VExpr to prevent deleted rows from reappearing. |
519 | 2.97k | if (read_params.enable_mor_value_predicate_pushdown && delete_sign_idx >= 0 && |
520 | 2.97k | predicate->column_id() == static_cast<uint32_t>(delete_sign_idx)) { |
521 | 25 | continue; |
522 | 25 | } |
523 | 2.95k | _value_col_predicates.push_back(predicate); |
524 | 997k | } else { |
525 | 997k | _col_predicates.push_back(predicate); |
526 | 997k | } |
527 | 1.00M | } |
528 | | |
529 | 1.26M | return Status::OK(); |
530 | 1.26M | } |
531 | | |
532 | | std::shared_ptr<ColumnPredicate> TabletReader::_parse_to_predicate( |
533 | 513 | const FunctionFilter& function_filter) { |
534 | 513 | int32_t index = _tablet_schema->field_index(function_filter._col_name); |
535 | 513 | if (index < 0) { |
536 | 0 | throw Exception(Status::InternalError("Column {} not found in tablet schema", |
537 | 0 | function_filter._col_name)); |
538 | 0 | return nullptr; |
539 | 0 | } |
540 | 513 | const TabletColumn& column = materialize_column(_tablet_schema->column(index)); |
541 | 513 | return create_column_predicate(index, std::make_shared<FunctionFilter>(function_filter), |
542 | 513 | column.type(), &column); |
543 | 513 | } |
544 | | |
545 | 1.26M | Status TabletReader::_init_delete_condition(const ReaderParams& read_params) { |
546 | 1.26M | SCOPED_RAW_TIMER(&_stats.tablet_reader_init_delete_condition_param_timer_ns); |
547 | | // If it's cumu and not allow do delete when cumu |
548 | 1.26M | if (read_params.reader_type == ReaderType::READER_SEGMENT_COMPACTION || |
549 | 1.26M | (read_params.reader_type == ReaderType::READER_CUMULATIVE_COMPACTION && |
550 | 1.26M | !config::enable_delete_when_cumu_compaction)) { |
551 | 28.8k | return Status::OK(); |
552 | 28.8k | } |
553 | 1.23M | bool cumu_delete = read_params.reader_type == ReaderType::READER_CUMULATIVE_COMPACTION && |
554 | 1.23M | config::enable_delete_when_cumu_compaction; |
555 | | // Delete sign could not be applied when delete on cumu compaction is enabled, bucause it is meant for delete with predicates. |
556 | | // If delete design is applied on cumu compaction, it will lose effect when doing base compaction. |
557 | | // `_delete_sign_available` indicates the condition where we could apply delete signs to data. |
558 | 1.23M | _delete_sign_available = (((read_params.reader_type == ReaderType::READER_BASE_COMPACTION || |
559 | 1.23M | read_params.reader_type == ReaderType::READER_FULL_COMPACTION) && |
560 | 1.23M | config::enable_prune_delete_sign_when_base_compaction) || |
561 | 1.23M | read_params.reader_type == ReaderType::READER_COLD_DATA_COMPACTION || |
562 | 1.23M | read_params.reader_type == ReaderType::READER_CHECKSUM); |
563 | | |
564 | | // `_filter_delete` indicates the condition where we should execlude deleted tuples when reading data. |
565 | | // However, queries will not use this condition but generate special where predicates to filter data. |
566 | | // (Though a lille bit confused, it is how the current logic working...) |
567 | 1.23M | _filter_delete = _delete_sign_available || cumu_delete; |
568 | 1.23M | return _delete_handler.init(_tablet_schema, read_params.delete_predicates, |
569 | 1.23M | read_params.version.second); |
570 | 1.26M | } |
571 | | |
572 | | Status TabletReader::init_reader_params_and_create_block( |
573 | | TabletSharedPtr tablet, ReaderType reader_type, |
574 | | const std::vector<RowsetSharedPtr>& input_rowsets, |
575 | 0 | TabletReader::ReaderParams* reader_params, Block* block) { |
576 | 0 | reader_params->tablet = tablet; |
577 | 0 | reader_params->reader_type = reader_type; |
578 | 0 | reader_params->version = |
579 | 0 | Version(input_rowsets.front()->start_version(), input_rowsets.back()->end_version()); |
580 | |
|
581 | 0 | TabletReadSource read_source; |
582 | 0 | for (const auto& rowset : input_rowsets) { |
583 | 0 | RowsetReaderSharedPtr rs_reader; |
584 | 0 | RETURN_IF_ERROR(rowset->create_reader(&rs_reader)); |
585 | 0 | read_source.rs_splits.emplace_back(std::move(rs_reader)); |
586 | 0 | } |
587 | 0 | read_source.fill_delete_predicates(); |
588 | 0 | reader_params->set_read_source(std::move(read_source)); |
589 | |
|
590 | 0 | std::vector<RowsetMetaSharedPtr> rowset_metas(input_rowsets.size()); |
591 | 0 | std::transform(input_rowsets.begin(), input_rowsets.end(), rowset_metas.begin(), |
592 | 0 | [](const RowsetSharedPtr& rowset) { return rowset->rowset_meta(); }); |
593 | 0 | TabletSchemaSPtr read_tablet_schema = |
594 | 0 | tablet->tablet_schema_with_merged_max_schema_version(rowset_metas); |
595 | 0 | TabletSchemaSPtr merge_tablet_schema = std::make_shared<TabletSchema>(); |
596 | 0 | merge_tablet_schema->copy_from(*read_tablet_schema); |
597 | | |
598 | | // Merge the columns in delete predicate that not in latest schema in to current tablet schema |
599 | 0 | for (auto& del_pred : reader_params->delete_predicates) { |
600 | 0 | merge_tablet_schema->merge_dropped_columns(*del_pred->tablet_schema()); |
601 | 0 | } |
602 | 0 | reader_params->tablet_schema = merge_tablet_schema; |
603 | |
|
604 | 0 | reader_params->return_columns.resize(read_tablet_schema->num_columns()); |
605 | 0 | std::iota(reader_params->return_columns.begin(), reader_params->return_columns.end(), 0); |
606 | 0 | reader_params->origin_return_columns = &reader_params->return_columns; |
607 | |
|
608 | 0 | *block = read_tablet_schema->create_block(); |
609 | |
|
610 | 0 | return Status::OK(); |
611 | 0 | } |
612 | | |
613 | | } // namespace doris |