be/src/format/parquet/vparquet_group_reader.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | #pragma once |
18 | | #include <stddef.h> |
19 | | #include <stdint.h> |
20 | | |
21 | | #include <limits> |
22 | | #include <memory> |
23 | | #include <string> |
24 | | #include <tuple> |
25 | | #include <unordered_map> |
26 | | #include <utility> |
27 | | #include <vector> |
28 | | |
29 | | #include "common/status.h" |
30 | | #include "core/column/column.h" |
31 | | #include "exprs/vexpr_fwd.h" |
32 | | #include "format/parquet/parquet_common.h" |
33 | | #include "format/parquet/vparquet_column_reader.h" |
34 | | #include "format/table/table_format_reader.h" |
35 | | #include "io/fs/file_reader_writer_fwd.h" |
36 | | #include "storage/id_manager.h" |
37 | | #include "storage/utils.h" |
38 | | |
39 | | namespace cctz { |
40 | | class time_zone; |
41 | | } // namespace cctz |
42 | | namespace doris { |
43 | | class ObjectPool; |
44 | | class RowDescriptor; |
45 | | class RuntimeState; |
46 | | class SlotDescriptor; |
47 | | class TupleDescriptor; |
48 | | |
49 | | namespace io { |
50 | | struct IOContext; |
51 | | } // namespace io |
52 | | class Block; |
53 | | class FieldDescriptor; |
54 | | } // namespace doris |
55 | | namespace tparquet { |
56 | | class ColumnMetaData; |
57 | | class OffsetIndex; |
58 | | class RowGroup; |
59 | | } // namespace tparquet |
60 | | |
61 | | namespace doris::segment_v2 { |
62 | | class RowIdColumnIteratorV2; |
63 | | } |
64 | | |
65 | | namespace doris { |
66 | | #include "common/compile_check_begin.h" |
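67 | | // TODO: determine the value by testing. NOTE(review): no declaration follows this note in this header, so what "it" refers to is unclear - confirm or remove. |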
68 | | |
69 | | class RowGroupReader : public ProfileCollector { |
70 | | public: |
71 | | struct IcebergRowIdParams { |
72 | | bool enabled = false; |
73 | | std::string file_path; |
74 | | int32_t partition_spec_id = 0; |
75 | | std::string partition_data_json; |
76 | | int row_id_column_pos = -1; |
77 | | }; |
78 | | std::shared_ptr<TableSchemaChangeHelper::Node> _table_info_node_ptr; |
79 | | static const std::vector<int64_t> NO_DELETE; |
80 | | |
81 | | struct RowGroupIndex { |
82 | | int32_t row_group_id; |
83 | | int64_t first_row; |
84 | | int64_t last_row; |
85 | | RowGroupIndex(int32_t id, int64_t first, int64_t last) |
86 | 188 | : row_group_id(id), first_row(first), last_row(last) {} |
87 | | }; |
88 | | |
89 | | // Bundles the conjuncts, predicate / lazy-read column lists and partition / missing-column fill maps declared below. |
90 | | struct LazyReadContext { |
91 | | // All conjuncts: those written in the SQL, join runtime filters, and topn runtime filters. |
92 | | VExprContextSPtrs conjuncts; |
93 | | |
94 | | // ParquetReader::set_fill_columns(xxx, xxx) sets the following two members. |
95 | | std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>> |
96 | | fill_partition_columns; |
97 | | std::unordered_map<std::string, VExprContextSPtr> fill_missing_columns; |
98 | | |
99 | | phmap::flat_hash_map<int, std::vector<std::shared_ptr<ColumnPredicate>>> |
100 | | slot_id_to_predicates; |
101 | | bool can_lazy_read = false; |
102 | | // block->rows() returns the number of rows of the first column, |
103 | | // so the first column has to be checked and resized. |
104 | | bool resize_first_column = true; |
105 | | std::vector<std::string> all_read_columns; |
106 | | // Includes predicate_partition_columns and predicate_missing_columns. |
107 | | std::vector<uint32_t> all_predicate_col_ids; |
108 | | // The slot_id is saved so the dict filter column name can be found, because the expr column name may |
109 | | // differ from the parquet column name. |
110 | | // Layout: std::pair<std::vector<col_name>, std::vector<slot_id>> |
111 | | std::pair<std::vector<std::string>, std::vector<int>> predicate_columns; |
112 | | std::vector<std::string> lazy_read_columns; |
113 | | std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>> |
114 | | predicate_partition_columns; |
115 | | // Either the lazy-read partition columns or all partition columns. |
116 | | std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>> |
117 | | partition_columns; |
118 | | std::unordered_map<std::string, VExprContextSPtr> predicate_missing_columns; |
119 | | VExprContextSPtrs missing_columns_conjuncts; |
120 | | // Either the lazy-read missing columns or all missing columns. |
121 | | std::unordered_map<std::string, VExprContextSPtr> missing_columns; |
122 | | // Filtering by page index, lazy read and dict filter should be turned off when a complex type is present. |
123 | | bool has_complex_type = false; |
124 | | }; |
125 | | |
126 | | /** |
127 | | * Supports row-level deletes in Iceberg (position delete files): |
128 | | * https://iceberg.apache.org/spec/#position-delete-files |
129 | | */ |
130 | | struct PositionDeleteContext { |
131 | | // The filtered rows in the current row group (held by reference, not copied). |
132 | | const std::vector<int64_t>& delete_rows; |
133 | | // The first row id of the current row group in the parquet file. |
134 | | const int64_t first_row_id; |
135 | | // The number of rows in the current row group; last_row_id below is initialized to first_row_id + num_rows. |
136 | | const int64_t num_rows; |
137 | | const int64_t last_row_id; |
138 | | // Current row id to read in the row group (initialized to first_row_id). |
139 | | int64_t current_row_id; |
140 | | // Start index in delete_rows. |
141 | | const int64_t start_index; |
142 | | // End index in delete_rows. |
143 | | const int64_t end_index; |
144 | | // Current index in delete_rows (initialized to start_index); has_filter below is end_index > start_index. |
145 | | int64_t index; |
146 | | const bool has_filter; |
147 | | |
148 | | PositionDeleteContext(const std::vector<int64_t>& delete_rows, const int64_t num_rows, |
149 | | const int64_t first_row_id, const int64_t start_index, |
150 | | const int64_t end_index) |
151 | 37 | : delete_rows(delete_rows), |
152 | 37 | first_row_id(first_row_id), |
153 | 37 | num_rows(num_rows), |
154 | 37 | last_row_id(first_row_id + num_rows), |
155 | 37 | current_row_id(first_row_id), |
156 | 37 | start_index(start_index), |
157 | 37 | end_index(end_index), |
158 | 37 | index(start_index), |
159 | 37 | has_filter(end_index > start_index) {} |
160 | | |
161 | | PositionDeleteContext(const int64_t num_rows, const int64_t first_row) |
162 | 37 | : PositionDeleteContext(NO_DELETE, num_rows, first_row, 0, 0) {} |
163 | | |
164 | | PositionDeleteContext(const PositionDeleteContext& filter) = default; |
165 | | }; |
166 | | |
167 | | RowGroupReader(io::FileReaderSPtr file_reader, const std::vector<std::string>& read_columns, |
168 | | const int32_t row_group_id, const tparquet::RowGroup& row_group, |
169 | | const cctz::time_zone* ctz, io::IOContext* io_ctx, |
170 | | const PositionDeleteContext& position_delete_ctx, |
171 | | const LazyReadContext& lazy_read_ctx, RuntimeState* state, |
172 | | const std::set<uint64_t>& column_ids, |
173 | | const std::set<uint64_t>& filter_column_ids); |
174 | | |
175 | | ~RowGroupReader(); |
176 | | Status init(const FieldDescriptor& schema, RowRanges& row_ranges, |
177 | | std::unordered_map<int, tparquet::OffsetIndex>& col_offsets, |
178 | | const TupleDescriptor* tuple_descriptor, const RowDescriptor* row_descriptor, |
179 | | const std::unordered_map<std::string, int>* colname_to_slot_id, |
180 | | const VExprContextSPtrs* not_single_slot_filter_conjuncts, |
181 | | const std::unordered_map<int, VExprContextSPtrs>* slot_id_to_filter_conjuncts); |
182 | | Status next_batch(Block* block, size_t batch_size, size_t* read_rows, bool* batch_eof); |
183 | 37 | int64_t lazy_read_filtered_rows() const { return _lazy_read_filtered_rows; } |
184 | 37 | int64_t predicate_filter_time() const { return _predicate_filter_time; } |
185 | 37 | int64_t dict_filter_rewrite_time() const { return _dict_filter_rewrite_time; } |
186 | 14 | int64_t condition_cache_filtered_rows() const { return _condition_cache_filtered_rows; } |
187 | | |
188 | | ParquetColumnReader::ColumnStatistics merged_column_statistics(); |
189 | 0 | void set_remaining_rows(int64_t rows) { _remaining_rows = rows; } |
190 | 0 | int64_t get_remaining_rows() { return _remaining_rows; } |
191 | | |
192 | | // Filters read_ranges by removing row chunks whose condition cache granules are all-false. |
193 | | // Pure algorithm, exposed as static for testability. |
194 | | static RowRanges filter_ranges_by_cache(const RowRanges& read_ranges, |
195 | | const std::vector<bool>& cache, int64_t first_row, |
196 | | int64_t base_granule = 0); |
197 | | |
198 | | void set_row_id_column_iterator( |
199 | | const std::pair<std::shared_ptr<segment_v2::RowIdColumnIteratorV2>, int>& |
200 | 37 | iterator_pair) { |
201 | 37 | _row_id_column_iterator_pair = iterator_pair; |
202 | 37 | } |
203 | | |
204 | 0 | void set_iceberg_rowid_params(const IcebergRowIdParams& params) { |
205 | 0 | _iceberg_rowid_params = params; |
206 | 0 | } |
207 | | |
208 | 37 | void set_current_row_group_idx(RowGroupIndex row_group_idx) { |
209 | 37 | _current_row_group_idx = row_group_idx; |
210 | 37 | } |
211 | | |
212 | | void set_col_name_to_block_idx( |
213 | 37 | std::unordered_map<std::string, uint32_t>* col_name_to_block_idx) { |
214 | 37 | _col_name_to_block_idx = col_name_to_block_idx; |
215 | 37 | } |
216 | | |
217 | 0 | void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) { |
218 | 0 | _condition_cache_ctx = std::move(ctx); |
219 | 0 | } |
220 | | |
221 | | protected: |
222 | 15 | void _collect_profile_before_close() override { |
223 | 15 | if (_file_reader != nullptr) { |
224 | 15 | _file_reader->collect_profile_before_close(); |
225 | 15 | } |
226 | 15 | } |
227 | | |
228 | | private: |
229 | | Status _read_empty_batch(size_t batch_size, size_t* read_rows, bool* batch_eof, |
230 | | bool* modify_row_ids); |
231 | | |
232 | | Status _read_column_data(Block* block, const std::vector<std::string>& columns, |
233 | | size_t batch_size, size_t* read_rows, bool* batch_eof, |
234 | | FilterMap& filter_map); |
235 | | |
236 | | Status _do_lazy_read(Block* block, size_t batch_size, size_t* read_rows, bool* batch_eof); |
237 | | Status _rebuild_filter_map(FilterMap& filter_map, |
238 | | DorisUniqueBufferPtr<uint8_t>& filter_map_data, |
239 | | size_t pre_read_rows) const; |
240 | | Status _fill_partition_columns( |
241 | | Block* block, size_t rows, |
242 | | const std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>& |
243 | | partition_columns); |
244 | | Status _fill_missing_columns( |
245 | | Block* block, size_t rows, |
246 | | const std::unordered_map<std::string, VExprContextSPtr>& missing_columns); |
247 | | Status _build_pos_delete_filter(size_t read_rows); |
248 | | Status _filter_block(Block* block, int column_to_keep, |
249 | | const std::vector<uint32_t>& columns_to_filter); |
250 | | Status _filter_block_internal(Block* block, const std::vector<uint32_t>& columns_to_filter, |
251 | | const IColumn::Filter& filter); |
252 | | |
253 | | bool _can_filter_by_dict(int slot_id, const tparquet::ColumnMetaData& column_metadata); |
254 | | bool is_dictionary_encoded(const tparquet::ColumnMetaData& column_metadata); |
255 | | Status _rewrite_dict_predicates(); |
256 | | Status _rewrite_dict_conjuncts(std::vector<int32_t>& dict_codes, int slot_id, bool is_nullable); |
257 | | Status _convert_dict_cols_to_string_cols(Block* block); |
258 | | void _filter_read_ranges_by_condition_cache(); |
259 | | void _mark_condition_cache_granules(const uint8_t* filter_data, size_t num_rows, |
260 | | int64_t batch_seq_start); |
261 | | |
262 | | Status _get_current_batch_row_id(size_t read_rows); |
263 | | Status _fill_row_id_columns(Block* block, size_t read_rows, bool is_current_row_ids); |
264 | | Status _append_iceberg_rowid_column(Block* block, size_t read_rows, bool is_current_row_ids); |
265 | | |
266 | | io::FileReaderSPtr _file_reader; |
267 | | std::unordered_map<std::string, std::unique_ptr<ParquetColumnReader>> |
268 | | _column_readers; // keyed by table_column_name |
269 | | const std::vector<std::string>& _read_table_columns; |
270 | | |
271 | | const int32_t _row_group_id; |
272 | | const tparquet::RowGroup& _row_group_meta; |
273 | | int64_t _remaining_rows; |
274 | | const cctz::time_zone* _ctz = nullptr; |
275 | | io::IOContext* _io_ctx = nullptr; |
276 | | PositionDeleteContext _position_delete_ctx; |
277 | | // Row ranges merged from those generated by the page index and by position deletes. |
278 | | RowRanges _read_ranges; |
279 | | |
280 | | const LazyReadContext& _lazy_read_ctx; |
281 | | int64_t _lazy_read_filtered_rows = 0; |
282 | | int64_t _predicate_filter_time = 0; |
283 | | int64_t _dict_filter_rewrite_time = 0; |
284 | | int64_t _condition_cache_filtered_rows = 0; |
285 | | // If consecutive batches are skipped, they can be cached so that a whole page is skipped at once. |
286 | | size_t _cached_filtered_rows = 0; |
287 | | std::shared_ptr<ConditionCacheContext> _condition_cache_ctx; |
288 | | std::unique_ptr<IColumn::Filter> _pos_delete_filter_ptr; |
289 | | int64_t _total_read_rows = 0; |
290 | | const TupleDescriptor* _tuple_descriptor = nullptr; |
291 | | const RowDescriptor* _row_descriptor = nullptr; |
292 | | const std::unordered_map<std::string, int>* _col_name_to_slot_id = nullptr; |
293 | | const std::unordered_map<int, VExprContextSPtrs>* _slot_id_to_filter_conjuncts = nullptr; |
294 | | VExprContextSPtrs _dict_filter_conjuncts; |
295 | | VExprContextSPtrs _filter_conjuncts; |
296 | | // Each element is std::pair<col_name, slot_id>. |
297 | | std::vector<std::pair<std::string, int>> _dict_filter_cols; |
298 | | RuntimeState* _state = nullptr; |
299 | | std::shared_ptr<ObjectPool> _obj_pool; |
300 | | const std::set<uint64_t>& _column_ids; |
301 | | const std::set<uint64_t>& _filter_column_ids; |
302 | | bool _is_row_group_filtered = false; |
303 | | |
304 | | RowGroupIndex _current_row_group_idx {0, 0, 0}; |
305 | | std::pair<std::shared_ptr<segment_v2::RowIdColumnIteratorV2>, int> |
306 | | _row_id_column_iterator_pair = {nullptr, -1}; |
307 | | std::vector<rowid_t> _current_batch_row_ids; |
308 | | |
309 | | std::unordered_map<std::string, uint32_t>* _col_name_to_block_idx = nullptr; |
310 | | IcebergRowIdParams _iceberg_rowid_params; |
311 | | }; |
312 | | #include "common/compile_check_end.h" |
313 | | |
314 | | } // namespace doris |