be/src/core/block/block.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Block.h |
19 | | // and modified by Doris |
20 | | |
21 | | #pragma once |
22 | | |
#include <glog/logging.h>
#include <parallel_hashmap/phmap.h>

#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <list>
#include <memory>
#include <ostream>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "common/be_mock_util.h"
#include "common/exception.h"
#include "common/factory_creator.h"
#include "common/status.h"
#include "core/block/column_with_type_and_name.h"
#include "core/block/columns_with_type_and_name.h"
#include "core/column/column.h"
#include "core/column/column_nullable.h"
#include "core/data_type/data_type.h"
#include "core/data_type/data_type_nullable.h"
#include "core/types.h"
48 | | |
49 | | class SipHash; |
50 | | |
51 | | namespace doris { |
52 | | |
53 | | class TupleDescriptor; |
54 | | class PBlock; |
55 | | class SlotDescriptor; |
56 | | |
57 | | namespace segment_v2 { |
58 | | enum CompressionTypePB : int; |
59 | | } // namespace segment_v2 |
60 | | |
/** Container for a set of columns holding a batch of rows in memory.
  * This is the unit of data processing.
  * It also carries metadata: the data types of the columns and their names
  * (either original names from a table, or names generated during temporary calculations).
  * Allows inserting and removing columns at arbitrary positions, and changing the order of columns.
  */
67 | | class MutableBlock; |
68 | | |
69 | | class Block { |
70 | | ENABLE_FACTORY_CREATOR(Block); |
71 | | |
72 | | private: |
73 | | using Container = ColumnsWithTypeAndName; |
74 | | Container data; |
75 | | |
76 | | public: |
77 | 1.25M | Block() = default; |
78 | | Block(std::initializer_list<ColumnWithTypeAndName> il); |
79 | | Block(ColumnsWithTypeAndName data_); |
80 | | Block(const std::vector<SlotDescriptor*>& slots, size_t block_size); |
81 | | Block(const std::vector<SlotDescriptor>& slots, size_t block_size); |
82 | | |
83 | 1.63M | MOCK_FUNCTION ~Block() = default; |
84 | 886 | Block(const Block& block) = default; |
85 | 48.2k | Block& operator=(const Block& p) = default; |
86 | 63.4k | Block(Block&& block) = default; |
87 | 866 | Block& operator=(Block&& other) = default; |
88 | | |
89 | | void reserve(size_t count); |
90 | | // Make sure the nammes is useless when use block |
91 | | void clear_names(); |
92 | | |
93 | | /// insert the column at the specified position |
94 | | void insert(size_t position, const ColumnWithTypeAndName& elem); |
95 | | void insert(size_t position, ColumnWithTypeAndName&& elem); |
96 | | /// insert the column to the end |
97 | | void insert(const ColumnWithTypeAndName& elem); |
98 | | void insert(ColumnWithTypeAndName&& elem); |
99 | | /// remove the column at the specified position |
100 | | void erase(size_t position); |
101 | | /// remove the column at the [start, end) |
102 | | void erase_tail(size_t start); |
103 | | /// remove the columns at the specified positions |
104 | | void erase(const std::set<size_t>& positions); |
105 | | // T was std::set<int>, std::vector<int>, std::list<int> |
106 | | template <class T> |
107 | | void erase_not_in(const T& container) { |
108 | | Container new_data; |
109 | | for (auto pos : container) { |
110 | | new_data.emplace_back(std::move(data[pos])); |
111 | | } |
112 | | std::swap(data, new_data); |
113 | | } |
114 | | |
115 | 15 | std::unordered_map<std::string, uint32_t> get_name_to_pos_map() const { |
116 | 15 | std::unordered_map<std::string, uint32_t> name_to_index_map; |
117 | 123 | for (uint32_t i = 0; i < data.size(); ++i) { |
118 | 108 | name_to_index_map[data[i].name] = i; |
119 | 108 | } |
120 | 15 | return name_to_index_map; |
121 | 15 | } |
122 | | |
123 | | /// References are invalidated after calling functions above. |
124 | 17.6M | ColumnWithTypeAndName& get_by_position(size_t position) { |
125 | 17.6M | DCHECK(data.size() > position) |
126 | 0 | << ", data.size()=" << data.size() << ", position=" << position; |
127 | 17.6M | return data[position]; |
128 | 17.6M | } |
129 | 29.1M | const ColumnWithTypeAndName& get_by_position(size_t position) const { return data[position]; } |
130 | | |
131 | 45.1k | void replace_by_position(size_t position, ColumnPtr&& res) { |
132 | 45.1k | this->get_by_position(position).column = std::move(res); |
133 | 45.1k | } |
134 | | |
135 | 0 | void replace_by_position(size_t position, const ColumnPtr& res) { |
136 | 0 | this->get_by_position(position).column = res; |
137 | 0 | } |
138 | | |
139 | 668 | void replace_by_position_if_const(size_t position) { |
140 | 668 | auto& element = this->get_by_position(position); |
141 | 668 | element.column = element.column->convert_to_full_column_if_const(); |
142 | 668 | } |
143 | | |
144 | | ColumnWithTypeAndName& safe_get_by_position(size_t position); |
145 | | const ColumnWithTypeAndName& safe_get_by_position(size_t position) const; |
146 | | |
147 | 78.9k | Container::iterator begin() { return data.begin(); } |
148 | 78.9k | Container::iterator end() { return data.end(); } |
149 | 11.3k | Container::const_iterator begin() const { return data.begin(); } |
150 | 11.2k | Container::const_iterator end() const { return data.end(); } |
151 | 0 | Container::const_iterator cbegin() const { return data.cbegin(); } |
152 | 0 | Container::const_iterator cend() const { return data.cend(); } |
153 | | |
154 | | // Get position of column by name. Returns -1 if there is no column with that name. |
155 | | // ATTN: this method is O(N). better maintain name -> position map in caller if you need to call it frequently. |
156 | | int get_position_by_name(const std::string& name) const; |
157 | | |
158 | | const ColumnsWithTypeAndName& get_columns_with_type_and_name() const; |
159 | | |
160 | | std::vector<std::string> get_names() const; |
161 | | DataTypes get_data_types() const; |
162 | | |
163 | 59 | DataTypePtr get_data_type(size_t index) const { |
164 | 59 | CHECK(index < data.size()); |
165 | 59 | return data[index].type; |
166 | 59 | } |
167 | | |
168 | | /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0. |
169 | | size_t rows() const; |
170 | | |
171 | | // Cut the rows in block, use in LIMIT operation |
172 | | void set_num_rows(size_t length); |
173 | | |
174 | | // Skip the rows in block, use in OFFSET, LIMIT operation |
175 | | void skip_num_rows(int64_t& offset); |
176 | | |
177 | | /// As the assumption we used around, the number of columns won't exceed int16 range. so no need to worry when we |
178 | | /// assign it to int32. |
179 | 18.3M | uint32_t columns() const { return static_cast<uint32_t>(data.size()); } |
180 | | |
181 | | /// Checks that every column in block is not nullptr and has same number of elements. |
182 | | void check_number_of_rows(bool allow_null_columns = false) const; |
183 | | |
184 | | Status check_type_and_column() const; |
185 | | |
186 | | /// Approximate number of bytes in memory - for profiling and limits. |
187 | | size_t bytes() const; |
188 | | |
189 | | std::string columns_bytes() const; |
190 | | |
191 | | /// Approximate number of allocated bytes in memory - for profiling and limits. |
192 | | MOCK_FUNCTION size_t allocated_bytes() const; |
193 | | |
194 | | /** Get a list of column names separated by commas. */ |
195 | | std::string dump_names() const; |
196 | | |
197 | | std::string dump_types() const; |
198 | | |
199 | | /** List of names, types and lengths of columns. Designed for debugging. */ |
200 | | std::string dump_structure() const; |
201 | | |
202 | | /** Get the same block, but empty. */ |
203 | | Block clone_empty() const; |
204 | | |
205 | | Columns get_columns() const; |
206 | | Columns get_columns_and_convert(); |
207 | | |
208 | | Block clone_without_columns(const std::vector<int>* column_offset = nullptr) const; |
209 | | |
210 | | /** Get empty columns with the same types as in block. */ |
211 | | MutableColumns clone_empty_columns() const; |
212 | | |
213 | | /** Get columns from block for mutation. Columns in block will be nullptr. */ |
214 | | MutableColumns mutate_columns(); |
215 | | |
216 | | /** Replace columns in a block */ |
217 | | void set_columns(MutableColumns&& columns); |
218 | | Block clone_with_columns(MutableColumns&& columns) const; |
219 | | |
220 | | void clear(); |
221 | | void swap(Block& other) noexcept; |
222 | | void swap(Block&& other) noexcept; |
223 | | |
224 | | // Shuffle columns in place based on the result_column_ids |
225 | | void shuffle_columns(const std::vector<int>& result_column_ids); |
226 | | |
227 | | // Default column size = -1 means clear all column in block |
228 | | // Else clear column [0, column_size) delete column [column_size, data.size) |
229 | | void clear_column_data(int64_t column_size = -1) noexcept; |
230 | | |
231 | 23.8k | MOCK_FUNCTION bool mem_reuse() { return !data.empty(); } |
232 | | |
233 | 7 | bool is_empty_column() { return data.empty(); } |
234 | | |
235 | 1.92M | bool empty() const { return rows() == 0; } |
236 | | |
237 | | /** |
238 | | * Updates SipHash of the Block, using update method of columns. |
239 | | * Returns hash for block, that could be used to differentiate blocks |
240 | | * with same structure, but different data. |
241 | | */ |
242 | | void update_hash(SipHash& hash) const; |
243 | | |
244 | | /** |
245 | | * Get block data in string. |
246 | | * If code is in default_implementation_for_nulls or something likely, type and column's nullity could |
247 | | * temporarily be not same. set allow_null_mismatch to true to dump it correctly. |
248 | | */ |
249 | | std::string dump_data(size_t begin = 0, size_t row_limit = 100, |
250 | | bool allow_null_mismatch = false) const; |
251 | | |
252 | | std::string dump_data_json(size_t begin = 0, size_t row_limit = 100, |
253 | | bool allow_null_mismatch = false) const; |
254 | | |
255 | | /** Get one line data from block, only use in load data */ |
256 | | std::string dump_one_line(size_t row, int column_end) const; |
257 | | |
258 | | Status append_to_block_by_selector(MutableBlock* dst, const IColumn::Selector& selector) const; |
259 | | |
260 | | // need exception safety |
261 | | static void filter_block_internal(Block* block, const std::vector<uint32_t>& columns_to_filter, |
262 | | const IColumn::Filter& filter); |
263 | | // need exception safety |
264 | | static void filter_block_internal(Block* block, const IColumn::Filter& filter, |
265 | | uint32_t column_to_keep); |
266 | | // need exception safety |
267 | | static void filter_block_internal(Block* block, const IColumn::Filter& filter); |
268 | | |
269 | | static Status filter_block(Block* block, const std::vector<uint32_t>& columns_to_filter, |
270 | | size_t filter_column_id, size_t column_to_keep); |
271 | | |
272 | | static Status filter_block(Block* block, size_t filter_column_id, size_t column_to_keep); |
273 | | |
274 | 1.15k | static void erase_useless_column(Block* block, size_t column_to_keep) { |
275 | 1.15k | block->erase_tail(column_to_keep); |
276 | 1.15k | } |
277 | | |
278 | | // serialize block to PBlock |
279 | | Status serialize(int be_exec_version, PBlock* pblock, size_t* uncompressed_bytes, |
280 | | size_t* compressed_bytes, int64_t* compress_time, |
281 | | segment_v2::CompressionTypePB compression_type, |
282 | | bool allow_transfer_large_data = false) const; |
283 | | |
284 | | Status deserialize(const PBlock& pblock, size_t* uncompressed_bytes, int64_t* decompress_time); |
285 | | |
286 | | std::unique_ptr<Block> create_same_struct_block(size_t size, bool is_reserve = false) const; |
287 | | |
288 | | /** Compares (*this) n-th row and rhs m-th row. |
289 | | * Returns negative number, 0, or positive number (*this) n-th row is less, equal, greater than rhs m-th row respectively. |
290 | | * Is used in sortings. |
291 | | * |
292 | | * If one of element's value is NaN or NULLs, then: |
293 | | * - if nan_direction_hint == -1, NaN and NULLs are considered as least than everything other; |
294 | | * - if nan_direction_hint == 1, NaN and NULLs are considered as greatest than everything other. |
295 | | * For example, if nan_direction_hint == -1 is used by descending sorting, NaNs will be at the end. |
296 | | * |
297 | | * For non Nullable and non floating point types, nan_direction_hint is ignored. |
298 | | */ |
299 | 3 | int compare_at(size_t n, size_t m, const Block& rhs, int nan_direction_hint) const { |
300 | 3 | DCHECK_EQ(columns(), rhs.columns()); |
301 | 3 | return compare_at(n, m, columns(), rhs, nan_direction_hint); |
302 | 3 | } |
303 | | |
304 | | int compare_at(size_t n, size_t m, size_t num_columns, const Block& rhs, |
305 | 7.15M | int nan_direction_hint) const { |
306 | 7.15M | DCHECK_GE(columns(), num_columns); |
307 | 7.15M | DCHECK_GE(rhs.columns(), num_columns); |
308 | | |
309 | 7.15M | DCHECK_LE(n, rows()); |
310 | 7.15M | DCHECK_LE(m, rhs.rows()); |
311 | 9.94M | for (size_t i = 0; i < num_columns; ++i) { |
312 | 7.19M | DCHECK(get_by_position(i).type->equals(*rhs.get_by_position(i).type)); |
313 | 7.19M | auto res = get_by_position(i).column->compare_at(n, m, *(rhs.get_by_position(i).column), |
314 | 7.19M | nan_direction_hint); |
315 | 7.19M | if (res) { |
316 | 4.40M | return res; |
317 | 4.40M | } |
318 | 7.19M | } |
319 | 2.74M | return 0; |
320 | 7.15M | } |
321 | | |
322 | | int compare_at(size_t n, size_t m, const std::vector<uint32_t>* compare_columns, |
323 | 2 | const Block& rhs, int nan_direction_hint) const { |
324 | 2 | DCHECK_GE(columns(), compare_columns->size()); |
325 | 2 | DCHECK_GE(rhs.columns(), compare_columns->size()); |
326 | | |
327 | 2 | DCHECK_LE(n, rows()); |
328 | 2 | DCHECK_LE(m, rhs.rows()); |
329 | 3 | for (auto i : *compare_columns) { |
330 | 3 | DCHECK(get_by_position(i).type->equals(*rhs.get_by_position(i).type)); |
331 | 3 | auto res = get_by_position(i).column->compare_at(n, m, *(rhs.get_by_position(i).column), |
332 | 3 | nan_direction_hint); |
333 | 3 | if (res) { |
334 | 2 | return res; |
335 | 2 | } |
336 | 3 | } |
337 | 0 | return 0; |
338 | 2 | } |
339 | | |
340 | | //note(wb) no DCHECK here, because this method is only used after compare_at now, so no need to repeat check here. |
341 | | // If this method is used in more places, you can add DCHECK case by case. |
342 | | int compare_column_at(size_t n, size_t m, size_t col_idx, const Block& rhs, |
343 | 26.9k | int nan_direction_hint) const { |
344 | 26.9k | auto res = get_by_position(col_idx).column->compare_at( |
345 | 26.9k | n, m, *(rhs.get_by_position(col_idx).column), nan_direction_hint); |
346 | 26.9k | return res; |
347 | 26.9k | } |
348 | | |
349 | | // for String type or Array<String> type |
350 | | void shrink_char_type_column_suffix_zero(const std::vector<size_t>& char_type_idx); |
351 | | |
352 | | void clear_column_mem_not_keep(const std::vector<bool>& column_keep_flags, |
353 | | bool need_keep_first); |
354 | | |
355 | | private: |
356 | | void erase_impl(size_t position); |
357 | | }; |
358 | | |
359 | | using Blocks = std::vector<Block>; |
360 | | using BlocksList = std::list<Block>; |
361 | | using BlocksPtr = std::shared_ptr<Blocks>; |
362 | | using BlocksPtrs = std::shared_ptr<std::vector<BlocksPtr>>; |
363 | | |
364 | | class MutableBlock { |
365 | | ENABLE_FACTORY_CREATOR(MutableBlock); |
366 | | |
367 | | private: |
368 | | MutableColumns _columns; |
369 | | DataTypes _data_types; |
370 | | std::vector<std::string> _names; |
371 | | |
372 | | public: |
373 | 48.6k | static MutableBlock build_mutable_block(Block* block) { |
374 | 48.6k | return block == nullptr ? MutableBlock() : MutableBlock(block); |
375 | 48.6k | } |
376 | 72.1k | MutableBlock() = default; |
377 | 217k | ~MutableBlock() = default; |
378 | | |
379 | | MutableBlock(Block* block) |
380 | 48.8k | : _columns(block->mutate_columns()), |
381 | 48.8k | _data_types(block->get_data_types()), |
382 | 48.8k | _names(block->get_names()) {} |
383 | | MutableBlock(Block&& block) |
384 | 96.6k | : _columns(block.mutate_columns()), |
385 | 96.6k | _data_types(block.get_data_types()), |
386 | 96.6k | _names(block.get_names()) {} |
387 | | |
388 | 96.0k | void operator=(MutableBlock&& m_block) { |
389 | 96.0k | _columns = std::move(m_block._columns); |
390 | 96.0k | _data_types = std::move(m_block._data_types); |
391 | 96.0k | _names = std::move(m_block._names); |
392 | 96.0k | } |
393 | | |
394 | | size_t rows() const; |
395 | 344 | size_t columns() const { return _columns.size(); } |
396 | | |
397 | 144k | bool empty() const { return rows() == 0; } |
398 | | |
399 | 48.5k | MutableColumns& mutable_columns() { return _columns; } |
400 | | |
401 | 1 | void set_mutable_columns(MutableColumns&& columns) { _columns = std::move(columns); } |
402 | | |
403 | 0 | DataTypes& data_types() { return _data_types; } |
404 | | |
405 | 126 | MutableColumnPtr& get_column_by_position(size_t position) { return _columns[position]; } |
406 | 60 | const MutableColumnPtr& get_column_by_position(size_t position) const { |
407 | 60 | return _columns[position]; |
408 | 60 | } |
409 | | |
410 | 0 | DataTypePtr& get_datatype_by_position(size_t position) { return _data_types[position]; } |
411 | 22 | const DataTypePtr& get_datatype_by_position(size_t position) const { |
412 | 22 | return _data_types[position]; |
413 | 22 | } |
414 | | |
415 | 38 | int compare_one_column(size_t n, size_t m, size_t column_id, int nan_direction_hint) const { |
416 | 38 | DCHECK_LE(column_id, columns()); |
417 | 38 | DCHECK_LE(n, rows()); |
418 | 38 | DCHECK_LE(m, rows()); |
419 | 38 | auto& column = get_column_by_position(column_id); |
420 | 38 | return column->compare_at(n, m, *column, nan_direction_hint); |
421 | 38 | } |
422 | | |
423 | | int compare_at(size_t n, size_t m, size_t num_columns, const MutableBlock& rhs, |
424 | 6 | int nan_direction_hint) const { |
425 | 6 | DCHECK_GE(columns(), num_columns); |
426 | 6 | DCHECK_GE(rhs.columns(), num_columns); |
427 | | |
428 | 6 | DCHECK_LE(n, rows()); |
429 | 6 | DCHECK_LE(m, rhs.rows()); |
430 | 14 | for (size_t i = 0; i < num_columns; ++i) { |
431 | 11 | DCHECK(get_datatype_by_position(i)->equals(*rhs.get_datatype_by_position(i))); |
432 | 11 | auto res = get_column_by_position(i)->compare_at(n, m, *(rhs.get_column_by_position(i)), |
433 | 11 | nan_direction_hint); |
434 | 11 | if (res) { |
435 | 3 | return res; |
436 | 3 | } |
437 | 11 | } |
438 | 3 | return 0; |
439 | 6 | } |
440 | | |
441 | | int compare_at(size_t n, size_t m, const std::vector<uint32_t>* compare_columns, |
442 | 0 | const MutableBlock& rhs, int nan_direction_hint) const { |
443 | 0 | DCHECK_GE(columns(), compare_columns->size()); |
444 | 0 | DCHECK_GE(rhs.columns(), compare_columns->size()); |
445 | |
|
446 | 0 | DCHECK_LE(n, rows()); |
447 | 0 | DCHECK_LE(m, rhs.rows()); |
448 | 0 | for (auto i : *compare_columns) { |
449 | 0 | DCHECK(get_datatype_by_position(i)->equals(*rhs.get_datatype_by_position(i))); |
450 | 0 | auto res = get_column_by_position(i)->compare_at(n, m, *(rhs.get_column_by_position(i)), |
451 | 0 | nan_direction_hint); |
452 | 0 | if (res) { |
453 | 0 | return res; |
454 | 0 | } |
455 | 0 | } |
456 | 0 | return 0; |
457 | 0 | } |
458 | | |
459 | 3 | std::string dump_types() const { |
460 | 3 | std::string res; |
461 | 9 | for (auto type : _data_types) { |
462 | 9 | if (!res.empty()) { |
463 | 6 | res += ", "; |
464 | 6 | } |
465 | 9 | res += type->get_name(); |
466 | 9 | } |
467 | 3 | return res; |
468 | 3 | } |
469 | | |
470 | | template <typename T> |
471 | 124 | [[nodiscard]] Status merge(T&& block) { |
472 | 124 | RETURN_IF_CATCH_EXCEPTION(return merge_impl(block);); |
473 | 124 | } _ZN5doris12MutableBlock5mergeIRNS_5BlockEEENS_6StatusEOT_ Line | Count | Source | 471 | 59 | [[nodiscard]] Status merge(T&& block) { | 472 | 59 | RETURN_IF_CATCH_EXCEPTION(return merge_impl(block);); | 473 | 59 | } |
_ZN5doris12MutableBlock5mergeINS_5BlockEEENS_6StatusEOT_ Line | Count | Source | 471 | 65 | [[nodiscard]] Status merge(T&& block) { | 472 | 65 | RETURN_IF_CATCH_EXCEPTION(return merge_impl(block);); | 473 | 65 | } |
|
474 | | |
475 | | template <typename T> |
476 | 48.0k | [[nodiscard]] Status merge_ignore_overflow(T&& block) { |
477 | 48.0k | RETURN_IF_CATCH_EXCEPTION(return merge_impl_ignore_overflow(block);); |
478 | 48.0k | } |
479 | | |
480 | | // only use for join. call ignore_overflow to prevent from throw exception in join |
481 | | template <typename T> |
482 | 48.0k | [[nodiscard]] Status merge_impl_ignore_overflow(T&& block) { |
483 | 48.0k | if (_columns.size() != block.columns()) { |
484 | 1 | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
485 | 1 | "Merge block not match, self column count: {}, [columns: {}, types: {}], " |
486 | 1 | "input column count: {}, [columns: {}, " |
487 | 1 | "types: {}], ", |
488 | 1 | _columns.size(), dump_names(), dump_types(), block.columns(), |
489 | 1 | block.dump_names(), block.dump_types()); |
490 | 1 | } |
491 | 142k | for (int i = 0; i < _columns.size(); ++i) { |
492 | 94.2k | if (!_data_types[i]->equals(*block.get_by_position(i).type)) { |
493 | 1 | throw doris::Exception(doris::ErrorCode::FATAL_ERROR, |
494 | 1 | "Merge block not match, self:[columns: {}, types: {}], " |
495 | 1 | "input:[columns: {}, types: {}], ", |
496 | 1 | dump_names(), dump_types(), block.dump_names(), |
497 | 1 | block.dump_types()); |
498 | 1 | } |
499 | 94.2k | _columns[i]->insert_range_from_ignore_overflow( |
500 | 94.2k | *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0, |
501 | 94.2k | block.rows()); |
502 | 94.2k | } |
503 | 48.0k | return Status::OK(); |
504 | 48.0k | } _ZN5doris12MutableBlock26merge_impl_ignore_overflowIRNS_5BlockEEENS_6StatusEOT_ Line | Count | Source | 482 | 48.0k | [[nodiscard]] Status merge_impl_ignore_overflow(T&& block) { | 483 | 48.0k | if (_columns.size() != block.columns()) { | 484 | 1 | return Status::Error<ErrorCode::INTERNAL_ERROR>( | 485 | 1 | "Merge block not match, self column count: {}, [columns: {}, types: {}], " | 486 | 1 | "input column count: {}, [columns: {}, " | 487 | 1 | "types: {}], ", | 488 | 1 | _columns.size(), dump_names(), dump_types(), block.columns(), | 489 | 1 | block.dump_names(), block.dump_types()); | 490 | 1 | } | 491 | 142k | for (int i = 0; i < _columns.size(); ++i) { | 492 | 94.2k | if (!_data_types[i]->equals(*block.get_by_position(i).type)) { | 493 | 0 | throw doris::Exception(doris::ErrorCode::FATAL_ERROR, | 494 | 0 | "Merge block not match, self:[columns: {}, types: {}], " | 495 | 0 | "input:[columns: {}, types: {}], ", | 496 | 0 | dump_names(), dump_types(), block.dump_names(), | 497 | 0 | block.dump_types()); | 498 | 0 | } | 499 | 94.2k | _columns[i]->insert_range_from_ignore_overflow( | 500 | 94.2k | *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0, | 501 | 94.2k | block.rows()); | 502 | 94.2k | } | 503 | 48.0k | return Status::OK(); | 504 | 48.0k | } |
_ZN5doris12MutableBlock26merge_impl_ignore_overflowINS_5BlockEEENS_6StatusEOT_ Line | Count | Source | 482 | 1 | [[nodiscard]] Status merge_impl_ignore_overflow(T&& block) { | 483 | 1 | if (_columns.size() != block.columns()) { | 484 | 0 | return Status::Error<ErrorCode::INTERNAL_ERROR>( | 485 | 0 | "Merge block not match, self column count: {}, [columns: {}, types: {}], " | 486 | 0 | "input column count: {}, [columns: {}, " | 487 | 0 | "types: {}], ", | 488 | 0 | _columns.size(), dump_names(), dump_types(), block.columns(), | 489 | 0 | block.dump_names(), block.dump_types()); | 490 | 0 | } | 491 | 3 | for (int i = 0; i < _columns.size(); ++i) { | 492 | 3 | if (!_data_types[i]->equals(*block.get_by_position(i).type)) { | 493 | 1 | throw doris::Exception(doris::ErrorCode::FATAL_ERROR, | 494 | 1 | "Merge block not match, self:[columns: {}, types: {}], " | 495 | 1 | "input:[columns: {}, types: {}], ", | 496 | 1 | dump_names(), dump_types(), block.dump_names(), | 497 | 1 | block.dump_types()); | 498 | 1 | } | 499 | 2 | _columns[i]->insert_range_from_ignore_overflow( | 500 | 2 | *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0, | 501 | 2 | block.rows()); | 502 | 2 | } | 503 | 0 | return Status::OK(); | 504 | 1 | } |
|
505 | | |
506 | | template <typename T> |
507 | 126 | [[nodiscard]] Status merge_impl(T&& block) { |
508 | | // merge is not supported in dynamic block |
509 | 126 | if (_columns.empty() && _data_types.empty()) { |
510 | 50 | _data_types = block.get_data_types(); |
511 | 50 | _names = block.get_names(); |
512 | 50 | _columns.resize(block.columns()); |
513 | 160 | for (size_t i = 0; i < block.columns(); ++i) { |
514 | 110 | if (block.get_by_position(i).column) { |
515 | 109 | _columns[i] = (*std::move(block.get_by_position(i) |
516 | 109 | .column->convert_to_full_column_if_const())) |
517 | 109 | .mutate(); |
518 | 109 | } else { |
519 | 1 | _columns[i] = _data_types[i]->create_column(); |
520 | 1 | } |
521 | 110 | } |
522 | 76 | } else { |
523 | 76 | if (_columns.size() != block.columns()) { |
524 | 1 | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
525 | 1 | "Merge block not match, self column count: {}, [columns: {}, types: {}], " |
526 | 1 | "input column count: {}, [columns: {}, " |
527 | 1 | "types: {}], ", |
528 | 1 | _columns.size(), dump_names(), dump_types(), block.columns(), |
529 | 1 | block.dump_names(), block.dump_types()); |
530 | 1 | } |
531 | 195 | for (int i = 0; i < _columns.size(); ++i) { |
532 | 120 | if (!_data_types[i]->equals(*block.get_by_position(i).type)) { |
533 | 1 | DCHECK(_data_types[i]->is_nullable()) |
534 | 0 | << " target type: " << _data_types[i]->get_name() |
535 | 0 | << " src type: " << block.get_by_position(i).type->get_name(); |
536 | 1 | DCHECK(((DataTypeNullable*)_data_types[i].get()) |
537 | 1 | ->get_nested_type() |
538 | 1 | ->equals(*block.get_by_position(i).type)); |
539 | 1 | DCHECK(!block.get_by_position(i).type->is_nullable()); |
540 | 1 | _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column) |
541 | 1 | ->convert_to_full_column_if_const(), |
542 | 1 | 0, block.rows()); |
543 | 119 | } else { |
544 | 119 | _columns[i]->insert_range_from( |
545 | 119 | *block.get_by_position(i) |
546 | 119 | .column->convert_to_full_column_if_const() |
547 | 119 | .get(), |
548 | 119 | 0, block.rows()); |
549 | 119 | } |
550 | 120 | } |
551 | 75 | } |
552 | 125 | return Status::OK(); |
553 | 126 | } _ZN5doris12MutableBlock10merge_implIRNS_5BlockEEENS_6StatusEOT_ Line | Count | Source | 507 | 124 | [[nodiscard]] Status merge_impl(T&& block) { | 508 | | // merge is not supported in dynamic block | 509 | 124 | if (_columns.empty() && _data_types.empty()) { | 510 | 50 | _data_types = block.get_data_types(); | 511 | 50 | _names = block.get_names(); | 512 | 50 | _columns.resize(block.columns()); | 513 | 160 | for (size_t i = 0; i < block.columns(); ++i) { | 514 | 110 | if (block.get_by_position(i).column) { | 515 | 109 | _columns[i] = (*std::move(block.get_by_position(i) | 516 | 109 | .column->convert_to_full_column_if_const())) | 517 | 109 | .mutate(); | 518 | 109 | } else { | 519 | 1 | _columns[i] = _data_types[i]->create_column(); | 520 | 1 | } | 521 | 110 | } | 522 | 74 | } else { | 523 | 74 | if (_columns.size() != block.columns()) { | 524 | 0 | return Status::Error<ErrorCode::INTERNAL_ERROR>( | 525 | 0 | "Merge block not match, self column count: {}, [columns: {}, types: {}], " | 526 | 0 | "input column count: {}, [columns: {}, " | 527 | 0 | "types: {}], ", | 528 | 0 | _columns.size(), dump_names(), dump_types(), block.columns(), | 529 | 0 | block.dump_names(), block.dump_types()); | 530 | 0 | } | 531 | 191 | for (int i = 0; i < _columns.size(); ++i) { | 532 | 117 | if (!_data_types[i]->equals(*block.get_by_position(i).type)) { | 533 | 0 | DCHECK(_data_types[i]->is_nullable()) | 534 | 0 | << " target type: " << _data_types[i]->get_name() | 535 | 0 | << " src type: " << block.get_by_position(i).type->get_name(); | 536 | 0 | DCHECK(((DataTypeNullable*)_data_types[i].get()) | 537 | 0 | ->get_nested_type() | 538 | 0 | ->equals(*block.get_by_position(i).type)); | 539 | 0 | DCHECK(!block.get_by_position(i).type->is_nullable()); | 540 | 0 | _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column) | 541 | 0 | ->convert_to_full_column_if_const(), | 542 | 0 | 0, block.rows()); | 543 | 117 | } else { | 544 | 117 | 
_columns[i]->insert_range_from( | 545 | 117 | *block.get_by_position(i) | 546 | 117 | .column->convert_to_full_column_if_const() | 547 | 117 | .get(), | 548 | 117 | 0, block.rows()); | 549 | 117 | } | 550 | 117 | } | 551 | 74 | } | 552 | 124 | return Status::OK(); | 553 | 124 | } |
_ZN5doris12MutableBlock10merge_implINS_5BlockEEENS_6StatusEOT_ Line | Count | Source | 507 | 2 | [[nodiscard]] Status merge_impl(T&& block) { | 508 | | // merge is not supported in dynamic block | 509 | 2 | if (_columns.empty() && _data_types.empty()) { | 510 | 0 | _data_types = block.get_data_types(); | 511 | 0 | _names = block.get_names(); | 512 | 0 | _columns.resize(block.columns()); | 513 | 0 | for (size_t i = 0; i < block.columns(); ++i) { | 514 | 0 | if (block.get_by_position(i).column) { | 515 | 0 | _columns[i] = (*std::move(block.get_by_position(i) | 516 | 0 | .column->convert_to_full_column_if_const())) | 517 | 0 | .mutate(); | 518 | 0 | } else { | 519 | 0 | _columns[i] = _data_types[i]->create_column(); | 520 | 0 | } | 521 | 0 | } | 522 | 2 | } else { | 523 | 2 | if (_columns.size() != block.columns()) { | 524 | 1 | return Status::Error<ErrorCode::INTERNAL_ERROR>( | 525 | 1 | "Merge block not match, self column count: {}, [columns: {}, types: {}], " | 526 | 1 | "input column count: {}, [columns: {}, " | 527 | 1 | "types: {}], ", | 528 | 1 | _columns.size(), dump_names(), dump_types(), block.columns(), | 529 | 1 | block.dump_names(), block.dump_types()); | 530 | 1 | } | 531 | 4 | for (int i = 0; i < _columns.size(); ++i) { | 532 | 3 | if (!_data_types[i]->equals(*block.get_by_position(i).type)) { | 533 | 1 | DCHECK(_data_types[i]->is_nullable()) | 534 | 0 | << " target type: " << _data_types[i]->get_name() | 535 | 0 | << " src type: " << block.get_by_position(i).type->get_name(); | 536 | 1 | DCHECK(((DataTypeNullable*)_data_types[i].get()) | 537 | 1 | ->get_nested_type() | 538 | 1 | ->equals(*block.get_by_position(i).type)); | 539 | 1 | DCHECK(!block.get_by_position(i).type->is_nullable()); | 540 | 1 | _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column) | 541 | 1 | ->convert_to_full_column_if_const(), | 542 | 1 | 0, block.rows()); | 543 | 2 | } else { | 544 | 2 | _columns[i]->insert_range_from( | 545 | 2 | 
*block.get_by_position(i) | 546 | 2 | .column->convert_to_full_column_if_const() | 547 | 2 | .get(), | 548 | 2 | 0, block.rows()); | 549 | 2 | } | 550 | 3 | } | 551 | 1 | } | 552 | 1 | return Status::OK(); | 553 | 2 | } |
|
554 | | |
555 | | // move to columns' data to a Block. this will invalidate |
556 | | Block to_block(int start_column = 0); |
557 | | Block to_block(int start_column, int end_column); |
558 | | |
559 | | void swap(MutableBlock& other) noexcept; |
560 | | |
561 | | void add_row(const Block* block, int row); |
562 | | // Batch add row should return error status if allocate memory failed. |
563 | | Status add_rows(const Block* block, const uint32_t* row_begin, const uint32_t* row_end, |
564 | | const std::vector<int>* column_offset = nullptr); |
565 | | Status add_rows(const Block* block, size_t row_begin, size_t length); |
566 | | |
567 | | std::string dump_data(size_t row_limit = 100) const; |
568 | | std::string dump_data_json(size_t row_limit = 100) const; |
569 | | |
570 | 83 | void clear() { |
571 | 83 | _columns.clear(); |
572 | 83 | _data_types.clear(); |
573 | 83 | _names.clear(); |
574 | 83 | } |
575 | | |
576 | | // columns resist. columns' inner data removed. |
577 | | void clear_column_data() noexcept; |
578 | | |
579 | | size_t allocated_bytes() const; |
580 | | |
581 | 48.0k | size_t bytes() const { |
582 | 48.0k | size_t res = 0; |
583 | 94.0k | for (const auto& elem : _columns) { |
584 | 94.0k | res += elem->byte_size(); |
585 | 94.0k | } |
586 | | |
587 | 48.0k | return res; |
588 | 48.0k | } |
589 | | |
590 | 0 | std::vector<std::string>& get_names() { return _names; } |
591 | | |
592 | | /** Get a list of column names separated by commas. */ |
593 | | std::string dump_names() const; |
594 | | }; |
595 | | |
596 | | struct IteratorRowRef { |
597 | | std::shared_ptr<Block> block; |
598 | | int row_pos; |
599 | | bool is_same; |
600 | | |
601 | | template <typename T> |
602 | 1.06M | int compare(const IteratorRowRef& rhs, const T& compare_arguments) const { |
603 | 1.06M | return block->compare_at(row_pos, rhs.row_pos, compare_arguments, *rhs.block, -1); |
604 | 1.06M | } Unexecuted instantiation: _ZNK5doris14IteratorRowRef7compareIPKSt6vectorIjSaIjEEEEiRKS0_RKT_ _ZNK5doris14IteratorRowRef7compareImEEiRKS0_RKT_ Line | Count | Source | 602 | 1.06M | int compare(const IteratorRowRef& rhs, const T& compare_arguments) const { | 603 | 1.06M | return block->compare_at(row_pos, rhs.row_pos, compare_arguments, *rhs.block, -1); | 604 | 1.06M | } |
|
605 | | |
606 | 512 | void reset() { |
607 | 512 | block = nullptr; |
608 | 512 | row_pos = -1; |
609 | 512 | is_same = false; |
610 | 512 | } |
611 | | }; |
612 | | |
613 | | using BlockView = std::vector<IteratorRowRef>; |
614 | | using BlockUPtr = std::unique_ptr<Block>; |
615 | | |
616 | | } // namespace doris |