Coverage Report

Created: 2026-03-15 20:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/core/block/block.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Block.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <glog/logging.h>
24
#include <parallel_hashmap/phmap.h>
25
26
#include <cstddef>
27
#include <cstdint>
28
#include <initializer_list>
29
#include <list>
30
#include <memory>
31
#include <ostream>
32
#include <set>
33
#include <string>
34
#include <utility>
35
#include <vector>
36
37
#include "common/be_mock_util.h"
38
#include "common/exception.h"
39
#include "common/factory_creator.h"
40
#include "common/status.h"
41
#include "core/block/column_with_type_and_name.h"
42
#include "core/block/columns_with_type_and_name.h"
43
#include "core/column/column.h"
44
#include "core/column/column_nullable.h"
45
#include "core/data_type/data_type.h"
46
#include "core/data_type/data_type_nullable.h"
47
#include "core/types.h"
48
49
class SipHash;
50
51
namespace doris {
52
53
class TupleDescriptor;
54
class PBlock;
55
class SlotDescriptor;
56
57
namespace segment_v2 {
58
enum CompressionTypePB : int;
59
} // namespace segment_v2
60
61
/** Container for set of columns for bunch of rows in memory.
62
  * This is unit of data processing.
63
  * Also contains metadata - data types of columns and their names
64
  *  (either original names from a table, or generated names during temporary calculations).
65
  * Allows to insert, remove columns in arbitrary position, to change order of columns.
66
  */
67
class MutableBlock;
68
69
class Block {
70
    ENABLE_FACTORY_CREATOR(Block);
71
72
private:
73
    using Container = ColumnsWithTypeAndName;
74
    Container data;
75
76
public:
77
1.25M
    Block() = default;
78
    Block(std::initializer_list<ColumnWithTypeAndName> il);
79
    Block(ColumnsWithTypeAndName data_);
80
    Block(const std::vector<SlotDescriptor*>& slots, size_t block_size);
81
    Block(const std::vector<SlotDescriptor>& slots, size_t block_size);
82
83
1.63M
    MOCK_FUNCTION ~Block() = default;
84
886
    Block(const Block& block) = default;
85
48.2k
    Block& operator=(const Block& p) = default;
86
63.4k
    Block(Block&& block) = default;
87
866
    Block& operator=(Block&& other) = default;
88
89
    void reserve(size_t count);
90
    // Make sure the nammes is useless when use block
91
    void clear_names();
92
93
    /// insert the column at the specified position
94
    void insert(size_t position, const ColumnWithTypeAndName& elem);
95
    void insert(size_t position, ColumnWithTypeAndName&& elem);
96
    /// insert the column to the end
97
    void insert(const ColumnWithTypeAndName& elem);
98
    void insert(ColumnWithTypeAndName&& elem);
99
    /// remove the column at the specified position
100
    void erase(size_t position);
101
    /// remove the column at the [start, end)
102
    void erase_tail(size_t start);
103
    /// remove the columns at the specified positions
104
    void erase(const std::set<size_t>& positions);
105
    // T was std::set<int>, std::vector<int>, std::list<int>
106
    template <class T>
107
    void erase_not_in(const T& container) {
108
        Container new_data;
109
        for (auto pos : container) {
110
            new_data.emplace_back(std::move(data[pos]));
111
        }
112
        std::swap(data, new_data);
113
    }
114
115
15
    std::unordered_map<std::string, uint32_t> get_name_to_pos_map() const {
116
15
        std::unordered_map<std::string, uint32_t> name_to_index_map;
117
123
        for (uint32_t i = 0; i < data.size(); ++i) {
118
108
            name_to_index_map[data[i].name] = i;
119
108
        }
120
15
        return name_to_index_map;
121
15
    }
122
123
    /// References are invalidated after calling functions above.
124
17.6M
    ColumnWithTypeAndName& get_by_position(size_t position) {
125
17.6M
        DCHECK(data.size() > position)
126
0
                << ", data.size()=" << data.size() << ", position=" << position;
127
17.6M
        return data[position];
128
17.6M
    }
129
29.1M
    const ColumnWithTypeAndName& get_by_position(size_t position) const { return data[position]; }
130
131
45.1k
    void replace_by_position(size_t position, ColumnPtr&& res) {
132
45.1k
        this->get_by_position(position).column = std::move(res);
133
45.1k
    }
134
135
0
    void replace_by_position(size_t position, const ColumnPtr& res) {
136
0
        this->get_by_position(position).column = res;
137
0
    }
138
139
668
    void replace_by_position_if_const(size_t position) {
140
668
        auto& element = this->get_by_position(position);
141
668
        element.column = element.column->convert_to_full_column_if_const();
142
668
    }
143
144
    ColumnWithTypeAndName& safe_get_by_position(size_t position);
145
    const ColumnWithTypeAndName& safe_get_by_position(size_t position) const;
146
147
78.9k
    Container::iterator begin() { return data.begin(); }
148
78.9k
    Container::iterator end() { return data.end(); }
149
11.3k
    Container::const_iterator begin() const { return data.begin(); }
150
11.2k
    Container::const_iterator end() const { return data.end(); }
151
0
    Container::const_iterator cbegin() const { return data.cbegin(); }
152
0
    Container::const_iterator cend() const { return data.cend(); }
153
154
    // Get position of column by name. Returns -1 if there is no column with that name.
155
    // ATTN: this method is O(N). better maintain name -> position map in caller if you need to call it frequently.
156
    int get_position_by_name(const std::string& name) const;
157
158
    const ColumnsWithTypeAndName& get_columns_with_type_and_name() const;
159
160
    std::vector<std::string> get_names() const;
161
    DataTypes get_data_types() const;
162
163
59
    DataTypePtr get_data_type(size_t index) const {
164
59
        CHECK(index < data.size());
165
59
        return data[index].type;
166
59
    }
167
168
    /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
169
    size_t rows() const;
170
171
    // Cut the rows in block, use in LIMIT operation
172
    void set_num_rows(size_t length);
173
174
    // Skip the rows in block, use in OFFSET, LIMIT operation
175
    void skip_num_rows(int64_t& offset);
176
177
    /// As the assumption we used around, the number of columns won't exceed int16 range. so no need to worry when we
178
    ///  assign it to int32.
179
18.3M
    uint32_t columns() const { return static_cast<uint32_t>(data.size()); }
180
181
    /// Checks that every column in block is not nullptr and has same number of elements.
182
    void check_number_of_rows(bool allow_null_columns = false) const;
183
184
    Status check_type_and_column() const;
185
186
    /// Approximate number of bytes in memory - for profiling and limits.
187
    size_t bytes() const;
188
189
    std::string columns_bytes() const;
190
191
    /// Approximate number of allocated bytes in memory - for profiling and limits.
192
    MOCK_FUNCTION size_t allocated_bytes() const;
193
194
    /** Get a list of column names separated by commas. */
195
    std::string dump_names() const;
196
197
    std::string dump_types() const;
198
199
    /** List of names, types and lengths of columns. Designed for debugging. */
200
    std::string dump_structure() const;
201
202
    /** Get the same block, but empty. */
203
    Block clone_empty() const;
204
205
    Columns get_columns() const;
206
    Columns get_columns_and_convert();
207
208
    Block clone_without_columns(const std::vector<int>* column_offset = nullptr) const;
209
210
    /** Get empty columns with the same types as in block. */
211
    MutableColumns clone_empty_columns() const;
212
213
    /** Get columns from block for mutation. Columns in block will be nullptr. */
214
    MutableColumns mutate_columns();
215
216
    /** Replace columns in a block */
217
    void set_columns(MutableColumns&& columns);
218
    Block clone_with_columns(MutableColumns&& columns) const;
219
220
    void clear();
221
    void swap(Block& other) noexcept;
222
    void swap(Block&& other) noexcept;
223
224
    // Shuffle columns in place based on the result_column_ids
225
    void shuffle_columns(const std::vector<int>& result_column_ids);
226
227
    // Default column size = -1 means clear all column in block
228
    // Else clear column [0, column_size) delete column [column_size, data.size)
229
    void clear_column_data(int64_t column_size = -1) noexcept;
230
231
23.8k
    MOCK_FUNCTION bool mem_reuse() { return !data.empty(); }
232
233
7
    bool is_empty_column() { return data.empty(); }
234
235
1.92M
    bool empty() const { return rows() == 0; }
236
237
    /** 
238
      * Updates SipHash of the Block, using update method of columns.
239
      * Returns hash for block, that could be used to differentiate blocks
240
      *  with same structure, but different data.
241
      */
242
    void update_hash(SipHash& hash) const;
243
244
    /** 
245
     *  Get block data in string. 
246
     *  If code is in default_implementation_for_nulls or something likely, type and column's nullity could
247
     *   temporarily be not same. set allow_null_mismatch to true to dump it correctly.
248
    */
249
    std::string dump_data(size_t begin = 0, size_t row_limit = 100,
250
                          bool allow_null_mismatch = false) const;
251
252
    std::string dump_data_json(size_t begin = 0, size_t row_limit = 100,
253
                               bool allow_null_mismatch = false) const;
254
255
    /** Get one line data from block, only use in load data */
256
    std::string dump_one_line(size_t row, int column_end) const;
257
258
    Status append_to_block_by_selector(MutableBlock* dst, const IColumn::Selector& selector) const;
259
260
    // need exception safety
261
    static void filter_block_internal(Block* block, const std::vector<uint32_t>& columns_to_filter,
262
                                      const IColumn::Filter& filter);
263
    // need exception safety
264
    static void filter_block_internal(Block* block, const IColumn::Filter& filter,
265
                                      uint32_t column_to_keep);
266
    // need exception safety
267
    static void filter_block_internal(Block* block, const IColumn::Filter& filter);
268
269
    static Status filter_block(Block* block, const std::vector<uint32_t>& columns_to_filter,
270
                               size_t filter_column_id, size_t column_to_keep);
271
272
    static Status filter_block(Block* block, size_t filter_column_id, size_t column_to_keep);
273
274
1.15k
    static void erase_useless_column(Block* block, size_t column_to_keep) {
275
1.15k
        block->erase_tail(column_to_keep);
276
1.15k
    }
277
278
    // serialize block to PBlock
279
    Status serialize(int be_exec_version, PBlock* pblock, size_t* uncompressed_bytes,
280
                     size_t* compressed_bytes, int64_t* compress_time,
281
                     segment_v2::CompressionTypePB compression_type,
282
                     bool allow_transfer_large_data = false) const;
283
284
    Status deserialize(const PBlock& pblock, size_t* uncompressed_bytes, int64_t* decompress_time);
285
286
    std::unique_ptr<Block> create_same_struct_block(size_t size, bool is_reserve = false) const;
287
288
    /** Compares (*this) n-th row and rhs m-th row.
289
      * Returns negative number, 0, or positive number  (*this) n-th row is less, equal, greater than rhs m-th row respectively.
290
      * Is used in sortings.
291
      *
292
      * If one of element's value is NaN or NULLs, then:
293
      * - if nan_direction_hint == -1, NaN and NULLs are considered as least than everything other;
294
      * - if nan_direction_hint ==  1, NaN and NULLs are considered as greatest than everything other.
295
      * For example, if nan_direction_hint == -1 is used by descending sorting, NaNs will be at the end.
296
      *
297
      * For non Nullable and non floating point types, nan_direction_hint is ignored.
298
      */
299
3
    int compare_at(size_t n, size_t m, const Block& rhs, int nan_direction_hint) const {
300
3
        DCHECK_EQ(columns(), rhs.columns());
301
3
        return compare_at(n, m, columns(), rhs, nan_direction_hint);
302
3
    }
303
304
    int compare_at(size_t n, size_t m, size_t num_columns, const Block& rhs,
305
7.15M
                   int nan_direction_hint) const {
306
7.15M
        DCHECK_GE(columns(), num_columns);
307
7.15M
        DCHECK_GE(rhs.columns(), num_columns);
308
309
7.15M
        DCHECK_LE(n, rows());
310
7.15M
        DCHECK_LE(m, rhs.rows());
311
9.94M
        for (size_t i = 0; i < num_columns; ++i) {
312
7.19M
            DCHECK(get_by_position(i).type->equals(*rhs.get_by_position(i).type));
313
7.19M
            auto res = get_by_position(i).column->compare_at(n, m, *(rhs.get_by_position(i).column),
314
7.19M
                                                             nan_direction_hint);
315
7.19M
            if (res) {
316
4.40M
                return res;
317
4.40M
            }
318
7.19M
        }
319
2.74M
        return 0;
320
7.15M
    }
321
322
    int compare_at(size_t n, size_t m, const std::vector<uint32_t>* compare_columns,
323
2
                   const Block& rhs, int nan_direction_hint) const {
324
2
        DCHECK_GE(columns(), compare_columns->size());
325
2
        DCHECK_GE(rhs.columns(), compare_columns->size());
326
327
2
        DCHECK_LE(n, rows());
328
2
        DCHECK_LE(m, rhs.rows());
329
3
        for (auto i : *compare_columns) {
330
3
            DCHECK(get_by_position(i).type->equals(*rhs.get_by_position(i).type));
331
3
            auto res = get_by_position(i).column->compare_at(n, m, *(rhs.get_by_position(i).column),
332
3
                                                             nan_direction_hint);
333
3
            if (res) {
334
2
                return res;
335
2
            }
336
3
        }
337
0
        return 0;
338
2
    }
339
340
    //note(wb) no DCHECK here, because this method is only used after compare_at now, so no need to repeat check here.
341
    // If this method is used in more places, you can add DCHECK case by case.
342
    int compare_column_at(size_t n, size_t m, size_t col_idx, const Block& rhs,
343
26.9k
                          int nan_direction_hint) const {
344
26.9k
        auto res = get_by_position(col_idx).column->compare_at(
345
26.9k
                n, m, *(rhs.get_by_position(col_idx).column), nan_direction_hint);
346
26.9k
        return res;
347
26.9k
    }
348
349
    // for String type or Array<String> type
350
    void shrink_char_type_column_suffix_zero(const std::vector<size_t>& char_type_idx);
351
352
    void clear_column_mem_not_keep(const std::vector<bool>& column_keep_flags,
353
                                   bool need_keep_first);
354
355
private:
356
    void erase_impl(size_t position);
357
};
358
359
using Blocks = std::vector<Block>;
360
using BlocksList = std::list<Block>;
361
using BlocksPtr = std::shared_ptr<Blocks>;
362
using BlocksPtrs = std::shared_ptr<std::vector<BlocksPtr>>;
363
364
class MutableBlock {
365
    ENABLE_FACTORY_CREATOR(MutableBlock);
366
367
private:
368
    MutableColumns _columns;
369
    DataTypes _data_types;
370
    std::vector<std::string> _names;
371
372
public:
373
48.6k
    static MutableBlock build_mutable_block(Block* block) {
374
48.6k
        return block == nullptr ? MutableBlock() : MutableBlock(block);
375
48.6k
    }
376
72.1k
    MutableBlock() = default;
377
217k
    ~MutableBlock() = default;
378
379
    MutableBlock(Block* block)
380
48.8k
            : _columns(block->mutate_columns()),
381
48.8k
              _data_types(block->get_data_types()),
382
48.8k
              _names(block->get_names()) {}
383
    MutableBlock(Block&& block)
384
96.6k
            : _columns(block.mutate_columns()),
385
96.6k
              _data_types(block.get_data_types()),
386
96.6k
              _names(block.get_names()) {}
387
388
96.0k
    void operator=(MutableBlock&& m_block) {
389
96.0k
        _columns = std::move(m_block._columns);
390
96.0k
        _data_types = std::move(m_block._data_types);
391
96.0k
        _names = std::move(m_block._names);
392
96.0k
    }
393
394
    size_t rows() const;
395
344
    size_t columns() const { return _columns.size(); }
396
397
144k
    bool empty() const { return rows() == 0; }
398
399
48.5k
    MutableColumns& mutable_columns() { return _columns; }
400
401
1
    void set_mutable_columns(MutableColumns&& columns) { _columns = std::move(columns); }
402
403
0
    DataTypes& data_types() { return _data_types; }
404
405
126
    MutableColumnPtr& get_column_by_position(size_t position) { return _columns[position]; }
406
60
    const MutableColumnPtr& get_column_by_position(size_t position) const {
407
60
        return _columns[position];
408
60
    }
409
410
0
    DataTypePtr& get_datatype_by_position(size_t position) { return _data_types[position]; }
411
22
    const DataTypePtr& get_datatype_by_position(size_t position) const {
412
22
        return _data_types[position];
413
22
    }
414
415
38
    int compare_one_column(size_t n, size_t m, size_t column_id, int nan_direction_hint) const {
416
38
        DCHECK_LE(column_id, columns());
417
38
        DCHECK_LE(n, rows());
418
38
        DCHECK_LE(m, rows());
419
38
        auto& column = get_column_by_position(column_id);
420
38
        return column->compare_at(n, m, *column, nan_direction_hint);
421
38
    }
422
423
    int compare_at(size_t n, size_t m, size_t num_columns, const MutableBlock& rhs,
424
6
                   int nan_direction_hint) const {
425
6
        DCHECK_GE(columns(), num_columns);
426
6
        DCHECK_GE(rhs.columns(), num_columns);
427
428
6
        DCHECK_LE(n, rows());
429
6
        DCHECK_LE(m, rhs.rows());
430
14
        for (size_t i = 0; i < num_columns; ++i) {
431
11
            DCHECK(get_datatype_by_position(i)->equals(*rhs.get_datatype_by_position(i)));
432
11
            auto res = get_column_by_position(i)->compare_at(n, m, *(rhs.get_column_by_position(i)),
433
11
                                                             nan_direction_hint);
434
11
            if (res) {
435
3
                return res;
436
3
            }
437
11
        }
438
3
        return 0;
439
6
    }
440
441
    int compare_at(size_t n, size_t m, const std::vector<uint32_t>* compare_columns,
442
0
                   const MutableBlock& rhs, int nan_direction_hint) const {
443
0
        DCHECK_GE(columns(), compare_columns->size());
444
0
        DCHECK_GE(rhs.columns(), compare_columns->size());
445
446
0
        DCHECK_LE(n, rows());
447
0
        DCHECK_LE(m, rhs.rows());
448
0
        for (auto i : *compare_columns) {
449
0
            DCHECK(get_datatype_by_position(i)->equals(*rhs.get_datatype_by_position(i)));
450
0
            auto res = get_column_by_position(i)->compare_at(n, m, *(rhs.get_column_by_position(i)),
451
0
                                                             nan_direction_hint);
452
0
            if (res) {
453
0
                return res;
454
0
            }
455
0
        }
456
0
        return 0;
457
0
    }
458
459
3
    std::string dump_types() const {
460
3
        std::string res;
461
9
        for (auto type : _data_types) {
462
9
            if (!res.empty()) {
463
6
                res += ", ";
464
6
            }
465
9
            res += type->get_name();
466
9
        }
467
3
        return res;
468
3
    }
469
470
    template <typename T>
471
124
    [[nodiscard]] Status merge(T&& block) {
472
124
        RETURN_IF_CATCH_EXCEPTION(return merge_impl(block););
473
124
    }
_ZN5doris12MutableBlock5mergeIRNS_5BlockEEENS_6StatusEOT_
Line
Count
Source
471
59
    [[nodiscard]] Status merge(T&& block) {
472
59
        RETURN_IF_CATCH_EXCEPTION(return merge_impl(block););
473
59
    }
_ZN5doris12MutableBlock5mergeINS_5BlockEEENS_6StatusEOT_
Line
Count
Source
471
65
    [[nodiscard]] Status merge(T&& block) {
472
65
        RETURN_IF_CATCH_EXCEPTION(return merge_impl(block););
473
65
    }
474
475
    template <typename T>
476
48.0k
    [[nodiscard]] Status merge_ignore_overflow(T&& block) {
477
48.0k
        RETURN_IF_CATCH_EXCEPTION(return merge_impl_ignore_overflow(block););
478
48.0k
    }
479
480
    // only use for join. call ignore_overflow to prevent from throw exception in join
481
    template <typename T>
482
48.0k
    [[nodiscard]] Status merge_impl_ignore_overflow(T&& block) {
483
48.0k
        if (_columns.size() != block.columns()) {
484
1
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
485
1
                    "Merge block not match, self column count: {}, [columns: {}, types: {}], "
486
1
                    "input column count: {}, [columns: {}, "
487
1
                    "types: {}], ",
488
1
                    _columns.size(), dump_names(), dump_types(), block.columns(),
489
1
                    block.dump_names(), block.dump_types());
490
1
        }
491
142k
        for (int i = 0; i < _columns.size(); ++i) {
492
94.2k
            if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
493
1
                throw doris::Exception(doris::ErrorCode::FATAL_ERROR,
494
1
                                       "Merge block not match, self:[columns: {}, types: {}], "
495
1
                                       "input:[columns: {}, types: {}], ",
496
1
                                       dump_names(), dump_types(), block.dump_names(),
497
1
                                       block.dump_types());
498
1
            }
499
94.2k
            _columns[i]->insert_range_from_ignore_overflow(
500
94.2k
                    *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0,
501
94.2k
                    block.rows());
502
94.2k
        }
503
48.0k
        return Status::OK();
504
48.0k
    }
_ZN5doris12MutableBlock26merge_impl_ignore_overflowIRNS_5BlockEEENS_6StatusEOT_
Line
Count
Source
482
48.0k
    [[nodiscard]] Status merge_impl_ignore_overflow(T&& block) {
483
48.0k
        if (_columns.size() != block.columns()) {
484
1
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
485
1
                    "Merge block not match, self column count: {}, [columns: {}, types: {}], "
486
1
                    "input column count: {}, [columns: {}, "
487
1
                    "types: {}], ",
488
1
                    _columns.size(), dump_names(), dump_types(), block.columns(),
489
1
                    block.dump_names(), block.dump_types());
490
1
        }
491
142k
        for (int i = 0; i < _columns.size(); ++i) {
492
94.2k
            if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
493
0
                throw doris::Exception(doris::ErrorCode::FATAL_ERROR,
494
0
                                       "Merge block not match, self:[columns: {}, types: {}], "
495
0
                                       "input:[columns: {}, types: {}], ",
496
0
                                       dump_names(), dump_types(), block.dump_names(),
497
0
                                       block.dump_types());
498
0
            }
499
94.2k
            _columns[i]->insert_range_from_ignore_overflow(
500
94.2k
                    *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0,
501
94.2k
                    block.rows());
502
94.2k
        }
503
48.0k
        return Status::OK();
504
48.0k
    }
_ZN5doris12MutableBlock26merge_impl_ignore_overflowINS_5BlockEEENS_6StatusEOT_
Line
Count
Source
482
1
    [[nodiscard]] Status merge_impl_ignore_overflow(T&& block) {
483
1
        if (_columns.size() != block.columns()) {
484
0
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
485
0
                    "Merge block not match, self column count: {}, [columns: {}, types: {}], "
486
0
                    "input column count: {}, [columns: {}, "
487
0
                    "types: {}], ",
488
0
                    _columns.size(), dump_names(), dump_types(), block.columns(),
489
0
                    block.dump_names(), block.dump_types());
490
0
        }
491
3
        for (int i = 0; i < _columns.size(); ++i) {
492
3
            if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
493
1
                throw doris::Exception(doris::ErrorCode::FATAL_ERROR,
494
1
                                       "Merge block not match, self:[columns: {}, types: {}], "
495
1
                                       "input:[columns: {}, types: {}], ",
496
1
                                       dump_names(), dump_types(), block.dump_names(),
497
1
                                       block.dump_types());
498
1
            }
499
2
            _columns[i]->insert_range_from_ignore_overflow(
500
2
                    *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0,
501
2
                    block.rows());
502
2
        }
503
0
        return Status::OK();
504
1
    }
505
506
    template <typename T>
507
126
    [[nodiscard]] Status merge_impl(T&& block) {
508
        // merge is not supported in dynamic block
509
126
        if (_columns.empty() && _data_types.empty()) {
510
50
            _data_types = block.get_data_types();
511
50
            _names = block.get_names();
512
50
            _columns.resize(block.columns());
513
160
            for (size_t i = 0; i < block.columns(); ++i) {
514
110
                if (block.get_by_position(i).column) {
515
109
                    _columns[i] = (*std::move(block.get_by_position(i)
516
109
                                                      .column->convert_to_full_column_if_const()))
517
109
                                          .mutate();
518
109
                } else {
519
1
                    _columns[i] = _data_types[i]->create_column();
520
1
                }
521
110
            }
522
76
        } else {
523
76
            if (_columns.size() != block.columns()) {
524
1
                return Status::Error<ErrorCode::INTERNAL_ERROR>(
525
1
                        "Merge block not match, self column count: {}, [columns: {}, types: {}], "
526
1
                        "input column count: {}, [columns: {}, "
527
1
                        "types: {}], ",
528
1
                        _columns.size(), dump_names(), dump_types(), block.columns(),
529
1
                        block.dump_names(), block.dump_types());
530
1
            }
531
195
            for (int i = 0; i < _columns.size(); ++i) {
532
120
                if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
533
1
                    DCHECK(_data_types[i]->is_nullable())
534
0
                            << " target type: " << _data_types[i]->get_name()
535
0
                            << " src type: " << block.get_by_position(i).type->get_name();
536
1
                    DCHECK(((DataTypeNullable*)_data_types[i].get())
537
1
                                   ->get_nested_type()
538
1
                                   ->equals(*block.get_by_position(i).type));
539
1
                    DCHECK(!block.get_by_position(i).type->is_nullable());
540
1
                    _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column)
541
1
                                                            ->convert_to_full_column_if_const(),
542
1
                                                   0, block.rows());
543
119
                } else {
544
119
                    _columns[i]->insert_range_from(
545
119
                            *block.get_by_position(i)
546
119
                                     .column->convert_to_full_column_if_const()
547
119
                                     .get(),
548
119
                            0, block.rows());
549
119
                }
550
120
            }
551
75
        }
552
125
        return Status::OK();
553
126
    }
_ZN5doris12MutableBlock10merge_implIRNS_5BlockEEENS_6StatusEOT_
Line
Count
Source
507
124
    [[nodiscard]] Status merge_impl(T&& block) {
508
        // merge is not supported in dynamic block
509
124
        if (_columns.empty() && _data_types.empty()) {
510
50
            _data_types = block.get_data_types();
511
50
            _names = block.get_names();
512
50
            _columns.resize(block.columns());
513
160
            for (size_t i = 0; i < block.columns(); ++i) {
514
110
                if (block.get_by_position(i).column) {
515
109
                    _columns[i] = (*std::move(block.get_by_position(i)
516
109
                                                      .column->convert_to_full_column_if_const()))
517
109
                                          .mutate();
518
109
                } else {
519
1
                    _columns[i] = _data_types[i]->create_column();
520
1
                }
521
110
            }
522
74
        } else {
523
74
            if (_columns.size() != block.columns()) {
524
0
                return Status::Error<ErrorCode::INTERNAL_ERROR>(
525
0
                        "Merge block not match, self column count: {}, [columns: {}, types: {}], "
526
0
                        "input column count: {}, [columns: {}, "
527
0
                        "types: {}], ",
528
0
                        _columns.size(), dump_names(), dump_types(), block.columns(),
529
0
                        block.dump_names(), block.dump_types());
530
0
            }
531
191
            for (int i = 0; i < _columns.size(); ++i) {
532
117
                if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
533
0
                    DCHECK(_data_types[i]->is_nullable())
534
0
                            << " target type: " << _data_types[i]->get_name()
535
0
                            << " src type: " << block.get_by_position(i).type->get_name();
536
0
                    DCHECK(((DataTypeNullable*)_data_types[i].get())
537
0
                                   ->get_nested_type()
538
0
                                   ->equals(*block.get_by_position(i).type));
539
0
                    DCHECK(!block.get_by_position(i).type->is_nullable());
540
0
                    _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column)
541
0
                                                            ->convert_to_full_column_if_const(),
542
0
                                                   0, block.rows());
543
117
                } else {
544
117
                    _columns[i]->insert_range_from(
545
117
                            *block.get_by_position(i)
546
117
                                     .column->convert_to_full_column_if_const()
547
117
                                     .get(),
548
117
                            0, block.rows());
549
117
                }
550
117
            }
551
74
        }
552
124
        return Status::OK();
553
124
    }
_ZN5doris12MutableBlock10merge_implINS_5BlockEEENS_6StatusEOT_
Line
Count
Source
507
2
    // Append the rows of `block` to this MutableBlock, column by column.
    //
    // - If this block holds no columns and no data types yet, it adopts the
    //   data types and names of `block` and takes each column (expanded to a
    //   full column if const); a missing column is replaced by an empty column
    //   created from its data type.
    // - Otherwise the column counts must match, else an INTERNAL_ERROR status
    //   is returned. Columns whose type equals the target type are appended
    //   directly; on a mismatch the only case handled is a nullable target
    //   receiving its non-nullable nested type (verified by DCHECK only), where
    //   the source column is wrapped with make_nullable before appending.
    //
    // Returns Status::OK() on success.
    [[nodiscard]] Status merge_impl(T&& block) {
508
        // merge is not supported in dynamic block
509
2
        if (_columns.empty() && _data_types.empty()) {
510
0
            _data_types = block.get_data_types();
511
0
            _names = block.get_names();
512
0
            _columns.resize(block.columns());
513
0
            for (size_t i = 0; i < block.columns(); ++i) {
514
0
                if (block.get_by_position(i).column) {
515
0
                    _columns[i] = (*std::move(block.get_by_position(i)
516
0
                                                      .column->convert_to_full_column_if_const()))
517
0
                                          .mutate();
518
0
                } else {
519
0
                    _columns[i] = _data_types[i]->create_column();
520
0
                }
521
0
            }
522
2
        } else {
523
2
            if (_columns.size() != block.columns()) {
524
1
                return Status::Error<ErrorCode::INTERNAL_ERROR>(
525
1
                        "Merge block not match, self column count: {}, [columns: {}, types: {}], "
526
1
                        "input column count: {}, [columns: {}, "
527
1
                        "types: {}], ",
528
1
                        _columns.size(), dump_names(), dump_types(), block.columns(),
529
1
                        block.dump_names(), block.dump_types());
530
1
            }
531
4
            // size_t index, matching the adopt loop above, so the bound check does not
            // compare a signed index against the container size.
            for (size_t i = 0; i < _columns.size(); ++i) {
532
3
                if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
533
1
                    DCHECK(_data_types[i]->is_nullable())
534
0
                            << " target type: " << _data_types[i]->get_name()
535
0
                            << " src type: " << block.get_by_position(i).type->get_name();
536
1
                    DCHECK(((DataTypeNullable*)_data_types[i].get())
537
1
                                   ->get_nested_type()
538
1
                                   ->equals(*block.get_by_position(i).type));
539
1
                    DCHECK(!block.get_by_position(i).type->is_nullable());
540
1
                    _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column)
541
1
                                                            ->convert_to_full_column_if_const(),
542
1
                                                   0, block.rows());
543
2
                } else {
544
2
                    _columns[i]->insert_range_from(
545
2
                            *block.get_by_position(i)
546
2
                                     .column->convert_to_full_column_if_const()
547
2
                                     .get(),
548
2
                            0, block.rows());
549
2
                }
550
3
            }
551
1
        }
552
1
        return Status::OK();
553
2
    }
554
555
    // Move the columns' data into a Block. This invalidates the moved-from columns.
556
    Block to_block(int start_column = 0);
557
    Block to_block(int start_column, int end_column);
558
559
    void swap(MutableBlock& other) noexcept;
560
561
    void add_row(const Block* block, int row);
562
    // Batch row insertion should return an error status if memory allocation fails.
563
    Status add_rows(const Block* block, const uint32_t* row_begin, const uint32_t* row_end,
564
                    const std::vector<int>* column_offset = nullptr);
565
    Status add_rows(const Block* block, size_t row_begin, size_t length);
566
567
    std::string dump_data(size_t row_limit = 100) const;
568
    std::string dump_data_json(size_t row_limit = 100) const;
569
570
83
    // Empty the block completely: drops every column together with its data
    // type and its name, so all three member containers end up empty.
    void clear() {
571
83
        _columns.clear();
572
83
        _data_types.clear();
573
83
        _names.clear();
574
83
    }
575
576
    // Columns persist; only the columns' inner data is removed.
577
    void clear_column_data() noexcept;
578
579
    size_t allocated_bytes() const;
580
581
48.0k
    // Total of byte_size() over every column held in _columns (0 when the
    // block has no columns). Each entry is dereferenced without a null check.
    size_t bytes() const {
582
48.0k
        size_t res = 0;
583
94.0k
        for (const auto& elem : _columns) {
584
94.0k
            res += elem->byte_size();
585
94.0k
        }
586
587
48.0k
        return res;
588
48.0k
    }
589
590
0
    // Mutable reference to the column-name list; changes made through it
    // modify _names in place.
    std::vector<std::string>& get_names() { return _names; }
591
592
    /** Get a list of column names separated by commas. */
593
    std::string dump_names() const;
594
};
595
596
// Reference to a single row: a shared pointer to the owning Block plus the
// index of the row inside it.
struct IteratorRowRef {
597
    // Block that holds the referenced row; set to nullptr by reset().
    std::shared_ptr<Block> block;
598
    // Row index inside `block`; reset() sets it to -1.
    int row_pos;
599
    // NOTE(review): presumably flags that this row equals a neighbouring row
    // in the caller's iteration order; confirm against the users of this struct.
    bool is_same;
600
601
    // Compare this row with `rhs` by delegating to Block::compare_at.
    // `compare_arguments` is forwarded unchanged; the trailing -1 is passed as
    // the last argument (presumably the NaN direction hint; confirm against
    // Block::compare_at). Both `block` and `rhs.block` are dereferenced
    // without a null check.
    template <typename T>
602
1.06M
    int compare(const IteratorRowRef& rhs, const T& compare_arguments) const {
603
1.06M
        return block->compare_at(row_pos, rhs.row_pos, compare_arguments, *rhs.block, -1);
604
1.06M
    }
Unexecuted instantiation: _ZNK5doris14IteratorRowRef7compareIPKSt6vectorIjSaIjEEEEiRKS0_RKT_
_ZNK5doris14IteratorRowRef7compareImEEiRKS0_RKT_
Line
Count
Source
602
1.06M
    int compare(const IteratorRowRef& rhs, const T& compare_arguments) const {
603
1.06M
        return block->compare_at(row_pos, rhs.row_pos, compare_arguments, *rhs.block, -1);
604
1.06M
    }
605
606
512
    // Return the reference to its "points at nothing" state: no block,
    // row index -1, is_same cleared.
    void reset() {
607
512
        block = nullptr;
608
512
        row_pos = -1;
609
512
        is_same = false;
610
512
    }
611
};
612
613
using BlockView = std::vector<IteratorRowRef>;
614
using BlockUPtr = std::unique_ptr<Block>;
615
616
} // namespace doris