Coverage Report

Created: 2026-06-02 01:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/core/block/block.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Block.h
19
// and modified by Doris
20
21
#pragma once
22
23
#include <glog/logging.h>
24
#include <parallel_hashmap/phmap.h>
25
26
#include <cstddef>
27
#include <cstdint>
28
#include <initializer_list>
29
#include <list>
30
#include <memory>
31
#include <ostream>
32
#include <set>
33
#include <string>
34
#include <utility>
35
#include <vector>
36
37
#include "common/be_mock_util.h"
38
#include "common/exception.h"
39
#include "common/factory_creator.h"
40
#include "common/status.h"
41
#include "core/block/column_with_type_and_name.h"
42
#include "core/block/columns_with_type_and_name.h"
43
#include "core/column/column.h"
44
#include "core/column/column_nullable.h"
45
#include "core/data_type/data_type.h"
46
#include "core/data_type/data_type_nullable.h"
47
#include "core/types.h"
48
49
class SipHash;
50
51
namespace doris {
52
53
class TupleDescriptor;
54
class PBlock;
55
class SlotDescriptor;
56
57
namespace segment_v2 {
58
enum CompressionTypePB : int;
59
} // namespace segment_v2
60
61
/** Container for set of columns for bunch of rows in memory.
62
  * This is unit of data processing.
63
  * Also contains metadata - data types of columns and their names
64
  *  (either original names from a table, or generated names during temporary calculations).
65
  * Allows to insert, remove columns in arbitrary position, to change order of columns.
66
  */
67
class MutableBlock;
68
69
class Block {
70
    ENABLE_FACTORY_CREATOR(Block);
71
72
private:
73
    using Container = ColumnsWithTypeAndName;
74
    Container data;
75
76
public:
77
1.41M
    Block() = default;
78
    Block(std::initializer_list<ColumnWithTypeAndName> il);
79
    Block(ColumnsWithTypeAndName data_);
80
    Block(const std::vector<SlotDescriptor*>& slots, size_t block_size);
81
    Block(const std::vector<SlotDescriptor>& slots, size_t block_size);
82
83
1.80M
    MOCK_FUNCTION ~Block() = default;
84
850
    Block(const Block& block) = default;
85
48.2k
    Block& operator=(const Block& p) = default;
86
63.6k
    Block(Block&& block) = default;
87
871
    Block& operator=(Block&& other) = default;
88
89
    void reserve(size_t count);
90
    // Make sure the names are no longer needed by anything that uses this block
91
    void clear_names();
92
93
    /// insert the column at the specified position
94
    void insert(size_t position, const ColumnWithTypeAndName& elem);
95
    void insert(size_t position, ColumnWithTypeAndName&& elem);
96
    /// insert the column to the end
97
    void insert(const ColumnWithTypeAndName& elem);
98
    void insert(ColumnWithTypeAndName&& elem);
99
    /// remove the column at the specified position
100
    void erase(size_t position);
101
    /// remove the columns in the range [start, end)
102
    void erase_tail(size_t start);
103
    /// remove the columns at the specified positions
104
    void erase(const std::set<size_t>& positions);
105
    // T is std::set<int>, std::vector<int> or std::list<int>
106
    template <class T>
107
0
    void erase_not_in(const T& container) {
108
0
        Container new_data;
109
0
        for (auto pos : container) {
110
0
            new_data.emplace_back(std::move(data[pos]));
111
0
        }
112
0
        std::swap(data, new_data);
113
0
    }
114
115
17
    std::unordered_map<std::string, uint32_t> get_name_to_pos_map() const {
116
17
        std::unordered_map<std::string, uint32_t> name_to_index_map;
117
136
        for (uint32_t i = 0; i < data.size(); ++i) {
118
119
            name_to_index_map[data[i].name] = i;
119
119
        }
120
17
        return name_to_index_map;
121
17
    }
122
123
    /// References are invalidated after calling functions above.
124
17.6M
    ColumnWithTypeAndName& get_by_position(size_t position) {
125
17.6M
        DCHECK(data.size() > position)
126
0
                << ", data.size()=" << data.size() << ", position=" << position;
127
17.6M
        return data[position];
128
17.6M
    }
129
29.1M
    const ColumnWithTypeAndName& get_by_position(size_t position) const { return data[position]; }
130
131
31.6k
    void replace_by_position(size_t position, ColumnPtr&& res) {
132
31.6k
        this->get_by_position(position).column = std::move(res);
133
31.6k
    }
134
135
4
    void replace_by_position(size_t position, const ColumnPtr& res) {
136
4
        this->get_by_position(position).column = res;
137
4
    }
138
139
705
    void replace_by_position_if_const(size_t position) {
140
705
        auto& element = this->get_by_position(position);
141
705
        element.column = element.column->convert_to_full_column_if_const();
142
705
    }
143
144
    ColumnWithTypeAndName& safe_get_by_position(size_t position);
145
    const ColumnWithTypeAndName& safe_get_by_position(size_t position) const;
146
147
71.7k
    Container::iterator begin() { return data.begin(); }
148
71.7k
    Container::iterator end() { return data.end(); }
149
9.48k
    Container::const_iterator begin() const { return data.begin(); }
150
9.47k
    Container::const_iterator end() const { return data.end(); }
151
0
    Container::const_iterator cbegin() const { return data.cbegin(); }
152
0
    Container::const_iterator cend() const { return data.cend(); }
153
154
    // Get position of column by name. Returns -1 if there is no column with that name.
155
    // ATTN: this method is O(N). Better to maintain a name -> position map in the caller if you need to call it frequently.
156
    int get_position_by_name(const std::string& name) const;
157
158
    const ColumnsWithTypeAndName& get_columns_with_type_and_name() const;
159
160
    std::vector<std::string> get_names() const;
161
    DataTypes get_data_types() const;
162
163
59
    DataTypePtr get_data_type(size_t index) const {
164
59
        CHECK(index < data.size());
165
59
        return data[index].type;
166
59
    }
167
168
    /// Returns the number of rows of the first column in the block that is not nullptr. If there are no columns, returns 0.
169
    size_t rows() const;
170
171
    // Cut the rows in block, use in LIMIT operation
172
    void set_num_rows(size_t length);
173
174
    // Skip the rows in block, use in OFFSET, LIMIT operation
175
    void skip_num_rows(int64_t& offset);
176
177
    /// As assumed elsewhere in the code, the number of columns won't exceed the int16 range, so there is no need to worry when we
178
    ///  assign it to int32.
179
18.1M
    uint32_t columns() const { return static_cast<uint32_t>(data.size()); }
180
181
    /// Checks that every column in block is not nullptr and has same number of elements.
182
    void check_number_of_rows(bool allow_null_columns = false) const;
183
184
    Status check_type_and_column() const;
185
186
    /// Approximate number of bytes used by column data in memory.
187
    /// This reflects the actual data footprint (e.g. string contents, numeric arrays)
188
    /// and is the metric used by adaptive batch size byte budgets.
189
    size_t bytes() const;
190
191
    /// Approximate number of allocated (reserved) bytes in memory.
192
    /// This may be larger than bytes() due to pre-allocated capacity in vectors/arenas.
193
    /// Used for memory tracking and profiling.
194
    MOCK_FUNCTION size_t allocated_bytes() const;
195
196
    /** Get a list of column names separated by commas. */
197
    std::string dump_names() const;
198
199
    std::string dump_types() const;
200
201
    /** List of names, types and lengths of columns. Designed for debugging. */
202
    std::string dump_structure() const;
203
204
    /** Get the same block, but empty. */
205
    Block clone_empty() const;
206
207
    Columns get_columns() const;
208
    Columns get_columns_and_convert();
209
210
    Block clone_without_columns(const std::vector<int>* column_offset = nullptr) const;
211
212
    /** Get empty columns with the same types as in block. */
213
    MutableColumns clone_empty_columns() const;
214
215
    // RAII owner for mutating columns borrowed from a live Block. While the
216
    // guard is alive, the Block's column slots are moved out and column data
217
    // must be accessed through mutable_columns(). The guard restores columns on
218
    // destruction, so use it when the caller may exit early after detaching.
219
    class ScopedMutableColumns {
220
    public:
221
        explicit ScopedMutableColumns(Block& block);
222
        ~ScopedMutableColumns();
223
224
        ScopedMutableColumns(const ScopedMutableColumns&) = delete;
225
        ScopedMutableColumns& operator=(const ScopedMutableColumns&) = delete;
226
        ScopedMutableColumns(ScopedMutableColumns&& other) noexcept;
227
        ScopedMutableColumns& operator=(ScopedMutableColumns&& other) noexcept;
228
229
1.66k
        MutableColumns& mutable_columns() { return _columns; }
230
0
        const MutableColumns& mutable_columns() const { return _columns; }
231
        const DataTypePtr& get_datatype_by_position(size_t position) const;
232
        const std::string& get_name_by_position(size_t position) const;
233
234
        // Transfer the borrowed owners to another RAII object that will restore
235
        // them. After release(), the original Block remains without columns
236
        // until that owner restores them. Normal callers should let this guard
237
        // restore on destruction.
238
        MutableColumns release();
239
        void restore();
240
241
    private:
242
        Block* _block = nullptr;
243
        MutableColumns _columns;
244
    };
245
246
    // Single-column variant for localized mutation of a live Block slot. The
247
    // selected slot is unavailable from the Block until this guard restores it.
248
    class ScopedMutableColumn {
249
    public:
250
        ScopedMutableColumn(Block& block, size_t position);
251
        ~ScopedMutableColumn();
252
253
        ScopedMutableColumn(const ScopedMutableColumn&) = delete;
254
        ScopedMutableColumn& operator=(const ScopedMutableColumn&) = delete;
255
        ScopedMutableColumn(ScopedMutableColumn&& other) noexcept;
256
        ScopedMutableColumn& operator=(ScopedMutableColumn&& other) noexcept;
257
258
97
        MutableColumnPtr& mutable_column() { return _column; }
259
0
        const MutableColumnPtr& mutable_column() const { return _column; }
260
261
        void restore();
262
263
    private:
264
        Block* _block = nullptr;
265
        size_t _position = 0;
266
        MutableColumnPtr _column;
267
    };
268
269
    /** Get columns from a consumed block for mutation. Columns in block will be nullptr. */
270
    MutableColumns mutate_columns() &&;
271
    MutableColumns mutate_columns() & = delete;
272
273
    /** Temporarily mutate a live Block's columns. The returned guard owns the columns and
274
      * restores them on destruction; prefer this over manual move/writeback.
275
      */
276
    ScopedMutableColumns mutate_columns_scoped() &;
277
    ScopedMutableColumns mutate_columns_scoped() && = delete;
278
279
    /** Temporarily mutate one live Block column; use when only one slot needs ownership. */
280
    ScopedMutableColumn mutate_column_scoped(size_t position) &;
281
    ScopedMutableColumn mutate_column_scoped(size_t position) && = delete;
282
283
    /** Replace columns in a block */
284
    void set_columns(MutableColumns&& columns);
285
    void clear();
286
    void swap(Block& other) noexcept;
287
    void swap(Block&& other) noexcept;
288
289
    // Shuffle columns in place based on the result_column_ids
290
    void shuffle_columns(const std::vector<int>& result_column_ids);
291
292
    // column_size == -1 clears all columns; otherwise clear [0, column_size)
293
    // and drop the rest. Shared columns are detached through clone_empty(), so
294
    // allocation or clone failures propagate.
295
    void clear_column_data(int64_t column_size = -1);
296
    void clear_column_data(const std::vector<uint32_t>& columns_to_clear);
297
298
15.4k
    MOCK_FUNCTION bool mem_reuse() { return !data.empty(); }
299
300
12
    bool is_empty_column() { return data.empty(); }
301
302
3.33M
    bool empty() const { return rows() == 0; }
303
304
    /** 
305
      * Updates SipHash of the Block, using update method of columns.
306
      * Returns hash for block, that could be used to differentiate blocks
307
      *  with same structure, but different data.
308
      */
309
    void update_hash(SipHash& hash) const;
310
311
    /** 
312
     *  Get block data in string. 
313
     *  If code is in default_implementation_for_nulls or something similar, the nullability of the type and of the column could
314
     *   temporarily differ. Set allow_null_mismatch to true to dump it correctly.
315
    */
316
    std::string dump_data(size_t begin = 0, size_t row_limit = 100,
317
                          bool allow_null_mismatch = false) const;
318
319
    std::string dump_data_json(size_t begin = 0, size_t row_limit = 100,
320
                               bool allow_null_mismatch = false) const;
321
322
    /** Get one line of data from the block; only used when loading data */
323
    std::string dump_one_line(size_t row, int column_end) const;
324
325
    Status append_to_block_by_selector(MutableBlock* dst, const IColumn::Selector& selector) const;
326
327
    // need exception safety
328
    static void filter_block_internal(Block* block, const std::vector<uint32_t>& columns_to_filter,
329
                                      const IColumn::Filter& filter);
330
    // need exception safety
331
    static void filter_block_internal(Block* block, const IColumn::Filter& filter,
332
                                      uint32_t column_to_keep);
333
    // need exception safety
334
    static void filter_block_internal(Block* block, const IColumn::Filter& filter);
335
336
    static Status filter_block(Block* block, const std::vector<uint32_t>& columns_to_filter,
337
                               size_t filter_column_id, size_t column_to_keep);
338
339
    static Status filter_block(Block* block, size_t filter_column_id, size_t column_to_keep);
340
341
708
    static void erase_useless_column(Block* block, size_t column_to_keep) {
342
708
        block->erase_tail(column_to_keep);
343
708
    }
344
345
    // serialize block to PBlock
346
    Status serialize(int be_exec_version, PBlock* pblock, size_t* uncompressed_bytes,
347
                     size_t* compressed_bytes, int64_t* compress_time,
348
                     segment_v2::CompressionTypePB compression_type,
349
                     bool allow_transfer_large_data = false) const;
350
351
    Status deserialize(const PBlock& pblock, size_t* uncompressed_bytes, int64_t* decompress_time);
352
353
    std::unique_ptr<Block> create_same_struct_block(size_t size, bool is_reserve = false) const;
354
355
    /** Compares (*this) n-th row and rhs m-th row.
356
      * Returns a negative number, 0, or a positive number if (*this) n-th row is less than, equal to, or greater than rhs m-th row respectively.
357
      * Is used in sortings.
358
      *
359
      * If one of the elements' values is NaN or NULL, then:
360
      * - if nan_direction_hint == -1, NaN and NULLs are considered less than everything else;
361
      * - if nan_direction_hint ==  1, NaN and NULLs are considered greater than everything else.
362
      * For example, if nan_direction_hint == -1 is used by descending sorting, NaNs will be at the end.
363
      *
364
      * For non Nullable and non floating point types, nan_direction_hint is ignored.
365
      */
366
3
    int compare_at(size_t n, size_t m, const Block& rhs, int nan_direction_hint) const {
367
3
        DCHECK_EQ(columns(), rhs.columns());
368
3
        return compare_at(n, m, columns(), rhs, nan_direction_hint);
369
3
    }
370
371
    int compare_at(size_t n, size_t m, size_t num_columns, const Block& rhs,
372
7.15M
                   int nan_direction_hint) const {
373
7.15M
        DCHECK_GE(columns(), num_columns);
374
7.15M
        DCHECK_GE(rhs.columns(), num_columns);
375
376
7.15M
        DCHECK_LE(n, rows());
377
7.15M
        DCHECK_LE(m, rhs.rows());
378
9.94M
        for (size_t i = 0; i < num_columns; ++i) {
379
7.19M
            DCHECK(get_by_position(i).type->equals(*rhs.get_by_position(i).type));
380
7.19M
            auto res = get_by_position(i).column->compare_at(n, m, *(rhs.get_by_position(i).column),
381
7.19M
                                                             nan_direction_hint);
382
7.19M
            if (res) {
383
4.40M
                return res;
384
4.40M
            }
385
7.19M
        }
386
2.74M
        return 0;
387
7.15M
    }
388
389
    int compare_at(size_t n, size_t m, const std::vector<uint32_t>* compare_columns,
390
2
                   const Block& rhs, int nan_direction_hint) const {
391
2
        DCHECK_GE(columns(), compare_columns->size());
392
2
        DCHECK_GE(rhs.columns(), compare_columns->size());
393
394
2
        DCHECK_LE(n, rows());
395
2
        DCHECK_LE(m, rhs.rows());
396
3
        for (auto i : *compare_columns) {
397
3
            DCHECK(get_by_position(i).type->equals(*rhs.get_by_position(i).type));
398
3
            auto res = get_by_position(i).column->compare_at(n, m, *(rhs.get_by_position(i).column),
399
3
                                                             nan_direction_hint);
400
3
            if (res) {
401
2
                return res;
402
2
            }
403
3
        }
404
0
        return 0;
405
2
    }
406
407
    //note(wb) no DCHECK here, because this method is only used after compare_at now, so no need to repeat check here.
408
    // If this method is used in more places, you can add DCHECK case by case.
409
    int compare_column_at(size_t n, size_t m, size_t col_idx, const Block& rhs,
410
26.7k
                          int nan_direction_hint) const {
411
26.7k
        auto res = get_by_position(col_idx).column->compare_at(
412
26.7k
                n, m, *(rhs.get_by_position(col_idx).column), nan_direction_hint);
413
26.7k
        return res;
414
26.7k
    }
415
416
    void clear_column_mem_not_keep(const std::vector<bool>& column_keep_flags,
417
                                   bool need_keep_first);
418
419
    // Helper: sum byte_size() of all mutable columns.
420
    // Unlike Block::bytes() which operates on immutable ColumnPtr,
421
    // this works on MutableColumns during block construction (e.g. in BlockReader).
422
300k
    static inline size_t columns_byte_size(const MutableColumns& cols) {
423
300k
        size_t total = 0;
424
605k
        for (const auto& col : cols) {
425
605k
            total += col->byte_size();
426
605k
        }
427
300k
        return total;
428
300k
    }
429
430
private:
431
    void erase_impl(size_t position);
432
};
433
434
using Blocks = std::vector<Block>;
435
using BlocksList = std::list<Block>;
436
using BlocksPtr = std::shared_ptr<Blocks>;
437
using BlocksPtrs = std::shared_ptr<std::vector<BlocksPtr>>;
438
439
class MutableBlock {
440
    ENABLE_FACTORY_CREATOR(MutableBlock);
441
442
private:
443
    MutableColumns _columns;
444
    DataTypes _data_types;
445
    std::vector<std::string> _names;
446
447
public:
448
    // Build from a consumed Block. This has no restore contract: the source
449
    // Block is left without columns and must not be used as a live output block.
450
    // For caller-owned live Blocks, use ScopedMutableBlock or
451
    // mutate_columns_scoped() instead.
452
48.0k
    static MutableBlock build_mutable_block(Block&& block) {
453
48.0k
        return MutableBlock(std::move(block));
454
48.0k
    }
455
1
    static MutableBlock build_mutable_block(std::nullptr_t) { return MutableBlock(); }
456
    static MutableBlock build_mutable_block(Block* block) = delete;
457
73.0k
    MutableBlock() = default;
458
217k
    ~MutableBlock() = default;
459
    MutableBlock(const MutableBlock&) = delete;
460
    MutableBlock& operator=(const MutableBlock&) = delete;
461
    MutableBlock(MutableBlock&& m_block) noexcept
462
            : _columns(std::move(m_block._columns)),
463
              _data_types(std::move(m_block._data_types)),
464
0
              _names(std::move(m_block._names)) {}
465
466
    // Consumes block columns and converts them to mutable columns recursively.
467
    // This constructor is for temporary/owned Blocks only.
468
    MutableBlock(Block&& block)
469
144k
            : _columns(std::move(block).mutate_columns()),
470
144k
              _data_types(block.get_data_types()),
471
144k
              _names(block.get_names()) {}
472
473
96.0k
    MutableBlock& operator=(MutableBlock&& m_block) noexcept {
474
96.0k
        _columns = std::move(m_block._columns);
475
96.0k
        _data_types = std::move(m_block._data_types);
476
96.0k
        _names = std::move(m_block._names);
477
96.0k
        return *this;
478
96.0k
    }
479
480
    size_t rows() const;
481
444
    size_t columns() const { return _columns.size(); }
482
483
144k
    bool empty() const { return rows() == 0; }
484
485
49.4k
    MutableColumns& mutable_columns() { return _columns; }
486
0
    const MutableColumns& mutable_columns() const { return _columns; }
487
488
814
    void set_mutable_columns(MutableColumns&& columns) { _columns = std::move(columns); }
489
490
813
    DataTypes& data_types() { return _data_types; }
491
492
158
    MutableColumnPtr& get_column_by_position(size_t position) { return _columns[position]; }
493
64
    const MutableColumnPtr& get_column_by_position(size_t position) const {
494
64
        return _columns[position];
495
64
    }
496
497
9
    DataTypePtr& get_datatype_by_position(size_t position) { return _data_types[position]; }
498
26
    const DataTypePtr& get_datatype_by_position(size_t position) const {
499
26
        return _data_types[position];
500
26
    }
501
502
38
    int compare_one_column(size_t n, size_t m, size_t column_id, int nan_direction_hint) const {
503
38
        DCHECK_LE(column_id, columns());
504
38
        DCHECK_LE(n, rows());
505
38
        DCHECK_LE(m, rows());
506
38
        auto& column = get_column_by_position(column_id);
507
38
        return column->compare_at(n, m, *column, nan_direction_hint);
508
38
    }
509
510
    int compare_at(size_t n, size_t m, size_t num_columns, const MutableBlock& rhs,
511
6
                   int nan_direction_hint) const {
512
6
        DCHECK_GE(columns(), num_columns);
513
6
        DCHECK_GE(rhs.columns(), num_columns);
514
515
6
        DCHECK_LE(n, rows());
516
6
        DCHECK_LE(m, rhs.rows());
517
14
        for (size_t i = 0; i < num_columns; ++i) {
518
11
            DCHECK(get_datatype_by_position(i)->equals(*rhs.get_datatype_by_position(i)));
519
11
            auto res = get_column_by_position(i)->compare_at(n, m, *(rhs.get_column_by_position(i)),
520
11
                                                             nan_direction_hint);
521
11
            if (res) {
522
3
                return res;
523
3
            }
524
11
        }
525
3
        return 0;
526
6
    }
527
528
    int compare_at(size_t n, size_t m, const std::vector<uint32_t>* compare_columns,
529
0
                   const MutableBlock& rhs, int nan_direction_hint) const {
530
0
        DCHECK_GE(columns(), compare_columns->size());
531
0
        DCHECK_GE(rhs.columns(), compare_columns->size());
532
533
0
        DCHECK_LE(n, rows());
534
0
        DCHECK_LE(m, rhs.rows());
535
0
        for (auto i : *compare_columns) {
536
0
            DCHECK(get_datatype_by_position(i)->equals(*rhs.get_datatype_by_position(i)));
537
0
            auto res = get_column_by_position(i)->compare_at(n, m, *(rhs.get_column_by_position(i)),
538
0
                                                             nan_direction_hint);
539
0
            if (res) {
540
0
                return res;
541
0
            }
542
0
        }
543
0
        return 0;
544
0
    }
545
546
4
    std::string dump_types() const {
547
4
        std::string res;
548
10
        for (auto type : _data_types) {
549
10
            if (!res.empty()) {
550
6
                res += ", ";
551
6
            }
552
10
            res += type->get_name();
553
10
        }
554
4
        return res;
555
4
    }
556
557
    template <typename T>
558
135
    [[nodiscard]] Status merge(T&& block) {
559
135
        RETURN_IF_CATCH_EXCEPTION(return merge_impl(block););
560
135
    }
_ZN5doris12MutableBlock5mergeIRNS_5BlockEEENS_6StatusEOT_
Line
Count
Source
558
57
    [[nodiscard]] Status merge(T&& block) {
559
57
        RETURN_IF_CATCH_EXCEPTION(return merge_impl(block););
560
57
    }
_ZN5doris12MutableBlock5mergeINS_5BlockEEENS_6StatusEOT_
Line
Count
Source
558
78
    [[nodiscard]] Status merge(T&& block) {
559
78
        RETURN_IF_CATCH_EXCEPTION(return merge_impl(block););
560
78
    }
561
562
    template <typename T>
563
48.0k
    [[nodiscard]] Status merge_ignore_overflow(T&& block) {
564
48.0k
        RETURN_IF_CATCH_EXCEPTION(return merge_impl_ignore_overflow(block););
565
48.0k
    }
_ZN5doris12MutableBlock21merge_ignore_overflowIRNS_5BlockEEENS_6StatusEOT_
Line
Count
Source
563
1
    [[nodiscard]] Status merge_ignore_overflow(T&& block) {
564
1
        RETURN_IF_CATCH_EXCEPTION(return merge_impl_ignore_overflow(block););
565
1
    }
_ZN5doris12MutableBlock21merge_ignore_overflowINS_5BlockEEENS_6StatusEOT_
Line
Count
Source
563
48.0k
    [[nodiscard]] Status merge_ignore_overflow(T&& block) {
564
48.0k
        RETURN_IF_CATCH_EXCEPTION(return merge_impl_ignore_overflow(block););
565
48.0k
    }
566
567
    // Only used for join. Uses the ignore_overflow insert to prevent an exception from being thrown in join.
568
    template <typename T>
569
48.0k
    [[nodiscard]] Status merge_impl_ignore_overflow(T&& block) {
570
48.0k
        if (_columns.size() != block.columns()) {
571
1
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
572
1
                    "Merge block not match, self column count: {}, [columns: {}, types: {}], "
573
1
                    "input column count: {}, [columns: {}, "
574
1
                    "types: {}], ",
575
1
                    _columns.size(), dump_names(), dump_types(), block.columns(),
576
1
                    block.dump_names(), block.dump_types());
577
1
        }
578
142k
        for (int i = 0; i < _columns.size(); ++i) {
579
94.2k
            if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
580
1
                throw doris::Exception(doris::ErrorCode::FATAL_ERROR,
581
1
                                       "Merge block not match, self:[columns: {}, types: {}], "
582
1
                                       "input:[columns: {}, types: {}], ",
583
1
                                       dump_names(), dump_types(), block.dump_names(),
584
1
                                       block.dump_types());
585
1
            }
586
94.2k
            _columns[i]->insert_range_from_ignore_overflow(
587
94.2k
                    *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0,
588
94.2k
                    block.rows());
589
94.2k
        }
590
48.0k
        return Status::OK();
591
48.0k
    }
_ZN5doris12MutableBlock26merge_impl_ignore_overflowIRNS_5BlockEEENS_6StatusEOT_
Line
Count
Source
569
48.0k
    [[nodiscard]] Status merge_impl_ignore_overflow(T&& block) {
570
48.0k
        if (_columns.size() != block.columns()) {
571
1
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
572
1
                    "Merge block not match, self column count: {}, [columns: {}, types: {}], "
573
1
                    "input column count: {}, [columns: {}, "
574
1
                    "types: {}], ",
575
1
                    _columns.size(), dump_names(), dump_types(), block.columns(),
576
1
                    block.dump_names(), block.dump_types());
577
1
        }
578
142k
        for (int i = 0; i < _columns.size(); ++i) {
579
94.2k
            if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
580
0
                throw doris::Exception(doris::ErrorCode::FATAL_ERROR,
581
0
                                       "Merge block not match, self:[columns: {}, types: {}], "
582
0
                                       "input:[columns: {}, types: {}], ",
583
0
                                       dump_names(), dump_types(), block.dump_names(),
584
0
                                       block.dump_types());
585
0
            }
586
94.2k
            _columns[i]->insert_range_from_ignore_overflow(
587
94.2k
                    *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0,
588
94.2k
                    block.rows());
589
94.2k
        }
590
48.0k
        return Status::OK();
591
48.0k
    }
_ZN5doris12MutableBlock26merge_impl_ignore_overflowINS_5BlockEEENS_6StatusEOT_
Line
Count
Source
569
1
    [[nodiscard]] Status merge_impl_ignore_overflow(T&& block) {
570
1
        if (_columns.size() != block.columns()) {
571
0
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
572
0
                    "Merge block not match, self column count: {}, [columns: {}, types: {}], "
573
0
                    "input column count: {}, [columns: {}, "
574
0
                    "types: {}], ",
575
0
                    _columns.size(), dump_names(), dump_types(), block.columns(),
576
0
                    block.dump_names(), block.dump_types());
577
0
        }
578
3
        for (int i = 0; i < _columns.size(); ++i) {
579
3
            if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
580
1
                throw doris::Exception(doris::ErrorCode::FATAL_ERROR,
581
1
                                       "Merge block not match, self:[columns: {}, types: {}], "
582
1
                                       "input:[columns: {}, types: {}], ",
583
1
                                       dump_names(), dump_types(), block.dump_names(),
584
1
                                       block.dump_types());
585
1
            }
586
2
            _columns[i]->insert_range_from_ignore_overflow(
587
2
                    *block.get_by_position(i).column->convert_to_full_column_if_const().get(), 0,
588
2
                    block.rows());
589
2
        }
590
0
        return Status::OK();
591
1
    }
592
593
    // Appends every row of `block` to this MutableBlock, column by column.
    //
    // Two cases:
    //  * This block has neither columns nor data types yet: it adopts the
    //    input's data types and names and takes one column per input column.
    //  * Otherwise the column counts must match, and each input column is
    //    appended to the column at the same position.
    //
    // T is a forwarding reference; `block` must provide get_data_types(),
    // get_names(), columns(), rows(), get_by_position(), dump_names() and
    // dump_types().
    //
    // Returns Status::OK() on success, or an INTERNAL_ERROR status when the
    // column counts differ. A per-column type mismatch is only validated by
    // DCHECKs (debug-build assertions); no error status is returned for it.
    template <typename T>
594
137
    [[nodiscard]] Status merge_impl(T&& block) {
595
        // merge is not supported in dynamic block
596
137
        // Case 1: nothing stored yet, so take the schema from the input.
        if (_columns.empty() && _data_types.empty()) {
597
40
            _data_types = block.get_data_types();
598
40
            _names = block.get_names();
599
40
            _columns.resize(block.columns());
600
139
            for (size_t i = 0; i < block.columns(); ++i) {
601
99
                if (block.get_by_position(i).column) {
602
98
                    // Expand a const column to a full column, then obtain a
                    // mutable column from it via mutate().
                    _columns[i] = (*std::move(block.get_by_position(i)
603
98
                                                      .column->convert_to_full_column_if_const()))
604
98
                                          .mutate();
605
98
                } else {
606
1
                    // The input slot has no column: start from a fresh column
                    // created by the corresponding data type.
                    _columns[i] = _data_types[i]->create_column();
607
1
                }
608
99
            }
609
97
        } else {
610
97
            // Case 2: append to existing columns; the column counts must agree.
            if (_columns.size() != block.columns()) {
611
2
                return Status::Error<ErrorCode::INTERNAL_ERROR>(
612
2
                        "Merge block not match, self column count: {}, [columns: {}, types: {}], "
613
2
                        "input column count: {}, [columns: {}, "
614
2
                        "types: {}], ",
615
2
                        _columns.size(), dump_names(), dump_types(), block.columns(),
616
2
                        block.dump_names(), block.dump_types());
617
2
            }
618
250
            for (int i = 0; i < _columns.size(); ++i) {
619
155
                if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
620
1
                    // Types differ. The only combination the assertions below
                    // accept is: target is Nullable(X) and source is plain,
                    // non-nullable X.
                    DCHECK(_data_types[i]->is_nullable())
621
0
                            << " target type: " << _data_types[i]->get_name()
622
0
                            << " src type: " << block.get_by_position(i).type->get_name();
623
1
                    DCHECK(((DataTypeNullable*)_data_types[i].get())
624
1
                                   ->get_nested_type()
625
1
                                   ->equals(*block.get_by_position(i).type));
626
1
                    DCHECK(!block.get_by_position(i).type->is_nullable());
627
1
                    // Wrap the source column with make_nullable() so it can be
                    // appended to the nullable target column.
                    _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column)
628
1
                                                            ->convert_to_full_column_if_const(),
629
1
                                                   0, block.rows());
630
154
                } else {
631
154
                    // Same type: append rows [0, block.rows()) of the source
                    // column after expanding it if it is const.
                    _columns[i]->insert_range_from(
632
154
                            *block.get_by_position(i)
633
154
                                     .column->convert_to_full_column_if_const()
634
154
                                     .get(),
635
154
                            0, block.rows());
636
154
                }
637
155
            }
638
95
        }
639
135
        return Status::OK();
640
137
    }
_ZN5doris12MutableBlock10merge_implIRNS_5BlockEEENS_6StatusEOT_
Line
Count
Source
594
135
    [[nodiscard]] Status merge_impl(T&& block) {
595
        // merge is not supported in dynamic block
596
135
        if (_columns.empty() && _data_types.empty()) {
597
40
            _data_types = block.get_data_types();
598
40
            _names = block.get_names();
599
40
            _columns.resize(block.columns());
600
139
            for (size_t i = 0; i < block.columns(); ++i) {
601
99
                if (block.get_by_position(i).column) {
602
98
                    _columns[i] = (*std::move(block.get_by_position(i)
603
98
                                                      .column->convert_to_full_column_if_const()))
604
98
                                          .mutate();
605
98
                } else {
606
1
                    _columns[i] = _data_types[i]->create_column();
607
1
                }
608
99
            }
609
95
        } else {
610
95
            if (_columns.size() != block.columns()) {
611
1
                return Status::Error<ErrorCode::INTERNAL_ERROR>(
612
1
                        "Merge block not match, self column count: {}, [columns: {}, types: {}], "
613
1
                        "input column count: {}, [columns: {}, "
614
1
                        "types: {}], ",
615
1
                        _columns.size(), dump_names(), dump_types(), block.columns(),
616
1
                        block.dump_names(), block.dump_types());
617
1
            }
618
246
            for (int i = 0; i < _columns.size(); ++i) {
619
152
                if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
620
0
                    DCHECK(_data_types[i]->is_nullable())
621
0
                            << " target type: " << _data_types[i]->get_name()
622
0
                            << " src type: " << block.get_by_position(i).type->get_name();
623
0
                    DCHECK(((DataTypeNullable*)_data_types[i].get())
624
0
                                   ->get_nested_type()
625
0
                                   ->equals(*block.get_by_position(i).type));
626
0
                    DCHECK(!block.get_by_position(i).type->is_nullable());
627
0
                    _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column)
628
0
                                                            ->convert_to_full_column_if_const(),
629
0
                                                   0, block.rows());
630
152
                } else {
631
152
                    _columns[i]->insert_range_from(
632
152
                            *block.get_by_position(i)
633
152
                                     .column->convert_to_full_column_if_const()
634
152
                                     .get(),
635
152
                            0, block.rows());
636
152
                }
637
152
            }
638
94
        }
639
134
        return Status::OK();
640
135
    }
_ZN5doris12MutableBlock10merge_implINS_5BlockEEENS_6StatusEOT_
Line
Count
Source
594
2
    [[nodiscard]] Status merge_impl(T&& block) {
595
        // merge is not supported in dynamic block
596
2
        if (_columns.empty() && _data_types.empty()) {
597
0
            _data_types = block.get_data_types();
598
0
            _names = block.get_names();
599
0
            _columns.resize(block.columns());
600
0
            for (size_t i = 0; i < block.columns(); ++i) {
601
0
                if (block.get_by_position(i).column) {
602
0
                    _columns[i] = (*std::move(block.get_by_position(i)
603
0
                                                      .column->convert_to_full_column_if_const()))
604
0
                                          .mutate();
605
0
                } else {
606
0
                    _columns[i] = _data_types[i]->create_column();
607
0
                }
608
0
            }
609
2
        } else {
610
2
            if (_columns.size() != block.columns()) {
611
1
                return Status::Error<ErrorCode::INTERNAL_ERROR>(
612
1
                        "Merge block not match, self column count: {}, [columns: {}, types: {}], "
613
1
                        "input column count: {}, [columns: {}, "
614
1
                        "types: {}], ",
615
1
                        _columns.size(), dump_names(), dump_types(), block.columns(),
616
1
                        block.dump_names(), block.dump_types());
617
1
            }
618
4
            for (int i = 0; i < _columns.size(); ++i) {
619
3
                if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
620
1
                    DCHECK(_data_types[i]->is_nullable())
621
0
                            << " target type: " << _data_types[i]->get_name()
622
0
                            << " src type: " << block.get_by_position(i).type->get_name();
623
1
                    DCHECK(((DataTypeNullable*)_data_types[i].get())
624
1
                                   ->get_nested_type()
625
1
                                   ->equals(*block.get_by_position(i).type));
626
1
                    DCHECK(!block.get_by_position(i).type->is_nullable());
627
1
                    _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column)
628
1
                                                            ->convert_to_full_column_if_const(),
629
1
                                                   0, block.rows());
630
2
                } else {
631
2
                    _columns[i]->insert_range_from(
632
2
                            *block.get_by_position(i)
633
2
                                     .column->convert_to_full_column_if_const()
634
2
                                     .get(),
635
2
                            0, block.rows());
636
2
                }
637
3
            }
638
1
        }
639
1
        return Status::OK();
640
2
    }
641
642
    // Move the columns' data into a Block. This will invalidate the columns held by this MutableBlock.
643
    Block to_block(int start_column = 0);
644
    Block to_block(int start_column, int end_column);
645
646
    void swap(MutableBlock& other) noexcept;
647
648
    void add_row(const Block* block, int row);
649
    // Batch add row should return error status if allocate memory failed.
650
    Status add_rows(const Block* block, const uint32_t* row_begin, const uint32_t* row_end,
651
                    const std::vector<int>* column_offset = nullptr);
652
    Status add_rows(const Block* block, size_t row_begin, size_t length);
653
654
    std::string dump_data(size_t row_limit = 100) const;
655
    std::string dump_data_json(size_t row_limit = 100) const;
656
657
64
    // Drops all columns together with their data types and names, leaving
    // the three member containers empty.
    void clear() {
658
64
        _columns.clear();
659
64
        _data_types.clear();
660
64
        _names.clear();
661
64
    }
662
663
    // Clear owned mutable columns in place. MutableBlock already owns its
664
    // columns exclusively, so this does not perform COW detaching or cloning.
665
    void clear_column_data() noexcept;
666
667
    size_t allocated_bytes() const;
668
669
48.0k
    // Returns the sum of byte_size() over every column currently held in
    // _columns (0 when there are no columns).
    size_t bytes() const {
670
48.0k
        size_t res = 0;
671
94.0k
        for (const auto& elem : _columns) {
672
94.0k
            res += elem->byte_size();
673
94.0k
        }
674
675
48.0k
        return res;
676
48.0k
    }
677
678
813
    // Returns a mutable reference to the stored column names (_names);
    // callers can modify the names in place through it.
    std::vector<std::string>& get_names() { return _names; }
679
680
    /** Get a list of column names separated by commas. */
681
    std::string dump_names() const;
682
};
683
684
// RAII adapter for code that wants the MutableBlock API over a live Block. It
685
// owns only the temporary mutable columns and restores them to the Block on
686
// destruction. While the adapter is alive, read/write column data through
687
// mutable_block()/mutable_columns(); the Block's column slots are moved out.
688
class ScopedMutableBlock {
689
public:
690
    // An adapter without a target Block is not allowed.
    ScopedMutableBlock() = delete;
691
    // Defined out of line (body not visible here).
    explicit ScopedMutableBlock(Block* block);
692
813
    // Hands the columns back to the Block, unless restore() already ran or
    // this object was moved from.
    ~ScopedMutableBlock() { restore(); }
693
694
    // Non-copyable.
    ScopedMutableBlock(const ScopedMutableBlock&) = delete;
695
    ScopedMutableBlock& operator=(const ScopedMutableBlock&) = delete;
696
697
    // Move construction takes over `other`'s Block pointer and mutable block.
    // `other._block` becomes nullptr, so `other`'s destructor restores nothing.
    ScopedMutableBlock(ScopedMutableBlock&& other) noexcept
698
            : _block(std::exchange(other._block, nullptr)),
699
0
              _mutable_block(std::move(other._mutable_block)) {}
700
701
0
    // Move assignment first restores the Block currently attached to this
    // adapter (if any), then takes over `other`'s state as in move construction.
    ScopedMutableBlock& operator=(ScopedMutableBlock&& other) noexcept {
702
0
        if (this != &other) {
703
0
            restore();
704
0
            _block = std::exchange(other._block, nullptr);
705
0
            _mutable_block = std::move(other._mutable_block);
706
0
        }
707
0
        return *this;
708
0
    }
709
710
812
    // Accessors for the wrapped MutableBlock and its column vector.
    MutableBlock& mutable_block() { return _mutable_block; }
711
0
    const MutableBlock& mutable_block() const { return _mutable_block; }
712
1
    MutableColumns& mutable_columns() { return _mutable_block.mutable_columns(); }
713
0
    const MutableColumns& mutable_columns() const { return _mutable_block.mutable_columns(); }
714
715
936
    // Moves the mutable columns back into the attached Block via set_columns()
    // and detaches from it. Later calls are no-ops because _block is nullptr.
    void restore() {
716
936
        if (_block != nullptr) {
717
813
            _block->set_columns(std::move(_mutable_block.mutable_columns()));
718
813
            _block = nullptr;
719
813
        }
720
936
    }
721
722
private:
723
    // Block that receives the columns on restore(); nullptr once restored or
    // after this object has been moved from.
    Block* _block = nullptr;
724
    // Holds the columns while the adapter is active.
    MutableBlock _mutable_block;
725
};
726
727
// Refers to a single row: a shared Block plus a row position inside it.
struct IteratorRowRef {
728
    // Block that contains the referenced row.
    std::shared_ptr<Block> block;
729
    // Row index passed to Block::compare_at(); reset() sets it to -1.
    // NOTE: has no default initializer, so it is indeterminate until assigned.
    int row_pos;
730
    // Flag cleared by reset(); its exact meaning is defined by the code that
    // sets it (not visible here). NOTE: has no default initializer either.
    bool is_same;
731
732
    // Compares this row with the row referenced by `rhs` by forwarding to
    // Block::compare_at() and returning its result. `compare_arguments` is
    // passed through unchanged. The trailing -1 is a fixed argument to
    // compare_at(); presumably a NaN/NULL direction hint (TODO confirm
    // against Block::compare_at).
    template <typename T>
733
1.06M
    int compare(const IteratorRowRef& rhs, const T& compare_arguments) const {
734
1.06M
        return block->compare_at(row_pos, rhs.row_pos, compare_arguments, *rhs.block, -1);
735
1.06M
    }
Unexecuted instantiation: _ZNK5doris14IteratorRowRef7compareIPKSt6vectorIjSaIjEEEEiRKS0_RKT_
_ZNK5doris14IteratorRowRef7compareImEEiRKS0_RKT_
Line
Count
Source
733
1.06M
    int compare(const IteratorRowRef& rhs, const T& compare_arguments) const {
734
1.06M
        return block->compare_at(row_pos, rhs.row_pos, compare_arguments, *rhs.block, -1);
735
1.06M
    }
736
737
512
    // Releases the Block reference and puts the fields into the "no row"
    // state: row_pos = -1, is_same = false.
    void reset() {
738
512
        block = nullptr;
739
512
        row_pos = -1;
740
512
        is_same = false;
741
512
    }
742
};
743
744
using BlockView = std::vector<IteratorRowRef>;
745
using BlockUPtr = std::unique_ptr<Block>;
746
747
} // namespace doris