Coverage Report

Created: 2026-06-24 07:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/tablet_info.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <butil/fast_rand.h>
21
#include <gen_cpp/Descriptors_types.h>
22
#include <gen_cpp/Types_types.h>
23
#include <gen_cpp/descriptors.pb.h>
24
#include <gen_cpp/olap_file.pb.h>
25
26
#include <cstdint>
27
#include <functional>
28
#include <iterator>
29
#include <map>
30
#include <memory>
31
#include <string>
32
#include <tuple>
33
#include <unordered_map>
34
#include <utility>
35
#include <vector>
36
37
#include "common/cast_set.h"
38
#include "common/logging.h"
39
#include "common/object_pool.h"
40
#include "common/status.h"
41
#include "core/block/block.h"
42
#include "core/block/column_with_type_and_name.h"
43
#include "core/column/column.h"
44
#include "exprs/vexpr.h"
45
#include "exprs/vexpr_fwd.h"
46
#include "runtime/descriptors.h"
47
#include "util/raw_value.h"
48
49
namespace doris {
50
class MemTracker;
51
class SlotDescriptor;
52
class TExprNode;
53
class TabletColumn;
54
class TabletIndex;
55
class TupleDescriptor;
56
57
struct OlapTableIndexSchema {
58
    int64_t index_id;
59
    int64_t row_binlog_id = 0;
60
    std::vector<SlotDescriptor*> slots;
61
    int32_t schema_hash;
62
    std::vector<TabletColumn*> columns;
63
    std::vector<TabletIndex*> indexes;
64
    VExprContextSPtr where_clause;
65
66
    void to_protobuf(POlapTableIndexSchema* pindex) const;
67
};
68
69
class OlapTableSchemaParam {
70
public:
71
124k
    OlapTableSchemaParam() = default;
72
124k
    ~OlapTableSchemaParam() noexcept = default;
73
74
    Status init(const TOlapTableSchemaParam& tschema);
75
    Status init(const POlapTableSchemaParam& pschema);
76
77
271k
    int64_t db_id() const { return _db_id; }
78
404k
    int64_t table_id() const { return _table_id; }
79
228k
    int64_t version() const { return _version; }
80
81
115k
    TupleDescriptor* tuple_desc() const { return _tuple_desc; }
82
766k
    const std::vector<OlapTableIndexSchema*>& indexes() const { return _indexes; }
83
32.8k
    const OlapTableIndexSchema* row_binlog_index_schema() const { return _row_binlog_index_schema; }
84
85
    void to_protobuf(POlapTableSchemaParam* pschema) const;
86
87
    // NOTE: this function is not thread-safe.
88
43.0k
    POlapTableSchemaParam* to_protobuf() const {
89
43.0k
        if (_proto_schema == nullptr) {
90
41.7k
            _proto_schema = _obj_pool.add(new POlapTableSchemaParam());
91
41.7k
            to_protobuf(_proto_schema);
92
41.7k
        }
93
43.0k
        return _proto_schema;
94
43.0k
    }
95
96
174k
    UniqueKeyUpdateModePB unique_key_update_mode() const { return _unique_key_update_mode; }
97
98
255k
    bool is_partial_update() const {
99
255k
        return _unique_key_update_mode != UniqueKeyUpdateModePB::UPSERT;
100
255k
    }
101
42.3k
    bool is_fixed_partial_update() const {
102
42.3k
        return _unique_key_update_mode == UniqueKeyUpdateModePB::UPDATE_FIXED_COLUMNS;
103
42.3k
    }
104
0
    bool is_flexible_partial_update() const {
105
0
        return _unique_key_update_mode == UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS;
106
0
    }
107
108
174k
    const std::set<std::string>& partial_update_input_columns() const {
109
174k
        return _partial_update_input_columns;
110
174k
    }
111
174k
    PartialUpdateNewRowPolicyPB partial_update_new_key_policy() const {
112
174k
        return _partial_update_new_row_policy;
113
174k
    }
114
183k
    // Returns a copy of the auto-increment column name.
    // NOTE(review): "coulumn" in the method name is a misspelling of "column"; renaming it
    // would change the public interface, so the name is left as is.
    std::string auto_increment_coulumn() const { return _auto_increment_column; }
115
42.2k
    int32_t auto_increment_column_unique_id() const { return _auto_increment_column_unique_id; }
116
42.6k
    void set_timestamp_ms(int64_t timestamp_ms) { _timestamp_ms = timestamp_ms; }
117
174k
    int64_t timestamp_ms() const { return _timestamp_ms; }
118
42.4k
    void set_nano_seconds(int32_t nano_seconds) { _nano_seconds = nano_seconds; }
119
174k
    int32_t nano_seconds() const { return _nano_seconds; }
120
42.5k
    // Stores the time zone name. The argument is taken by value and moved into the member,
    // so a caller passing a temporary does not pay for a second string copy.
    void set_timezone(std::string timezone) { _timezone = std::move(timezone); }
121
174k
    std::string timezone() const { return _timezone; }
122
218k
    bool is_strict_mode() const { return _is_strict_mode; }
123
173k
    int32_t sequence_map_col_uid() const { return _sequence_map_col_uid; }
124
    std::string debug_string() const;
125
126
    Status init_unique_key_update_mode(const TOlapTableSchemaParam& tschema);
127
128
private:
129
    int64_t _db_id;
130
    int64_t _table_id;
131
    int64_t _version;
132
133
    TupleDescriptor* _tuple_desc = nullptr;
134
    mutable POlapTableSchemaParam* _proto_schema = nullptr;
135
    std::vector<OlapTableIndexSchema*> _indexes;
136
    OlapTableIndexSchema* _row_binlog_index_schema = nullptr;
137
    mutable ObjectPool _obj_pool;
138
    UniqueKeyUpdateModePB _unique_key_update_mode {UniqueKeyUpdateModePB::UPSERT};
139
    PartialUpdateNewRowPolicyPB _partial_update_new_row_policy {
140
            PartialUpdateNewRowPolicyPB::APPEND};
141
    std::set<std::string> _partial_update_input_columns;
142
    bool _is_strict_mode = false;
143
    std::string _auto_increment_column;
144
    int32_t _auto_increment_column_unique_id;
145
    int64_t _timestamp_ms = 0;
146
    int32_t _nano_seconds {0};
147
    std::string _timezone;
148
    int32_t _sequence_map_col_uid {-1};
149
};
150
151
using OlapTableIndexTablets = TOlapTableIndexTablets;
152
// struct TOlapTableIndexTablets {
153
//     1: required i64 index_id
154
//     2: required list<i64> tablets
155
//     3: optional i64 bucket_be_id
156
//     4: optional list<i32> local_bucket_seqs
157
// }
158
159
using BlockRow = std::pair<Block*, int32_t>;
160
using BlockRowWithIndicator = std::tuple<Block*, int32_t, bool>; // [block, row, is_transformed]
161
162
struct VOlapTablePartition {
163
    int64_t id = 0;
164
    BlockRow start_key;
165
    BlockRow end_key;
166
    std::vector<BlockRow> in_keys;
167
    int64_t num_buckets = 0;
168
    std::vector<OlapTableIndexTablets> indexes;
169
    bool is_mutable;
170
    // -1 indicates partition with hash distribution
171
    int64_t load_tablet_idx = -1;
172
    // Fallback FE-selected bucket owner BE for adaptive random bucket mode. New FE versions send
173
    // this per index in OlapTableIndexTablets.
174
    int64_t bucket_be_id = -1;
175
    // Fallback bucket indices (0-based) used by FIND_TABLET_RANDOM_BUCKET rotation. New FE versions
176
    // send this per index in OlapTableIndexTablets.
177
    std::vector<int32_t> local_bucket_seqs;
178
    int total_replica_num = 0;
179
    int load_required_replica_num = 0;
180
    // tablet_id -> set of backend_ids that have version gaps
181
    std::unordered_map<int64_t, std::unordered_set<int64_t>> tablet_version_gap_backends;
182
183
    VOlapTablePartition(Block* partition_block)
184
            // the default value of partition bound is -1.
185
56.5k
            : start_key {partition_block, -1}, end_key {partition_block, -1} {}
186
};
187
188
// This is only used by tablet_sink, so we can assume it is initialized by its descriptor.
189
// Comparison functor over BlockRowWithIndicator keys ([block, row, is_transformed]).
// It is the ordering used by the partition map in VOlapTablePartitionParam.
class VOlapTablePartKeyComparator {
190
public:
191
    // Binds both location vectors by reference; nothing is copied, so `slot_locs` and
    // `params_locs` must outlive this comparator (and any copy of it).
    VOlapTablePartKeyComparator(const std::vector<uint16_t>& slot_locs,
192
                                const std::vector<uint16_t>& params_locs)
193
38.0M
            : _slot_locs(slot_locs), _param_locs(params_locs) {}
194
195
    // Returns true if lhs < rhs.
196
    // A 'row' of -1 means the maximal boundary.
197
    bool operator()(const BlockRowWithIndicator& lhs, const BlockRowWithIndicator& rhs) const;
198
199
private:
200
    // Non-owning references to the vectors handed to the constructor.
    // NOTE(review): presumably one set of locations is used per indicator value
    // (original vs. transformed slots) -- confirm against operator()'s definition.
    const std::vector<uint16_t>& _slot_locs;
201
    const std::vector<uint16_t>& _param_locs;
202
};
203
204
// store an olap table's tablet information
205
class VOlapTablePartitionParam {
206
public:
207
    VOlapTablePartitionParam(std::shared_ptr<OlapTableSchemaParam>& schema,
208
                             const TOlapTablePartitionParam& param);
209
210
    ~VOlapTablePartitionParam();
211
212
    Status init();
213
214
215
    int64_t db_id() const { return _t_param.db_id; }
215
215
    int64_t table_id() const { return _t_param.table_id; }
216
0
    int64_t version() const { return _t_param.version; }
217
218
    // return true if we found this block_row in partition
219
    // Looks up the partition that holds row `row` of `block`.
    //
    // `partition` is an in/out parameter: it is only assigned when a partition is selected,
    // and the return value is simply `partition != nullptr`. A caller that passes a non-null
    // pointer therefore gets `true` back even when the lookup itself misses.
    //
    // With _is_in_partition set, the key is searched with find() and a miss falls back to
    // _default_partition (which may be nullptr). Otherwise upper_bound() picks a candidate
    // and _part_contains() confirms that the candidate really covers the key.
    ALWAYS_INLINE bool find_partition(Block* block, int row,
220
38.2M
                                      VOlapTablePartition*& partition) const {
221
38.2M
        auto it = _is_in_partition ? _partitions_map->find(std::tuple {block, row, true})
222
38.2M
                                   : _partitions_map->upper_bound(std::tuple {block, row, true});
223
18.4E
        // `it` may be end() here (key not found / beyond the last bound). Dereferencing it
        // just to log would be undefined behaviour, so print a placeholder in that case.
        VLOG_TRACE << "find row " << row << " of\n"
224
18.4E
                   << block->dump_data() << "in:\n"
225
18.4E
                   << _partition_block.dump_data() << "result line row: "
                   << (it != _partitions_map->end() ? std::to_string(std::get<1>(it->first))
                                                    : std::string("<end>"));
226
227
        // for list partition it might result in default partition
228
38.2M
        if (_is_in_partition) {
229
222k
            partition = (it != _partitions_map->end()) ? it->second : _default_partition;
230
222k
            // The result is already decided; resetting `it` makes the check below a no-op.
            it = _partitions_map->end();
231
222k
        }
232
38.2M
        if (it != _partitions_map->end() &&
233
38.2M
            _part_contains(it->second, std::tuple {block, row, true})) {
234
38.0M
            partition = it->second;
235
38.0M
        }
236
38.2M
        return (partition != nullptr);
237
38.2M
    }
238
239
    ALWAYS_INLINE void find_tablets(
240
            Block* block, const std::vector<uint32_t>& indexes,
241
            const std::vector<VOlapTablePartition*>& partitions,
242
            std::vector<uint32_t>& tablet_indexes /*result*/,
243
            /*TODO: check if flat hash map will be better*/
244
32.6k
            std::map<VOlapTablePartition*, int64_t>* partition_tablets_buffer = nullptr) const {
245
32.6k
        std::function<uint32_t(Block*, uint32_t, const VOlapTablePartition&)> compute_function;
246
32.6k
        if (!_distributed_slot_locs.empty()) {
247
            //TODO: refactor by saving the hash values. then we can calculate in columnwise.
248
32.6k
            compute_function = [this](Block* block, uint32_t row,
249
33.8M
                                      const VOlapTablePartition& partition) -> uint32_t {
250
33.8M
                uint32_t hash_val = 0;
251
33.9M
                for (unsigned short _distributed_slot_loc : _distributed_slot_locs) {
252
33.9M
                    auto* slot_desc = _slots[_distributed_slot_loc];
253
33.9M
                    auto& column = block->get_by_position(_distributed_slot_loc).column;
254
33.9M
                    auto val = column->get_data_at(row);
255
33.9M
                    if (val.data != nullptr) {
256
23.7M
                        hash_val = RawValue::zlib_crc32(val.data, val.size,
257
23.7M
                                                        slot_desc->type()->get_primitive_type(),
258
23.7M
                                                        hash_val);
259
23.7M
                    } else {
260
10.1M
                        hash_val = HashUtil::zlib_crc_hash_null(hash_val);
261
10.1M
                    }
262
33.9M
                }
263
33.8M
                return cast_set<uint32_t>(hash_val % partition.num_buckets);
264
33.8M
            };
265
32.6k
        } else { // random distribution
266
11
            compute_function = [](Block* block, uint32_t row,
267
18
                                  const VOlapTablePartition& partition) -> uint32_t {
268
18
                if (partition.load_tablet_idx == -1) {
269
                    // for compatible with old version, just do random
270
0
                    return cast_set<uint32_t>(butil::fast_rand() % partition.num_buckets);
271
0
                }
272
18
                return cast_set<uint32_t>(partition.load_tablet_idx % partition.num_buckets);
273
18
            };
274
11
        }
275
276
32.6k
        if (partition_tablets_buffer == nullptr) {
277
33.8M
            for (auto index : indexes) {
278
33.8M
                tablet_indexes[index] = compute_function(block, index, *partitions[index]);
279
33.8M
            }
280
32.6k
        } else { // use buffer
281
106
            for (auto index : indexes) {
282
106
                auto* partition = partitions[index];
283
106
                if (auto it = partition_tablets_buffer->find(partition);
284
106
                    it != partition_tablets_buffer->end()) {
285
88
                    tablet_indexes[index] = cast_set<uint32_t>(it->second); // tablet
286
88
                } else {
287
                    // compute and save in buffer
288
18
                    (*partition_tablets_buffer)[partition] = tablet_indexes[index] =
289
18
                            compute_function(block, index, *partitions[index]);
290
18
                }
291
106
            }
292
11
        }
293
32.6k
    }
294
295
125k
    const std::vector<VOlapTablePartition*>& get_partitions() const { return _partitions; }
296
297
    // Currently the same as is_auto_partition(), because transformed partitions are only supported in auto partition. This may be expanded in the future.
298
37.7k
    bool is_projection_partition() const { return _is_auto_partition; }
299
123k
    bool is_auto_partition() const { return _is_auto_partition; }
300
301
37.7k
    bool is_auto_detect_overwrite() const { return _is_auto_detect_overwrite; }
302
49
    int64_t get_overwrite_group_id() const { return _overwrite_group_id; }
303
304
183
    std::shared_ptr<TNetworkAddress> get_master_address() const { return _master_address; }
305
306
367
    std::vector<uint16_t> get_partition_keys() const { return _partition_slot_locs; }
307
308
    Status add_partitions(const std::vector<TOlapTablePartition>& partitions);
309
    // No need to delete/reinsert the partition keys; only the links are changed, and the _partitions items are reset.
310
    Status replace_partitions(std::vector<int64_t>& old_partition_ids,
311
                              const std::vector<TOlapTablePartition>& new_partitions);
312
313
38.1k
    // Returns a copy of the partition-function expression contexts. Const-qualified because it
    // only reads a member, so it can also be called through a const object or reference.
    VExprContextSPtrs get_part_func_ctx() const { return _part_func_ctx; }
314
38.1k
    // Returns a copy of the partition-function expressions. Const-qualified because it only
    // reads a member, so it can also be called through a const object or reference.
    VExprSPtrs get_partition_function() const { return _partition_function; }
315
316
    // NOTE: this will affect _partition_block.
317
    Status generate_partition_from(const TOlapTablePartition& t_part,
318
                                   VOlapTablePartition*& part_result);
319
320
531
    void set_transformed_slots(const std::vector<uint16_t>& new_slots) {
321
531
        _transformed_slot_locs = new_slots;
322
531
    }
323
324
private:
325
    Status _create_partition_keys(const std::vector<TExprNode>& t_exprs, BlockRow* part_key);
326
327
    // check if this partition contains this key
328
    bool _part_contains(VOlapTablePartition* part, BlockRowWithIndicator key) const;
329
330
    // this partition is only valid in this schema
331
    std::shared_ptr<OlapTableSchemaParam> _schema;
332
    TOlapTablePartitionParam _t_param;
333
334
    const std::vector<SlotDescriptor*>& _slots;
335
    std::vector<uint16_t> _partition_slot_locs;
336
    std::vector<uint16_t> _transformed_slot_locs;
337
    std::vector<uint16_t> _distributed_slot_locs;
338
339
    ObjectPool _obj_pool;
340
    Block _partition_block;
341
    std::unique_ptr<MemTracker> _mem_tracker;
342
    std::vector<VOlapTablePartition*> _partitions;
343
    // For all partition value rows saved in this map, the indicator is false; whenever we use a value to look up in it, the indicator is true,
344
    // so that we can distinguish which column index to use (origin slots or transformed slots).
345
    // For range partitions we ONLY SAVE RIGHT ENDS. After finding a partition's right end by a value, we then check whether the partition's left end covers that value.
346
    std::unique_ptr<
347
            std::map<BlockRowWithIndicator, VOlapTablePartition*, VOlapTablePartKeyComparator>>
348
            _partitions_map;
349
350
    bool _is_in_partition = false;
351
    size_t _mem_usage = 0;
352
    // only works when using list partition, the resource is owned by _partitions
353
    VOlapTablePartition* _default_partition = nullptr;
354
355
    bool _is_auto_partition = false;
356
    VExprContextSPtrs _part_func_ctx = {nullptr};
357
    VExprSPtrs _partition_function = {nullptr};
358
    TPartitionType::type _part_type; // support list or range
359
    // "insert overwrite partition(*)", detect which partitions by BE
360
    bool _is_auto_detect_overwrite = false;
361
    int64_t _overwrite_group_id = 0;
362
    std::shared_ptr<TNetworkAddress> _master_address = nullptr;
363
};
364
365
// Indicates where a tablet and all of its replicas are located (node-wise).
366
using TabletLocation = TTabletLocation;
367
// struct TTabletLocation {
368
//     1: required i64 tablet_id
369
//     2: required list<i64> node_ids
370
// }
371
372
class OlapTableLocationParam {
373
public:
374
42.4k
    OlapTableLocationParam(const TOlapTableLocationParam& t_param) : _t_param(t_param) {
375
326k
        for (auto& location : _t_param.tablets) {
376
326k
            _tablets.emplace(location.tablet_id, &location);
377
326k
        }
378
42.4k
    }
379
380
0
    int64_t db_id() const { return _t_param.db_id; }
381
0
    int64_t table_id() const { return _t_param.table_id; }
382
0
    int64_t version() const { return _t_param.version; }
383
384
331k
    TabletLocation* find_tablet(int64_t tablet_id) const {
385
331k
        auto it = _tablets.find(tablet_id);
386
332k
        if (it != std::end(_tablets)) {
387
332k
            return it->second;
388
332k
        }
389
18.4E
        return nullptr;
390
331k
    }
391
392
183
    // Registers every location whose tablet_id is not already known; entries for tablet ids
    // that are already present are left untouched.
    //
    // WARNING: the map stores the ADDRESS of each element inside the caller's `locations`
    // vector -- nothing is copied. Pointers later returned by find_tablet() for these ids are
    // only valid while that vector is alive and its elements are not reallocated.
    void add_locations(std::vector<TTabletLocation>& locations) {
393
6.55k
        for (auto& location : locations) {
394
6.55k
            if (_tablets.find(location.tablet_id) == _tablets.end()) {
395
6.55k
                // Points into the caller-owned vector, not into _t_param.
                _tablets[location.tablet_id] = &location;
396
6.55k
            }
397
6.55k
        }
398
183
    }
399
400
private:
401
    TOlapTableLocationParam _t_param;
402
    // [tablet_id, tablet]. tablet has id, also.
403
    std::unordered_map<int64_t, TabletLocation*> _tablets;
404
};
405
406
struct NodeInfo {
407
    int64_t id;
408
    int64_t option;
409
    std::string host;
410
    int32_t brpc_port;
411
412
43.3k
    NodeInfo() = default;
413
414
    NodeInfo(const TNodeInfo& tnode)
415
42.4k
            : id(tnode.id),
416
42.4k
              option(tnode.option),
417
42.4k
              host(tnode.host),
418
42.4k
              brpc_port(tnode.async_internal_port) {}
419
};
420
421
class DorisNodesInfo {
422
public:
423
0
    DorisNodesInfo() = default;
424
42.2k
    DorisNodesInfo(const TPaloNodesInfo& t_nodes) {
425
42.6k
        for (const auto& node : t_nodes.nodes) {
426
42.6k
            _nodes.emplace(node.id, node);
427
42.6k
        }
428
42.2k
    }
429
0
    void setNodes(const TPaloNodesInfo& t_nodes) {
430
0
        _nodes.clear();
431
0
        for (const auto& node : t_nodes.nodes) {
432
0
            _nodes.emplace(node.id, node);
433
0
        }
434
0
    }
435
43.2k
    const NodeInfo* find_node(int64_t id) const {
436
43.2k
        auto it = _nodes.find(id);
437
43.7k
        if (it != std::end(_nodes)) {
438
43.7k
            return &it->second;
439
43.7k
        }
440
18.4E
        return nullptr;
441
43.2k
    }
442
443
183
    void add_nodes(const std::vector<TNodeInfo>& t_nodes) {
444
183
        for (const auto& node : t_nodes) {
445
0
            const auto* node_info = find_node(node.id);
446
0
            if (node_info == nullptr) {
447
0
                _nodes.emplace(node.id, node);
448
0
            }
449
0
        }
450
183
    }
451
452
0
    const std::unordered_map<int64_t, NodeInfo>& nodes_info() { return _nodes; }
453
454
private:
455
    std::unordered_map<int64_t, NodeInfo> _nodes;
456
};
457
458
} // namespace doris