Coverage Report

Created: 2026-03-19 17:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/tablet/tablet_meta.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/tablet/tablet_meta.h"
19
20
#include <bvar/bvar.h>
21
#include <gen_cpp/Descriptors_types.h>
22
#include <gen_cpp/FrontendService_types.h>
23
#include <gen_cpp/Types_types.h>
24
#include <gen_cpp/olap_common.pb.h>
25
#include <gen_cpp/olap_file.pb.h>
26
#include <gen_cpp/segment_v2.pb.h>
27
#include <gen_cpp/types.pb.h>
28
#include <json2pb/pb_to_json.h>
29
#include <time.h>
30
31
#include <cstdint>
32
#include <memory>
33
#include <random>
34
#include <set>
35
#include <utility>
36
37
#include "cloud/cloud_meta_mgr.h"
38
#include "cloud/cloud_storage_engine.h"
39
#include "cloud/config.h"
40
#include "common/config.h"
41
#include "io/fs/file_writer.h"
42
#include "io/fs/local_file_system.h"
43
#include "storage/data_dir.h"
44
#include "storage/file_header.h"
45
#include "storage/olap_common.h"
46
#include "storage/olap_define.h"
47
#include "storage/rowset/rowset.h"
48
#include "storage/rowset/rowset_meta_manager.h"
49
#include "storage/tablet/tablet_fwd.h"
50
#include "storage/tablet/tablet_meta_manager.h"
51
#include "storage/tablet/tablet_schema_cache.h"
52
#include "storage/utils.h"
53
#include "util/debug_points.h"
54
#include "util/lru_cache.h"
55
#include "util/mem_info.h"
56
#include "util/parse_util.h"
57
#include "util/string_util.h"
58
#include "util/time.h"
59
#include "util/uid_util.h"
60
61
using std::string;
62
using std::unordered_map;
63
using std::vector;
64
65
namespace doris {
66
#include "common/compile_check_begin.h"
67
using namespace ErrorCode;
68
69
bvar::Adder<uint64_t> g_contains_agg_with_cache_if_eligible_total(
70
        "g_contains_agg_with_cache_if_eligible_total");
71
bvar::Adder<uint64_t> g_contains_agg_with_cache_if_eligible_partial_hit(
72
        "g_contains_agg_with_cache_if_eligible_partial_hit");
73
bvar::Adder<uint64_t> g_contains_agg_with_cache_if_eligible_full_hit(
74
        "g_contains_agg_with_cache_if_eligible_full_hit");
75
bvar::Window<bvar::Adder<uint64_t>> g_contains_agg_with_cache_if_eligible_total_minute(
76
        "g_contains_agg_with_cache_if_eligible_total_1m",
77
        &g_contains_agg_with_cache_if_eligible_total, 60);
78
bvar::Window<bvar::Adder<uint64_t>> g_contains_agg_with_cache_if_eligible_partial_hit_minute(
79
        "g_contains_agg_with_cache_if_eligible_partial_hit_1m",
80
        &g_contains_agg_with_cache_if_eligible_partial_hit, 60);
81
bvar::Window<bvar::Adder<uint64_t>> g_contains_agg_with_cache_if_eligible_full_hit_minute(
82
        "g_contains_agg_with_cache_if_eligible_full_hit_1m",
83
        &g_contains_agg_with_cache_if_eligible_full_hit, 60);
84
85
TabletMetaSharedPtr TabletMeta::create(
86
        const TCreateTabletReq& request, const TabletUid& tablet_uid, uint64_t shard_id,
87
        uint32_t next_unique_id,
88
6.38k
        const unordered_map<uint32_t, uint32_t>& col_ordinal_to_unique_id) {
89
6.38k
    std::optional<TBinlogConfig> binlog_config;
90
6.38k
    if (request.__isset.binlog_config) {
91
6.07k
        binlog_config = request.binlog_config;
92
6.07k
    }
93
6.38k
    TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format =
94
6.38k
            request.inverted_index_file_storage_format;
95
96
    // We will discard this format. Don't make any further changes here.
97
6.38k
    if (request.__isset.inverted_index_storage_format) {
98
6.38k
        switch (request.inverted_index_storage_format) {
99
0
        case TInvertedIndexStorageFormat::V1:
100
0
            inverted_index_file_storage_format = TInvertedIndexFileStorageFormat::V1;
101
0
            break;
102
0
        case TInvertedIndexStorageFormat::V2:
103
0
            inverted_index_file_storage_format = TInvertedIndexFileStorageFormat::V2;
104
0
            break;
105
6.38k
        default:
106
6.38k
            break;
107
6.38k
        }
108
6.38k
    }
109
    // Decide storage format for this tablet. DEFAULT / not-set fall back to V2 on BE side.
110
6.38k
    TStorageFormat::type storage_format =
111
6.38k
            request.__isset.storage_format ? request.storage_format : TStorageFormat::V2;
112
6.38k
    return std::make_shared<TabletMeta>(
113
6.38k
            request.table_id, request.partition_id, request.tablet_id, request.replica_id,
114
6.38k
            request.tablet_schema.schema_hash, shard_id, request.tablet_schema, next_unique_id,
115
6.38k
            col_ordinal_to_unique_id, tablet_uid,
116
6.38k
            request.__isset.tablet_type ? request.tablet_type : TTabletType::TABLET_TYPE_DISK,
117
18.4E
            request.__isset.compression_type ? request.compression_type : TCompressionType::LZ4F,
118
6.38k
            request.__isset.storage_policy_id ? request.storage_policy_id : -1,
119
6.38k
            request.__isset.enable_unique_key_merge_on_write
120
6.38k
                    ? request.enable_unique_key_merge_on_write
121
18.4E
                    : false,
122
6.38k
            std::move(binlog_config), request.compaction_policy,
123
6.38k
            request.time_series_compaction_goal_size_mbytes,
124
6.38k
            request.time_series_compaction_file_count_threshold,
125
6.38k
            request.time_series_compaction_time_threshold_seconds,
126
6.38k
            request.time_series_compaction_empty_rowsets_threshold,
127
6.38k
            request.time_series_compaction_level_threshold, inverted_index_file_storage_format,
128
6.38k
            request.tde_algorithm, storage_format,
129
6.38k
            request.__isset.vertical_compaction_num_columns_per_group
130
6.38k
                    ? request.vertical_compaction_num_columns_per_group
131
18.4E
                    : 5);
132
6.38k
}
133
134
471k
TabletMeta::~TabletMeta() {
135
471k
    if (_handle) {
136
469k
        TabletSchemaCache::instance()->release(_handle);
137
469k
    }
138
471k
}
139
140
TabletMeta::TabletMeta()
141
728k
        : _tablet_uid(0, 0),
142
728k
          _schema(new TabletSchema),
143
728k
          _delete_bitmap(new DeleteBitmap(_tablet_id)) {}
144
145
TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id,
146
                       int64_t replica_id, int32_t schema_hash, int32_t shard_id,
147
                       const TTabletSchema& tablet_schema, uint32_t next_unique_id,
148
                       const std::unordered_map<uint32_t, uint32_t>& col_ordinal_to_unique_id,
149
                       TabletUid tablet_uid, TTabletType::type tabletType,
150
                       TCompressionType::type compression_type, int64_t storage_policy_id,
151
                       bool enable_unique_key_merge_on_write,
152
                       std::optional<TBinlogConfig> binlog_config, std::string compaction_policy,
153
                       int64_t time_series_compaction_goal_size_mbytes,
154
                       int64_t time_series_compaction_file_count_threshold,
155
                       int64_t time_series_compaction_time_threshold_seconds,
156
                       int64_t time_series_compaction_empty_rowsets_threshold,
157
                       int64_t time_series_compaction_level_threshold,
158
                       TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format,
159
                       TEncryptionAlgorithm::type tde_algorithm,
160
                       TStorageFormat::type storage_format,
161
                       int32_t vertical_compaction_num_columns_per_group)
162
6.74k
        : _tablet_uid(0, 0),
163
6.74k
          _schema(new TabletSchema),
164
6.74k
          _delete_bitmap(new DeleteBitmap(tablet_id)),
165
6.74k
          _storage_format(storage_format) {
166
6.74k
    TabletMetaPB tablet_meta_pb;
167
6.74k
    tablet_meta_pb.set_table_id(table_id);
168
6.74k
    tablet_meta_pb.set_partition_id(partition_id);
169
6.74k
    tablet_meta_pb.set_tablet_id(tablet_id);
170
6.74k
    tablet_meta_pb.set_replica_id(replica_id);
171
6.74k
    tablet_meta_pb.set_schema_hash(schema_hash);
172
6.74k
    tablet_meta_pb.set_shard_id(shard_id);
173
    // Persist the creation time, but it is not used
174
6.74k
    tablet_meta_pb.set_creation_time(time(nullptr));
175
6.74k
    tablet_meta_pb.set_cumulative_layer_point(-1);
176
6.74k
    tablet_meta_pb.set_tablet_state(PB_RUNNING);
177
6.74k
    *(tablet_meta_pb.mutable_tablet_uid()) = tablet_uid.to_proto();
178
6.74k
    tablet_meta_pb.set_tablet_type(tabletType == TTabletType::TABLET_TYPE_DISK
179
6.74k
                                           ? TabletTypePB::TABLET_TYPE_DISK
180
6.74k
                                           : TabletTypePB::TABLET_TYPE_MEMORY);
181
6.74k
    tablet_meta_pb.set_enable_unique_key_merge_on_write(enable_unique_key_merge_on_write);
182
6.74k
    tablet_meta_pb.set_storage_policy_id(storage_policy_id);
183
6.74k
    tablet_meta_pb.set_compaction_policy(compaction_policy);
184
6.74k
    tablet_meta_pb.set_time_series_compaction_goal_size_mbytes(
185
6.74k
            time_series_compaction_goal_size_mbytes);
186
6.74k
    tablet_meta_pb.set_time_series_compaction_file_count_threshold(
187
6.74k
            time_series_compaction_file_count_threshold);
188
6.74k
    tablet_meta_pb.set_time_series_compaction_time_threshold_seconds(
189
6.74k
            time_series_compaction_time_threshold_seconds);
190
6.74k
    tablet_meta_pb.set_time_series_compaction_empty_rowsets_threshold(
191
6.74k
            time_series_compaction_empty_rowsets_threshold);
192
6.74k
    tablet_meta_pb.set_time_series_compaction_level_threshold(
193
6.74k
            time_series_compaction_level_threshold);
194
6.74k
    tablet_meta_pb.set_vertical_compaction_num_columns_per_group(
195
6.74k
            vertical_compaction_num_columns_per_group);
196
6.74k
    TabletSchemaPB* schema = tablet_meta_pb.mutable_schema();
197
6.74k
    schema->set_num_short_key_columns(tablet_schema.short_key_column_count);
198
6.74k
    schema->set_num_rows_per_row_block(config::default_num_rows_per_column_file_block);
199
6.74k
    schema->set_sequence_col_idx(tablet_schema.sequence_col_idx);
200
6.74k
    auto p_seq_map = schema->mutable_seq_map(); // ColumnGroupsPB
201
202
6.74k
    for (auto& it : tablet_schema.seq_map) { // std::vector< ::doris::TColumnGroup>
203
0
        uint32_t key = it.sequence_column;
204
0
        ColumnGroupPB* cg_pb = p_seq_map->add_cg(); // ColumnGroupPB {key: {v1, v2, v3}}
205
0
        cg_pb->set_sequence_column(key);
206
0
        for (auto v : it.columns_in_group) {
207
0
            cg_pb->add_columns_in_group(v);
208
0
        }
209
0
    }
210
6.74k
    switch (tablet_schema.keys_type) {
211
4.85k
    case TKeysType::DUP_KEYS:
212
4.85k
        schema->set_keys_type(KeysType::DUP_KEYS);
213
4.85k
        break;
214
871
    case TKeysType::UNIQUE_KEYS:
215
871
        schema->set_keys_type(KeysType::UNIQUE_KEYS);
216
871
        break;
217
764
    case TKeysType::AGG_KEYS:
218
764
        schema->set_keys_type(KeysType::AGG_KEYS);
219
764
        break;
220
247
    default:
221
247
        LOG(WARNING) << "unknown tablet keys type";
222
247
        break;
223
6.74k
    }
224
    // compress_kind used to compress segment files
225
6.71k
    schema->set_compress_kind(COMPRESS_LZ4);
226
227
    // compression_type used to compress segment page
228
6.71k
    switch (compression_type) {
229
0
    case TCompressionType::NO_COMPRESSION:
230
0
        schema->set_compression_type(segment_v2::NO_COMPRESSION);
231
0
        break;
232
0
    case TCompressionType::SNAPPY:
233
0
        schema->set_compression_type(segment_v2::SNAPPY);
234
0
        break;
235
0
    case TCompressionType::LZ4:
236
0
        schema->set_compression_type(segment_v2::LZ4);
237
0
        break;
238
668
    case TCompressionType::LZ4F:
239
668
        schema->set_compression_type(segment_v2::LZ4F);
240
668
        break;
241
0
    case TCompressionType::ZLIB:
242
0
        schema->set_compression_type(segment_v2::ZLIB);
243
0
        break;
244
6.05k
    case TCompressionType::ZSTD:
245
6.05k
        schema->set_compression_type(segment_v2::ZSTD);
246
6.05k
        break;
247
0
    default:
248
0
        schema->set_compression_type(segment_v2::LZ4F);
249
0
        break;
250
6.71k
    }
251
252
6.69k
    switch (inverted_index_file_storage_format) {
253
0
    case TInvertedIndexFileStorageFormat::V1:
254
0
        schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
255
0
        break;
256
668
    case TInvertedIndexFileStorageFormat::V2:
257
668
        schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
258
668
        break;
259
6.02k
    case TInvertedIndexFileStorageFormat::V3:
260
6.02k
        schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V3);
261
6.02k
        break;
262
0
    default:
263
0
        schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V3);
264
0
        break;
265
6.69k
    }
266
267
6.69k
    switch (tablet_schema.sort_type) {
268
0
    case TSortType::type::ZORDER:
269
0
        schema->set_sort_type(SortType::ZORDER);
270
0
        break;
271
6.69k
    default:
272
6.69k
        schema->set_sort_type(SortType::LEXICAL);
273
6.69k
    }
274
6.70k
    schema->set_sort_col_num(tablet_schema.sort_col_num);
275
6.70k
    for (const auto& i : tablet_schema.cluster_key_uids) {
276
2
        schema->add_cluster_key_uids(i);
277
2
    }
278
6.70k
    tablet_meta_pb.set_in_restore_mode(false);
279
280
    // set column information
281
6.70k
    uint32_t col_ordinal = 0;
282
6.70k
    bool has_bf_columns = false;
283
41.2k
    for (TColumn tcolumn : tablet_schema.columns) {
284
41.2k
        ColumnPB* column = schema->add_column();
285
41.2k
        uint32_t unique_id = -1;
286
41.2k
        if (tcolumn.col_unique_id >= 0) {
287
39.1k
            unique_id = tcolumn.col_unique_id;
288
39.1k
        } else {
289
2.11k
            unique_id = col_ordinal_to_unique_id.at(col_ordinal);
290
2.11k
        }
291
41.2k
        col_ordinal++;
292
41.2k
        init_column_from_tcolumn(unique_id, tcolumn, column);
293
294
41.2k
        if (column->is_bf_column()) {
295
0
            has_bf_columns = true;
296
0
        }
297
298
41.2k
        if (tablet_schema.__isset.indexes) {
299
2
            for (auto& index : tablet_schema.indexes) {
300
2
                if (index.index_type == TIndexType::type::BLOOMFILTER ||
301
2
                    index.index_type == TIndexType::type::NGRAM_BF) {
302
0
                    DCHECK_EQ(index.columns.size(), 1);
303
0
                    if (iequal(tcolumn.column_name, index.columns[0])) {
304
0
                        column->set_is_bf_column(true);
305
0
                        break;
306
0
                    }
307
0
                }
308
2
            }
309
2
        }
310
41.2k
    }
311
312
    // copy index meta
313
6.70k
    if (tablet_schema.__isset.indexes) {
314
1
        for (auto& index : tablet_schema.indexes) {
315
1
            TabletIndexPB* index_pb = schema->add_index();
316
1
            index_pb->set_index_id(index.index_id);
317
1
            index_pb->set_index_name(index.index_name);
318
            // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
319
            // get column unique id by name
320
1
            for (auto column_name : index.columns) {
321
2
                for (auto column : schema->column()) {
322
2
                    if (iequal(column.name(), column_name)) {
323
1
                        index_pb->add_col_unique_id(column.unique_id());
324
1
                    }
325
2
                }
326
1
            }
327
1
            switch (index.index_type) {
328
1
            case TIndexType::BITMAP:
329
1
                index_pb->set_index_type(IndexType::BITMAP);
330
1
                break;
331
0
            case TIndexType::INVERTED:
332
0
                index_pb->set_index_type(IndexType::INVERTED);
333
0
                break;
334
0
            case TIndexType::ANN:
335
0
                index_pb->set_index_type(IndexType::ANN);
336
0
                break;
337
0
            case TIndexType::BLOOMFILTER:
338
0
                index_pb->set_index_type(IndexType::BLOOMFILTER);
339
0
                break;
340
0
            case TIndexType::NGRAM_BF:
341
0
                index_pb->set_index_type(IndexType::NGRAM_BF);
342
0
                break;
343
1
            }
344
345
1
            if (index.__isset.properties) {
346
0
                auto properties = index_pb->mutable_properties();
347
0
                for (auto kv : index.properties) {
348
0
                    (*properties)[kv.first] = kv.second;
349
0
                }
350
0
            }
351
1
        }
352
1
    }
353
354
6.70k
    schema->set_next_column_unique_id(next_unique_id);
355
6.70k
    if (has_bf_columns && tablet_schema.__isset.bloom_filter_fpp) {
356
0
        schema->set_bf_fpp(tablet_schema.bloom_filter_fpp);
357
0
    }
358
359
6.70k
    if (tablet_schema.__isset.is_in_memory) {
360
6.06k
        schema->set_is_in_memory(tablet_schema.is_in_memory);
361
6.06k
    }
362
363
6.70k
    if (tablet_schema.__isset.disable_auto_compaction) {
364
6.07k
        schema->set_disable_auto_compaction(tablet_schema.disable_auto_compaction);
365
6.07k
    }
366
367
    // Deprecated legacy flatten-nested switch. Distinct from variant_enable_nested_group.
368
6.73k
    if (tablet_schema.__isset.variant_enable_flatten_nested) {
369
6.73k
        schema->set_enable_variant_flatten_nested(tablet_schema.variant_enable_flatten_nested);
370
6.73k
    }
371
372
6.73k
    if (tablet_schema.__isset.enable_single_replica_compaction) {
373
6.73k
        schema->set_enable_single_replica_compaction(
374
6.73k
                tablet_schema.enable_single_replica_compaction);
375
6.73k
    }
376
377
6.73k
    if (tablet_schema.__isset.delete_sign_idx) {
378
6.73k
        schema->set_delete_sign_idx(tablet_schema.delete_sign_idx);
379
6.73k
    }
380
6.72k
    if (tablet_schema.__isset.store_row_column) {
381
6.72k
        schema->set_store_row_column(tablet_schema.store_row_column);
382
6.72k
    }
383
6.73k
    if (tablet_schema.__isset.row_store_page_size) {
384
6.73k
        schema->set_row_store_page_size(tablet_schema.row_store_page_size);
385
6.73k
    }
386
6.73k
    if (tablet_schema.__isset.storage_page_size) {
387
6.73k
        schema->set_storage_page_size(tablet_schema.storage_page_size);
388
6.73k
    }
389
6.72k
    if (tablet_schema.__isset.storage_dict_page_size) {
390
6.72k
        schema->set_storage_dict_page_size(tablet_schema.storage_dict_page_size);
391
6.72k
    }
392
6.73k
    if (tablet_schema.__isset.skip_write_index_on_load) {
393
6.73k
        schema->set_skip_write_index_on_load(tablet_schema.skip_write_index_on_load);
394
6.73k
    }
395
6.70k
    if (tablet_schema.__isset.row_store_col_cids) {
396
6.05k
        schema->mutable_row_store_column_unique_ids()->Add(tablet_schema.row_store_col_cids.begin(),
397
6.05k
                                                           tablet_schema.row_store_col_cids.end());
398
6.05k
    }
399
6.70k
    if (binlog_config.has_value()) {
400
6.05k
        BinlogConfig tmp_binlog_config;
401
6.05k
        tmp_binlog_config = binlog_config.value();
402
6.05k
        tmp_binlog_config.to_pb(tablet_meta_pb.mutable_binlog_config());
403
6.05k
    }
404
405
6.70k
    switch (tde_algorithm) {
406
0
    case doris::TEncryptionAlgorithm::AES256:
407
0
        tablet_meta_pb.set_encryption_algorithm(EncryptionAlgorithmPB::AES_256_CTR);
408
0
        break;
409
0
    case doris::TEncryptionAlgorithm::SM4:
410
0
        tablet_meta_pb.set_encryption_algorithm(EncryptionAlgorithmPB::SM4_128_CTR);
411
0
        break;
412
6.72k
    default:
413
6.72k
        tablet_meta_pb.set_encryption_algorithm(EncryptionAlgorithmPB::PLAINTEXT);
414
6.70k
    }
415
416
    // Initialize default external ColumnMeta usage according to storage format.
417
    // V2: legacy behavior, inline ColumnMetaPB only.
418
    // V3: V2 + external ColumnMetaPB (CMO) enabled by default.
419
6.72k
    switch (_storage_format) {
420
6.71k
    case TStorageFormat::V2:
421
6.71k
    case TStorageFormat::DEFAULT:
422
6.71k
    case TStorageFormat::V1:
423
6.71k
        break;
424
0
    case TStorageFormat::V3:
425
0
        schema->set_is_external_segment_column_meta_used(true);
426
0
        _schema->set_external_segment_meta_used_default(true);
427
428
0
        schema->set_integer_type_default_use_plain_encoding(true);
429
0
        _schema->set_integer_type_default_use_plain_encoding(true);
430
0
        schema->set_binary_plain_encoding_default_impl(
431
0
                BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2);
432
0
        _schema->set_binary_plain_encoding_default_impl(
433
0
                BinaryPlainEncodingTypePB::BINARY_PLAIN_ENCODING_V2);
434
0
        break;
435
0
    default:
436
0
        break;
437
6.72k
    }
438
439
6.71k
    init_from_pb(tablet_meta_pb);
440
6.71k
}
441
442
TabletMeta::TabletMeta(const TabletMeta& b)
443
1.30k
        : MetadataAdder(b),
444
1.30k
          _table_id(b._table_id),
445
1.30k
          _index_id(b._index_id),
446
1.30k
          _partition_id(b._partition_id),
447
1.30k
          _tablet_id(b._tablet_id),
448
1.30k
          _replica_id(b._replica_id),
449
1.30k
          _schema_hash(b._schema_hash),
450
1.30k
          _shard_id(b._shard_id),
451
1.30k
          _creation_time(b._creation_time),
452
1.30k
          _cumulative_layer_point(b._cumulative_layer_point),
453
1.30k
          _tablet_uid(b._tablet_uid),
454
1.30k
          _tablet_type(b._tablet_type),
455
1.30k
          _tablet_state(b._tablet_state),
456
1.30k
          _schema(b._schema),
457
1.30k
          _rs_metas(b._rs_metas),
458
1.30k
          _stale_rs_metas(b._stale_rs_metas),
459
1.30k
          _in_restore_mode(b._in_restore_mode),
460
1.30k
          _preferred_rowset_type(b._preferred_rowset_type),
461
1.30k
          _storage_policy_id(b._storage_policy_id),
462
1.30k
          _cooldown_meta_id(b._cooldown_meta_id),
463
1.30k
          _enable_unique_key_merge_on_write(b._enable_unique_key_merge_on_write),
464
1.30k
          _delete_bitmap(b._delete_bitmap),
465
1.30k
          _binlog_config(b._binlog_config),
466
1.30k
          _compaction_policy(b._compaction_policy),
467
1.30k
          _time_series_compaction_goal_size_mbytes(b._time_series_compaction_goal_size_mbytes),
468
          _time_series_compaction_file_count_threshold(
469
1.30k
                  b._time_series_compaction_file_count_threshold),
470
          _time_series_compaction_time_threshold_seconds(
471
1.30k
                  b._time_series_compaction_time_threshold_seconds),
472
          _time_series_compaction_empty_rowsets_threshold(
473
1.30k
                  b._time_series_compaction_empty_rowsets_threshold),
474
1.30k
          _time_series_compaction_level_threshold(b._time_series_compaction_level_threshold),
475
          _vertical_compaction_num_columns_per_group(
476
1.30k
                  b._vertical_compaction_num_columns_per_group) {};
477
478
void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn,
479
8.52M
                                          ColumnPB* column) {
480
8.52M
    column->set_unique_id(unique_id);
481
8.52M
    column->set_name(tcolumn.column_name);
482
8.52M
    column->set_is_auto_increment(tcolumn.is_auto_increment);
483
8.54M
    if (tcolumn.__isset.is_on_update_current_timestamp) {
484
8.54M
        column->set_is_on_update_current_timestamp(tcolumn.is_on_update_current_timestamp);
485
8.54M
    }
486
8.52M
    string data_type;
487
8.52M
    EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type);
488
8.52M
    column->set_type(data_type);
489
490
8.52M
    uint32_t length = TabletColumn::get_field_length_by_type(tcolumn.column_type.type,
491
8.52M
                                                             tcolumn.column_type.len);
492
8.52M
    column->set_length(length);
493
8.52M
    column->set_index_length(length);
494
8.52M
    column->set_precision(tcolumn.column_type.precision);
495
8.52M
    column->set_frac(tcolumn.column_type.scale);
496
497
8.52M
    if (tcolumn.__isset.result_is_nullable) {
498
1.93k
        column->set_result_is_nullable(tcolumn.result_is_nullable);
499
1.93k
    }
500
501
8.53M
    if (tcolumn.__isset.be_exec_version) {
502
8.53M
        column->set_be_exec_version(tcolumn.be_exec_version);
503
8.53M
    }
504
505
8.52M
    if (tcolumn.column_type.type == TPrimitiveType::VARCHAR ||
506
8.52M
        tcolumn.column_type.type == TPrimitiveType::STRING) {
507
2.41M
        if (!tcolumn.column_type.__isset.index_len) {
508
157
            column->set_index_length(10);
509
2.41M
        } else {
510
2.41M
            column->set_index_length(tcolumn.column_type.index_len);
511
2.41M
        }
512
2.41M
    }
513
8.52M
    if (!tcolumn.is_key) {
514
6.73M
        column->set_is_key(false);
515
6.73M
        if (tcolumn.__isset.aggregation) {
516
1.93k
            column->set_aggregation(tcolumn.aggregation);
517
6.73M
        } else {
518
6.73M
            string aggregation_type;
519
6.73M
            EnumToString(TAggregationType, tcolumn.aggregation_type, aggregation_type);
520
6.73M
            column->set_aggregation(aggregation_type);
521
6.73M
        }
522
6.73M
    } else {
523
1.78M
        column->set_is_key(true);
524
1.78M
        column->set_aggregation("NONE");
525
1.78M
    }
526
8.52M
    column->set_is_nullable(tcolumn.is_allow_null);
527
8.52M
    if (tcolumn.__isset.default_value) {
528
802k
        column->set_default_value(tcolumn.default_value);
529
802k
    }
530
8.52M
    if (tcolumn.__isset.is_bloom_filter_column) {
531
7.22k
        column->set_is_bf_column(tcolumn.is_bloom_filter_column);
532
7.22k
    }
533
8.54M
    if (tcolumn.__isset.visible) {
534
8.54M
        column->set_visible(tcolumn.visible);
535
8.54M
    }
536
9.89M
    for (size_t i = 0; i < tcolumn.children_column.size(); i++) {
537
1.37M
        ColumnPB* children_column = column->add_children_columns();
538
1.37M
        init_column_from_tcolumn(tcolumn.children_column[i].col_unique_id,
539
1.37M
                                 tcolumn.children_column[i], children_column);
540
1.37M
    }
541
8.54M
    if (tcolumn.column_type.__isset.variant_max_subcolumns_count) {
542
8.54M
        column->set_variant_max_subcolumns_count(tcolumn.column_type.variant_max_subcolumns_count);
543
8.54M
    }
544
8.52M
    if (tcolumn.__isset.pattern_type) {
545
33.7k
        switch (tcolumn.pattern_type) {
546
1.28k
        case TPatternType::MATCH_NAME:
547
1.28k
            column->set_pattern_type(PatternTypePB::MATCH_NAME);
548
1.28k
            break;
549
32.4k
        case TPatternType::MATCH_NAME_GLOB:
550
32.4k
            column->set_pattern_type(PatternTypePB::MATCH_NAME_GLOB);
551
33.7k
        }
552
33.7k
    }
553
8.54M
    if (tcolumn.__isset.variant_enable_typed_paths_to_sparse) {
554
8.54M
        column->set_variant_enable_typed_paths_to_sparse(
555
8.54M
                tcolumn.variant_enable_typed_paths_to_sparse);
556
8.54M
    }
557
8.55M
    if (tcolumn.__isset.variant_max_sparse_column_statistics_size) {
558
8.55M
        column->set_variant_max_sparse_column_statistics_size(
559
8.55M
                tcolumn.variant_max_sparse_column_statistics_size);
560
8.55M
    }
561
8.52M
    if (tcolumn.__isset.variant_sparse_hash_shard_count) {
562
7.17M
        column->set_variant_sparse_hash_shard_count(tcolumn.variant_sparse_hash_shard_count);
563
7.17M
    }
564
8.52M
    if (tcolumn.__isset.variant_enable_doc_mode) {
565
7.17M
        column->set_variant_enable_doc_mode(tcolumn.variant_enable_doc_mode);
566
7.17M
    }
567
8.52M
    if (tcolumn.__isset.variant_doc_materialization_min_rows) {
568
7.17M
        column->set_variant_doc_materialization_min_rows(
569
7.17M
                tcolumn.variant_doc_materialization_min_rows);
570
7.17M
    }
571
8.52M
    if (tcolumn.__isset.variant_doc_hash_shard_count) {
572
7.17M
        column->set_variant_doc_hash_shard_count(tcolumn.variant_doc_hash_shard_count);
573
7.17M
    }
574
8.52M
    if (tcolumn.__isset.variant_enable_nested_group) {
575
7.17M
        column->set_variant_enable_nested_group(tcolumn.variant_enable_nested_group);
576
7.17M
    }
577
8.52M
}
578
579
61.1k
void TabletMeta::remove_rowset_delete_bitmap(const RowsetId& rowset_id, const Version& version) {
580
61.1k
    if (_enable_unique_key_merge_on_write) {
581
37.8k
        delete_bitmap().remove({rowset_id, 0, 0}, {rowset_id, UINT32_MAX, 0});
582
37.8k
        if (config::enable_mow_verbose_log) {
583
0
            LOG_INFO("delete rowset delete bitmap. tablet={}, rowset={}, version={}", tablet_id(),
584
0
                     rowset_id.to_string(), version.to_string());
585
0
        }
586
37.8k
        size_t rowset_cache_version_size = delete_bitmap().remove_rowset_cache_version(rowset_id);
587
37.8k
        _check_mow_rowset_cache_version_size(rowset_cache_version_size);
588
37.8k
    }
589
61.1k
}
590
591
176
Status TabletMeta::create_from_file(const string& file_path) {
592
176
    TabletMetaPB tablet_meta_pb;
593
176
    RETURN_IF_ERROR(load_from_file(file_path, &tablet_meta_pb));
594
176
    init_from_pb(tablet_meta_pb);
595
176
    return Status::OK();
596
176
}
597
598
356
Status TabletMeta::load_from_file(const string& file_path, TabletMetaPB* tablet_meta_pb) {
599
356
    FileHeader<TabletMetaPB> file_header(file_path);
600
    // In file_header.deserialize(), it validates file length, signature, checksum of protobuf.
601
356
    RETURN_IF_ERROR(file_header.deserialize());
602
356
    try {
603
356
        tablet_meta_pb->CopyFrom(file_header.message());
604
356
    } catch (const std::exception& e) {
605
0
        LOG(WARNING) << "Failed to copy protocol buffer object: " << e.what()
606
0
                     << ", file=" << file_path;
607
0
        return Status::Error<PARSE_PROTOBUF_ERROR>(
608
0
                "fail to copy protocol buffer object. file={}, error={}", file_path, e.what());
609
0
    }
610
356
    return Status::OK();
611
356
}
612
613
6
Status TabletMeta::create_from_buffer(const uint8_t* buffer, size_t buffer_size) {
614
6
    FileHeader<TabletMetaPB> file_header(""); // empty file path
615
6
    RETURN_IF_ERROR(file_header.deserialize_from_memory(buffer, buffer_size));
616
617
2
    TabletMetaPB tablet_meta_pb;
618
2
    try {
619
2
        tablet_meta_pb.CopyFrom(file_header.message());
620
2
    } catch (const std::exception& e) {
621
0
        LOG(WARNING) << "Failed to copy protocol buffer object from buffer: " << e.what();
622
0
        return Status::Error<ErrorCode::PARSE_PROTOBUF_ERROR>(
623
0
                "fail to copy protocol buffer object from buffer. error={}", e.what());
624
0
    }
625
626
2
    init_from_pb(tablet_meta_pb);
627
2
    return Status::OK();
628
2
}
629
630
std::string TabletMeta::construct_header_file_path(const string& schema_hash_path,
631
175
                                                   int64_t tablet_id) {
632
175
    std::stringstream header_name_stream;
633
175
    header_name_stream << schema_hash_path << "/" << tablet_id << ".hdr";
634
175
    return header_name_stream.str();
635
175
}
636
637
0
Status TabletMeta::save_as_json(const string& file_path) {
638
0
    std::string json_meta;
639
0
    json2pb::Pb2JsonOptions json_options;
640
0
    json_options.pretty_json = true;
641
0
    json_options.bytes_to_base64 = true;
642
0
    to_json(&json_meta, json_options);
643
    // save to file
644
0
    io::FileWriterPtr file_writer;
645
0
    RETURN_IF_ERROR(io::global_local_filesystem()->create_file(file_path, &file_writer));
646
0
    RETURN_IF_ERROR(file_writer->append(json_meta));
647
0
    RETURN_IF_ERROR(file_writer->close());
648
0
    return Status::OK();
649
0
}
650
651
3.75k
Status TabletMeta::save(const string& file_path) {
652
3.75k
    TabletMetaPB tablet_meta_pb;
653
3.75k
    to_meta_pb(&tablet_meta_pb, false);
654
3.75k
    return TabletMeta::save(file_path, tablet_meta_pb);
655
3.75k
}
656
657
3.93k
Status TabletMeta::save(const string& file_path, const TabletMetaPB& tablet_meta_pb) {
658
3.93k
    DCHECK(!file_path.empty());
659
3.93k
    FileHeader<TabletMetaPB> file_header(file_path);
660
3.93k
    try {
661
3.93k
        file_header.mutable_message()->CopyFrom(tablet_meta_pb);
662
3.93k
    } catch (...) {
663
0
        LOG(WARNING) << "fail to copy protocol buffer object. file='" << file_path;
664
0
        return Status::Error<ErrorCode::INTERNAL_ERROR>(
665
0
                "fail to copy protocol buffer object. file={}", file_path);
666
0
    }
667
3.92k
    RETURN_IF_ERROR(file_header.prepare());
668
3.92k
    RETURN_IF_ERROR(file_header.serialize());
669
3.92k
    return Status::OK();
670
3.92k
}
671
672
16.5k
Status TabletMeta::save_meta(DataDir* data_dir) {
673
16.5k
    std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
674
16.5k
    return _save_meta(data_dir);
675
16.5k
}
676
677
16.5k
Status TabletMeta::_save_meta(DataDir* data_dir) {
678
    // check if tablet uid is valid
679
16.5k
    if (_tablet_uid.hi == 0 && _tablet_uid.lo == 0) {
680
0
        LOG(FATAL) << "tablet_uid is invalid"
681
0
                   << " tablet=" << tablet_id() << " _tablet_uid=" << _tablet_uid.to_string();
682
0
    }
683
16.5k
    string meta_binary;
684
685
16.5k
    auto t1 = MonotonicMicros();
686
16.5k
    serialize(&meta_binary);
687
16.5k
    auto t2 = MonotonicMicros();
688
16.5k
    Status status = TabletMetaManager::save(data_dir, tablet_id(), schema_hash(), meta_binary);
689
16.5k
    if (!status.ok()) {
690
0
        LOG(FATAL) << "fail to save tablet_meta. status=" << status << ", tablet_id=" << tablet_id()
691
0
                   << ", schema_hash=" << schema_hash();
692
0
    }
693
16.5k
    auto t3 = MonotonicMicros();
694
16.5k
    auto cost = t3 - t1;
695
16.5k
    if (cost > 1 * 1000 * 1000) {
696
0
        LOG(INFO) << "save tablet(" << tablet_id() << ") meta too slow. serialize cost " << t2 - t1
697
0
                  << "(us), serialized binary size: " << meta_binary.length()
698
0
                  << "(bytes), write rocksdb cost " << t3 - t2 << "(us)";
699
0
    }
700
16.5k
    return status;
701
16.5k
}
702
703
16.6k
void TabletMeta::serialize(string* meta_binary) {
704
16.6k
    TabletMetaPB tablet_meta_pb;
705
16.6k
    to_meta_pb(&tablet_meta_pb, false);
706
16.6k
    if (tablet_meta_pb.partition_id() <= 0) {
707
468
        LOG(WARNING) << "invalid partition id " << tablet_meta_pb.partition_id() << " tablet "
708
468
                     << tablet_meta_pb.tablet_id();
709
468
    }
710
16.6k
    DBUG_EXECUTE_IF("TabletMeta::serialize::zero_partition_id", {
711
16.6k
        long partition_id = tablet_meta_pb.partition_id();
712
16.6k
        tablet_meta_pb.set_partition_id(0);
713
16.6k
        LOG(WARNING) << "set debug point TabletMeta::serialize::zero_partition_id old="
714
16.6k
                     << partition_id << " new=" << tablet_meta_pb.DebugString();
715
16.6k
    });
716
16.6k
    bool serialize_success = tablet_meta_pb.SerializeToString(meta_binary);
717
16.6k
    if (!_rs_metas.empty() || !_stale_rs_metas.empty()) {
718
16.6k
        _avg_rs_meta_serialize_size =
719
16.6k
                meta_binary->length() / (_rs_metas.size() + _stale_rs_metas.size());
720
16.6k
        if (meta_binary->length() > config::tablet_meta_serialize_size_limit ||
721
16.6k
            !serialize_success) {
722
0
            int64_t origin_meta_size = meta_binary->length();
723
0
            int64_t stale_rowsets_num = tablet_meta_pb.stale_rs_metas().size();
724
0
            tablet_meta_pb.clear_stale_rs_metas();
725
0
            meta_binary->clear();
726
0
            serialize_success = tablet_meta_pb.SerializeToString(meta_binary);
727
0
            LOG(WARNING) << "tablet meta serialization size exceeds limit: "
728
0
                         << config::tablet_meta_serialize_size_limit
729
0
                         << " clean up stale rowsets, tablet id: " << tablet_id()
730
0
                         << " stale rowset num: " << stale_rowsets_num
731
0
                         << " serialization size before clean " << origin_meta_size
732
0
                         << " serialization size after clean " << meta_binary->length();
733
0
        }
734
16.6k
    }
735
736
16.6k
    if (!serialize_success) {
737
0
        LOG(FATAL) << "failed to serialize meta " << tablet_id();
738
0
    }
739
16.6k
}
740
741
331k
Status TabletMeta::deserialize(std::string_view meta_binary) {
742
331k
    TabletMetaPB tablet_meta_pb;
743
331k
    bool parsed = tablet_meta_pb.ParseFromArray(meta_binary.data(),
744
331k
                                                static_cast<int32_t>(meta_binary.size()));
745
331k
    if (!parsed) {
746
0
        return Status::Error<INIT_FAILED>("parse tablet meta failed");
747
0
    }
748
331k
    init_from_pb(tablet_meta_pb);
749
331k
    return Status::OK();
750
331k
}
751
752
734k
void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) {
753
734k
    _table_id = tablet_meta_pb.table_id();
754
734k
    _index_id = tablet_meta_pb.index_id();
755
734k
    _partition_id = tablet_meta_pb.partition_id();
756
734k
    _tablet_id = tablet_meta_pb.tablet_id();
757
734k
    _replica_id = tablet_meta_pb.replica_id();
758
734k
    _schema_hash = tablet_meta_pb.schema_hash();
759
734k
    _shard_id = tablet_meta_pb.shard_id();
760
734k
    _creation_time = tablet_meta_pb.creation_time();
761
734k
    _cumulative_layer_point = tablet_meta_pb.cumulative_layer_point();
762
734k
    _tablet_uid = TabletUid(tablet_meta_pb.tablet_uid());
763
734k
    _ttl_seconds = tablet_meta_pb.ttl_seconds();
764
734k
    if (tablet_meta_pb.has_tablet_type()) {
765
733k
        _tablet_type = tablet_meta_pb.tablet_type();
766
733k
    } else {
767
267
        _tablet_type = TabletTypePB::TABLET_TYPE_DISK;
768
267
    }
769
770
    // init _tablet_state
771
734k
    switch (tablet_meta_pb.tablet_state()) {
772
10.4k
    case PB_NOTREADY:
773
10.4k
        _tablet_state = TabletState::TABLET_NOTREADY;
774
10.4k
        break;
775
719k
    case PB_RUNNING:
776
719k
        _tablet_state = TabletState::TABLET_RUNNING;
777
719k
        break;
778
0
    case PB_TOMBSTONED:
779
0
        _tablet_state = TabletState::TABLET_TOMBSTONED;
780
0
        break;
781
0
    case PB_STOPPED:
782
0
        _tablet_state = TabletState::TABLET_STOPPED;
783
0
        break;
784
3.91k
    case PB_SHUTDOWN:
785
3.91k
        _tablet_state = TabletState::TABLET_SHUTDOWN;
786
3.91k
        break;
787
0
    default:
788
0
        LOG(WARNING) << "tablet has no state. tablet=" << tablet_id()
789
0
                     << ", schema_hash=" << schema_hash();
790
734k
    }
791
792
    // init _schema
793
733k
    TabletSchemaSPtr schema = std::make_shared<TabletSchema>();
794
733k
    schema->init_from_pb(tablet_meta_pb.schema());
795
733k
    if (_handle) {
796
4
        TabletSchemaCache::instance()->release(_handle);
797
4
    }
798
733k
    auto pair = TabletSchemaCache::instance()->insert(schema->to_key());
799
733k
    _handle = pair.first;
800
733k
    _schema = pair.second;
801
802
734k
    if (tablet_meta_pb.has_enable_unique_key_merge_on_write()) {
803
734k
        _enable_unique_key_merge_on_write = tablet_meta_pb.enable_unique_key_merge_on_write();
804
734k
        _delete_bitmap->set_tablet_id(_tablet_id);
805
734k
    }
806
807
    // init _rs_metas
808
733k
    for (auto& it : tablet_meta_pb.rs_metas()) {
809
604k
        RowsetMetaSharedPtr rs_meta(new RowsetMeta());
810
604k
        rs_meta->init_from_pb(it);
811
604k
        _rs_metas.emplace(rs_meta->version(), rs_meta);
812
604k
    }
813
814
    // For mow table, delete bitmap of stale rowsets has not been persisted.
815
    // When be restart, query should not read the stale rowset, otherwise duplicate keys
816
    // will be read out. Therefore, we don't add them to _stale_rs_meta for mow table.
817
734k
    if (!config::skip_loading_stale_rowset_meta && !_enable_unique_key_merge_on_write) {
818
579k
        for (auto& it : tablet_meta_pb.stale_rs_metas()) {
819
7.88k
            RowsetMetaSharedPtr rs_meta(new RowsetMeta());
820
7.88k
            rs_meta->init_from_pb(it);
821
7.88k
            _stale_rs_metas.emplace(rs_meta->version(), rs_meta);
822
7.88k
        }
823
579k
    }
824
825
734k
    if (tablet_meta_pb.has_in_restore_mode()) {
826
734k
        _in_restore_mode = tablet_meta_pb.in_restore_mode();
827
734k
    }
828
829
733k
    if (tablet_meta_pb.has_preferred_rowset_type()) {
830
728k
        _preferred_rowset_type = tablet_meta_pb.preferred_rowset_type();
831
728k
    }
832
833
733k
    _storage_policy_id = tablet_meta_pb.storage_policy_id();
834
733k
    if (tablet_meta_pb.has_cooldown_meta_id()) {
835
395k
        _cooldown_meta_id = tablet_meta_pb.cooldown_meta_id();
836
395k
    }
837
838
733k
    if (tablet_meta_pb.has_delete_bitmap()) {
839
50.3k
        int rst_ids_size = tablet_meta_pb.delete_bitmap().rowset_ids_size();
840
50.3k
        int seg_ids_size = tablet_meta_pb.delete_bitmap().segment_ids_size();
841
50.3k
        int versions_size = tablet_meta_pb.delete_bitmap().versions_size();
842
50.3k
        int seg_maps_size = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps_size();
843
50.3k
        CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size &&
844
50.3k
              seg_maps_size == versions_size);
845
54.2k
        for (int i = 0; i < rst_ids_size; ++i) {
846
3.88k
            RowsetId rst_id;
847
3.88k
            rst_id.init(tablet_meta_pb.delete_bitmap().rowset_ids(i));
848
3.88k
            auto seg_id = tablet_meta_pb.delete_bitmap().segment_ids(i);
849
3.88k
            auto ver = tablet_meta_pb.delete_bitmap().versions(i);
850
3.88k
            auto bitmap = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps(i).data();
851
3.88k
            delete_bitmap().delete_bitmap[{rst_id, seg_id, ver}] = roaring::Roaring::read(bitmap);
852
3.88k
        }
853
50.3k
    }
854
855
733k
    if (tablet_meta_pb.has_binlog_config()) {
856
338k
        _binlog_config = tablet_meta_pb.binlog_config();
857
338k
    }
858
733k
    _compaction_policy = tablet_meta_pb.compaction_policy();
859
733k
    _time_series_compaction_goal_size_mbytes =
860
733k
            tablet_meta_pb.time_series_compaction_goal_size_mbytes();
861
733k
    _time_series_compaction_file_count_threshold =
862
733k
            tablet_meta_pb.time_series_compaction_file_count_threshold();
863
733k
    _time_series_compaction_time_threshold_seconds =
864
733k
            tablet_meta_pb.time_series_compaction_time_threshold_seconds();
865
733k
    _time_series_compaction_empty_rowsets_threshold =
866
733k
            tablet_meta_pb.time_series_compaction_empty_rowsets_threshold();
867
733k
    _time_series_compaction_level_threshold =
868
733k
            tablet_meta_pb.time_series_compaction_level_threshold();
869
733k
    _vertical_compaction_num_columns_per_group =
870
733k
            tablet_meta_pb.vertical_compaction_num_columns_per_group();
871
872
734k
    if (tablet_meta_pb.has_encryption_algorithm()) {
873
734k
        _encryption_algorithm = tablet_meta_pb.encryption_algorithm();
874
734k
    }
875
733k
}
876
877
20.7k
void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb, bool cloud_get_rowset_meta) {
878
20.7k
    tablet_meta_pb->set_table_id(table_id());
879
20.7k
    tablet_meta_pb->set_index_id(index_id());
880
20.7k
    tablet_meta_pb->set_partition_id(partition_id());
881
20.7k
    tablet_meta_pb->set_tablet_id(tablet_id());
882
20.7k
    tablet_meta_pb->set_replica_id(replica_id());
883
20.7k
    tablet_meta_pb->set_schema_hash(schema_hash());
884
20.7k
    tablet_meta_pb->set_shard_id(shard_id());
885
20.7k
    tablet_meta_pb->set_creation_time(creation_time());
886
20.7k
    tablet_meta_pb->set_cumulative_layer_point(cumulative_layer_point());
887
20.7k
    *(tablet_meta_pb->mutable_tablet_uid()) = tablet_uid().to_proto();
888
20.7k
    tablet_meta_pb->set_tablet_type(_tablet_type);
889
20.7k
    tablet_meta_pb->set_ttl_seconds(_ttl_seconds);
890
20.7k
    switch (tablet_state()) {
891
9
    case TABLET_NOTREADY:
892
9
        tablet_meta_pb->set_tablet_state(PB_NOTREADY);
893
9
        break;
894
13.8k
    case TABLET_RUNNING:
895
13.8k
        tablet_meta_pb->set_tablet_state(PB_RUNNING);
896
13.8k
        break;
897
0
    case TABLET_TOMBSTONED:
898
0
        tablet_meta_pb->set_tablet_state(PB_TOMBSTONED);
899
0
        break;
900
0
    case TABLET_STOPPED:
901
0
        tablet_meta_pb->set_tablet_state(PB_STOPPED);
902
0
        break;
903
6.96k
    case TABLET_SHUTDOWN:
904
6.96k
        tablet_meta_pb->set_tablet_state(PB_SHUTDOWN);
905
6.96k
        break;
906
20.7k
    }
907
908
    // RowsetMetaPB is separated from TabletMetaPB
909
20.7k
    if (!config::is_cloud_mode() || cloud_get_rowset_meta) {
910
56.7k
        for (const auto& [_, rs] : _rs_metas) {
911
56.7k
            rs->to_rowset_pb(tablet_meta_pb->add_rs_metas());
912
56.7k
        }
913
97.3k
        for (const auto& [_, rs] : _stale_rs_metas) {
914
97.3k
            rs->to_rowset_pb(tablet_meta_pb->add_stale_rs_metas());
915
97.3k
        }
916
20.7k
    }
917
918
20.7k
    _schema->to_schema_pb(tablet_meta_pb->mutable_schema());
919
920
20.7k
    tablet_meta_pb->set_in_restore_mode(in_restore_mode());
921
922
    // to avoid modify tablet meta to the greatest extend
923
20.8k
    if (_preferred_rowset_type == BETA_ROWSET) {
924
20.8k
        tablet_meta_pb->set_preferred_rowset_type(_preferred_rowset_type);
925
20.8k
    }
926
20.7k
    if (_storage_policy_id > 0) {
927
5
        tablet_meta_pb->set_storage_policy_id(_storage_policy_id);
928
5
    }
929
20.7k
    if (_cooldown_meta_id.initialized()) {
930
5
        tablet_meta_pb->mutable_cooldown_meta_id()->CopyFrom(_cooldown_meta_id.to_proto());
931
5
    }
932
933
20.7k
    tablet_meta_pb->set_enable_unique_key_merge_on_write(_enable_unique_key_merge_on_write);
934
935
20.7k
    if (_enable_unique_key_merge_on_write) {
936
2.61k
        std::set<RowsetId> stale_rs_ids;
937
77.0k
        for (const auto& [_, rowset] : _stale_rs_metas) {
938
77.0k
            stale_rs_ids.insert(rowset->rowset_id());
939
77.0k
        }
940
2.61k
        DeleteBitmapPB* delete_bitmap_pb = tablet_meta_pb->mutable_delete_bitmap();
941
8.13k
        for (auto& [id, bitmap] : delete_bitmap().snapshot().delete_bitmap) {
942
8.13k
            auto& [rowset_id, segment_id, ver] = id;
943
8.13k
            if (stale_rs_ids.count(rowset_id) != 0) {
944
7.39k
                continue;
945
7.39k
            }
946
734
            delete_bitmap_pb->add_rowset_ids(rowset_id.to_string());
947
734
            delete_bitmap_pb->add_segment_ids(segment_id);
948
734
            delete_bitmap_pb->add_versions(ver);
949
734
            std::string bitmap_data(bitmap.getSizeInBytes(), '\0');
950
734
            bitmap.write(bitmap_data.data());
951
734
            *(delete_bitmap_pb->add_segment_delete_bitmaps()) = std::move(bitmap_data);
952
734
        }
953
2.61k
    }
954
20.7k
    _binlog_config.to_pb(tablet_meta_pb->mutable_binlog_config());
955
20.7k
    tablet_meta_pb->set_compaction_policy(compaction_policy());
956
20.7k
    tablet_meta_pb->set_time_series_compaction_goal_size_mbytes(
957
20.7k
            time_series_compaction_goal_size_mbytes());
958
20.7k
    tablet_meta_pb->set_time_series_compaction_file_count_threshold(
959
20.7k
            time_series_compaction_file_count_threshold());
960
20.7k
    tablet_meta_pb->set_time_series_compaction_time_threshold_seconds(
961
20.7k
            time_series_compaction_time_threshold_seconds());
962
20.7k
    tablet_meta_pb->set_time_series_compaction_empty_rowsets_threshold(
963
20.7k
            time_series_compaction_empty_rowsets_threshold());
964
20.7k
    tablet_meta_pb->set_time_series_compaction_level_threshold(
965
20.7k
            time_series_compaction_level_threshold());
966
20.7k
    tablet_meta_pb->set_vertical_compaction_num_columns_per_group(
967
20.7k
            vertical_compaction_num_columns_per_group());
968
969
20.7k
    tablet_meta_pb->set_encryption_algorithm(_encryption_algorithm);
970
20.7k
}
971
972
3
void TabletMeta::to_json(string* json_string, json2pb::Pb2JsonOptions& options) {
973
3
    TabletMetaPB tablet_meta_pb;
974
3
    to_meta_pb(&tablet_meta_pb, false);
975
3
    json2pb::ProtoMessageToJson(tablet_meta_pb, json_string, options);
976
3
}
977
978
1.48M
Version TabletMeta::max_version() const {
979
1.48M
    Version max_version = {-1, 0};
980
4.22M
    for (const auto& [_, rs_meta] : _rs_metas) {
981
4.22M
        if (rs_meta->end_version() > max_version.second) {
982
1.96M
            max_version = rs_meta->version();
983
1.96M
        }
984
4.22M
    }
985
1.48M
    return max_version;
986
1.48M
}
987
988
1.06M
size_t TabletMeta::version_count_cross_with_range(const Version& range) const {
989
1.06M
    size_t count = 0;
990
1.93M
    for (const auto& [_, rs_meta] : _rs_metas) {
991
1.93M
        if (!(range.first > rs_meta->version().second || range.second < rs_meta->version().first)) {
992
1.93M
            count++;
993
1.93M
        }
994
1.93M
    }
995
1.06M
    return count;
996
1.06M
}
997
998
35.3k
Status TabletMeta::add_rs_meta(const RowsetMetaSharedPtr& rs_meta) {
999
    // check RowsetMeta is valid
1000
748k
    for (const auto& [_, rs] : _rs_metas) {
1001
748k
        if (rs->version() == rs_meta->version()) {
1002
0
            if (rs->rowset_id() != rs_meta->rowset_id()) {
1003
0
                return Status::Error<PUSH_VERSION_ALREADY_EXIST>(
1004
0
                        "version already exist. rowset_id={}, version={}, tablet={}",
1005
0
                        rs->rowset_id().to_string(), rs->version().to_string(), tablet_id());
1006
0
            } else {
1007
                // rowsetid,version is equal, it is a duplicate req, skip it
1008
0
                return Status::OK();
1009
0
            }
1010
0
        }
1011
748k
    }
1012
35.3k
    _rs_metas.emplace(rs_meta->version(), rs_meta);
1013
35.3k
    return Status::OK();
1014
35.3k
}
1015
1016
332k
void TabletMeta::add_rowsets_unchecked(const std::vector<RowsetSharedPtr>& to_add) {
1017
347k
    for (const auto& rs : to_add) {
1018
347k
        _rs_metas.emplace(rs->rowset_meta()->version(), rs->rowset_meta());
1019
347k
    }
1020
332k
}
1021
1022
void TabletMeta::delete_rs_meta_by_version(const Version& version,
1023
891
                                           std::vector<RowsetMetaSharedPtr>* deleted_rs_metas) {
1024
891
    size_t rowset_cache_version_size = 0;
1025
892
    if (auto it = _rs_metas.find(version); it != _rs_metas.end()) {
1026
892
        if (deleted_rs_metas != nullptr) {
1027
0
            deleted_rs_metas->push_back(it->second);
1028
0
        }
1029
892
        auto rowset_id = it->second->rowset_id();
1030
892
        _rs_metas.erase(it);
1031
892
        if (_enable_unique_key_merge_on_write) {
1032
42
            rowset_cache_version_size = _delete_bitmap->remove_rowset_cache_version(rowset_id);
1033
42
        }
1034
892
        return;
1035
892
    }
1036
18.4E
    _check_mow_rowset_cache_version_size(rowset_cache_version_size);
1037
18.4E
}
1038
1039
void TabletMeta::modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add,
1040
                                 const std::vector<RowsetMetaSharedPtr>& to_delete,
1041
8.58k
                                 bool same_version) {
1042
8.58k
    size_t rowset_cache_version_size = 0;
1043
    // Remove to_delete rowsets from _rs_metas
1044
71.1k
    for (auto rs_to_del : to_delete) {
1045
71.1k
        if (auto it = _rs_metas.find(rs_to_del->version()); it != _rs_metas.end()) {
1046
71.1k
            auto rowset_id = it->second->rowset_id();
1047
71.1k
            _rs_metas.erase(it);
1048
71.1k
            if (_enable_unique_key_merge_on_write) {
1049
40.1k
                rowset_cache_version_size = _delete_bitmap->remove_rowset_cache_version(rowset_id);
1050
40.1k
            }
1051
71.1k
        }
1052
71.1k
    }
1053
8.58k
    if (!same_version) {
1054
        // put to_delete rowsets in _stale_rs_metas.
1055
71.1k
        for (auto rs_to_del : to_delete) {
1056
71.1k
            _stale_rs_metas.emplace(rs_to_del->version(), rs_to_del);
1057
71.1k
        }
1058
8.56k
    }
1059
1060
    // put to_add rowsets in _rs_metas.
1061
8.58k
    for (auto rs_to_add : to_add) {
1062
1.44k
        _rs_metas.emplace(rs_to_add->version(), rs_to_add);
1063
1.44k
    }
1064
8.58k
    _check_mow_rowset_cache_version_size(rowset_cache_version_size);
1065
8.58k
}
1066
1067
// Use the passing "rs_metas" to replace the rs meta in this tablet meta
1068
// Also clear the _stale_rs_metas because this tablet meta maybe copyied from
1069
// an existing tablet before. Add after revise, only the passing "rs_metas"
1070
// is needed.
1071
325
void TabletMeta::revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas) {
1072
325
    {
1073
325
        std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
1074
325
        _rs_metas.clear();
1075
478
        for (auto& rs_meta : rs_metas) {
1076
478
            _rs_metas.emplace(rs_meta->version(), rs_meta);
1077
478
        }
1078
325
        _stale_rs_metas.clear();
1079
325
    }
1080
325
    if (_enable_unique_key_merge_on_write) {
1081
40
        _delete_bitmap->clear_rowset_cache_version();
1082
40
    }
1083
325
}
1084
1085
// This method should call after revise_rs_metas, since new rs_metas might be a subset
1086
// of original tablet, we should revise the delete_bitmap according to current rowset.
1087
//
1088
// Delete bitmap is protected by Tablet::_meta_lock, we don't need to acquire the
1089
// TabletMeta's _meta_lock
1090
41
void TabletMeta::revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap) {
1091
41
    _delete_bitmap = std::make_unique<DeleteBitmap>(tablet_id());
1092
48
    for (const auto& [_, rs] : _rs_metas) {
1093
48
        DeleteBitmap rs_bm(tablet_id());
1094
48
        delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX},
1095
48
                             &rs_bm);
1096
48
        _delete_bitmap->merge(rs_bm);
1097
48
    }
1098
41
    for (const auto& [_, rs] : _stale_rs_metas) {
1099
0
        DeleteBitmap rs_bm(tablet_id());
1100
0
        delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX},
1101
0
                             &rs_bm);
1102
0
        _delete_bitmap->merge(rs_bm);
1103
0
    }
1104
41
}
1105
1106
64.5k
void TabletMeta::delete_stale_rs_meta_by_version(const Version& version) {
1107
64.5k
    _stale_rs_metas.erase(version);
1108
64.5k
}
1109
1110
0
RowsetMetaSharedPtr TabletMeta::acquire_rs_meta_by_version(const Version& version) const {
1111
0
    if (auto it = _rs_metas.find(version); it != _rs_metas.end()) {
1112
0
        return it->second;
1113
0
    }
1114
0
    return nullptr;
1115
0
}
1116
1117
8.53k
RowsetMetaSharedPtr TabletMeta::acquire_stale_rs_meta_by_version(const Version& version) const {
1118
8.53k
    if (auto it = _stale_rs_metas.find(version); it != _stale_rs_metas.end()) {
1119
8.52k
        return it->second;
1120
8.52k
    }
1121
8
    return nullptr;
1122
8.53k
}
1123
1124
23
Status TabletMeta::set_partition_id(int64_t partition_id) {
1125
23
    if ((_partition_id > 0 && _partition_id != partition_id) || partition_id < 1) {
1126
0
        LOG(WARNING) << "cur partition id=" << _partition_id << " new partition id=" << partition_id
1127
0
                     << " not equal";
1128
0
    }
1129
23
    _partition_id = partition_id;
1130
23
    return Status::OK();
1131
23
}
1132
1133
0
void TabletMeta::clear_stale_rowset() {
1134
0
    _stale_rs_metas.clear();
1135
0
    if (_enable_unique_key_merge_on_write) {
1136
0
        _delete_bitmap->clear_rowset_cache_version();
1137
0
    }
1138
0
}
1139
1140
0
void TabletMeta::clear_rowsets() {
1141
0
    _rs_metas.clear();
1142
0
    if (_enable_unique_key_merge_on_write) {
1143
0
        _delete_bitmap->clear_rowset_cache_version();
1144
0
    }
1145
0
}
1146
1147
46.4k
void TabletMeta::_check_mow_rowset_cache_version_size(size_t rowset_cache_version_size) {
1148
46.4k
    if (_enable_unique_key_merge_on_write && config::enable_mow_verbose_log &&
1149
46.4k
        rowset_cache_version_size > _rs_metas.size() + _stale_rs_metas.size()) {
1150
0
        std::stringstream ss;
1151
0
        auto rowset_ids = _delete_bitmap->get_rowset_cache_version();
1152
0
        std::set<std::string> tablet_rowset_ids;
1153
0
        {
1154
0
            std::shared_lock rlock(_meta_lock);
1155
0
            for (const auto& [_, rs_meta] : _rs_metas) {
1156
0
                tablet_rowset_ids.emplace(rs_meta->rowset_id().to_string());
1157
0
            }
1158
0
            for (const auto& [_, rs_meta] : _stale_rs_metas) {
1159
0
                tablet_rowset_ids.emplace(rs_meta->rowset_id().to_string());
1160
0
            }
1161
0
        }
1162
0
        for (const auto& rowset_id : rowset_ids) {
1163
0
            if (tablet_rowset_ids.find(rowset_id) == tablet_rowset_ids.end()) {
1164
0
                ss << rowset_id << ", ";
1165
0
            }
1166
0
        }
1167
        // size(rowset_cache_version) <= size(_rs_metas) + size(_stale_rs_metas) + size(_unused_rs)
1168
0
        std::string msg = fmt::format(
1169
0
                "tablet: {}, rowset_cache_version size: {}, "
1170
0
                "_rs_metas size: {}, _stale_rs_metas size: {}, delta: {}. rowset only in cache: {}",
1171
0
                _tablet_id, rowset_cache_version_size, _rs_metas.size(), _stale_rs_metas.size(),
1172
0
                rowset_cache_version_size - _rs_metas.size() - _stale_rs_metas.size(), ss.str());
1173
0
        LOG(INFO) << msg;
1174
0
    }
1175
46.4k
}
1176
1177
3
bool operator==(const TabletMeta& a, const TabletMeta& b) {
1178
3
    if (a._table_id != b._table_id) return false;
1179
3
    if (a._index_id != b._index_id) return false;
1180
3
    if (a._partition_id != b._partition_id) return false;
1181
3
    if (a._tablet_id != b._tablet_id) return false;
1182
3
    if (a._replica_id != b._replica_id) return false;
1183
3
    if (a._schema_hash != b._schema_hash) return false;
1184
3
    if (a._shard_id != b._shard_id) return false;
1185
3
    if (a._creation_time != b._creation_time) return false;
1186
3
    if (a._cumulative_layer_point != b._cumulative_layer_point) return false;
1187
3
    if (a._tablet_uid != b._tablet_uid) return false;
1188
3
    if (a._tablet_type != b._tablet_type) return false;
1189
3
    if (a._tablet_state != b._tablet_state) return false;
1190
3
    if (*a._schema != *b._schema) return false;
1191
3
    if (a._rs_metas != b._rs_metas) return false;
1192
3
    if (a._in_restore_mode != b._in_restore_mode) return false;
1193
3
    if (a._preferred_rowset_type != b._preferred_rowset_type) return false;
1194
3
    if (a._storage_policy_id != b._storage_policy_id) return false;
1195
3
    if (a._compaction_policy != b._compaction_policy) return false;
1196
3
    if (a._time_series_compaction_goal_size_mbytes != b._time_series_compaction_goal_size_mbytes)
1197
0
        return false;
1198
3
    if (a._time_series_compaction_file_count_threshold !=
1199
3
        b._time_series_compaction_file_count_threshold)
1200
0
        return false;
1201
3
    if (a._time_series_compaction_time_threshold_seconds !=
1202
3
        b._time_series_compaction_time_threshold_seconds)
1203
0
        return false;
1204
3
    if (a._time_series_compaction_empty_rowsets_threshold !=
1205
3
        b._time_series_compaction_empty_rowsets_threshold)
1206
0
        return false;
1207
3
    if (a._time_series_compaction_level_threshold != b._time_series_compaction_level_threshold)
1208
0
        return false;
1209
3
    return true;
1210
3
}
1211
1212
0
bool operator!=(const TabletMeta& a, const TabletMeta& b) {
1213
0
    return !(a == b);
1214
0
}
1215
1216
// We cannot just copy the underlying memory to construct a string
1217
// due to equivalent objects may have different padding bytes.
1218
// Reading padding bytes is undefined behavior, neither copy nor
1219
// placement new will help simplify the code.
1220
// Refer to C11 standards §6.2.6.1/6 and §6.7.9/21 for more info.
1221
5.84M
static std::string agg_cache_key(int64_t tablet_id, const DeleteBitmap::BitmapKey& bmk) {
1222
5.84M
    std::string ret(sizeof(tablet_id) + sizeof(bmk), '\0');
1223
5.84M
    *reinterpret_cast<int64_t*>(ret.data()) = tablet_id;
1224
5.84M
    auto t = reinterpret_cast<DeleteBitmap::BitmapKey*>(ret.data() + sizeof(tablet_id));
1225
5.84M
    std::get<RowsetId>(*t).version = std::get<RowsetId>(bmk).version;
1226
5.84M
    std::get<RowsetId>(*t).hi = std::get<RowsetId>(bmk).hi;
1227
5.84M
    std::get<RowsetId>(*t).mi = std::get<RowsetId>(bmk).mi;
1228
5.84M
    std::get<RowsetId>(*t).lo = std::get<RowsetId>(bmk).lo;
1229
5.84M
    std::get<1>(*t) = std::get<1>(bmk);
1230
5.84M
    std::get<2>(*t) = std::get<2>(bmk);
1231
5.84M
    return ret;
1232
5.84M
}
1233
1234
// decode cache key info from a agg_cache_key
1235
static void decode_agg_cache_key(const std::string& key_str, int64_t& tablet_id,
1236
39.0k
                                 DeleteBitmap::BitmapKey& bmk) {
1237
39.0k
    const char* ptr = key_str.data();
1238
39.0k
    tablet_id = *reinterpret_cast<const int64_t*>(ptr);
1239
39.0k
    ptr += sizeof(tablet_id);
1240
39.0k
    const auto* t = reinterpret_cast<const DeleteBitmap::BitmapKey*>(ptr);
1241
39.0k
    std::get<RowsetId>(bmk).version = std::get<RowsetId>(*t).version;
1242
39.0k
    std::get<RowsetId>(bmk).hi = std::get<RowsetId>(*t).hi;
1243
39.0k
    std::get<RowsetId>(bmk).mi = std::get<RowsetId>(*t).mi;
1244
39.0k
    std::get<RowsetId>(bmk).lo = std::get<RowsetId>(*t).lo;
1245
39.0k
    std::get<1>(bmk) = std::get<1>(*t);
1246
39.0k
    std::get<2>(bmk) = std::get<2>(*t);
1247
39.0k
}
1248
1249
DeleteBitmapAggCache::DeleteBitmapAggCache(size_t capacity)
1250
8
        : LRUCachePolicy(CachePolicy::CacheType::DELETE_BITMAP_AGG_CACHE, capacity,
1251
8
                         LRUCacheType::SIZE, config::delete_bitmap_agg_cache_stale_sweep_time_sec,
1252
8
                         /*num_shards*/ 256,
1253
8
                         /*element_count_capacity*/ 0, /*enable_prune*/ true,
1254
8
                         /*is_lru_k*/ false) {}
1255
1256
12.6M
DeleteBitmapAggCache* DeleteBitmapAggCache::instance() {
1257
12.6M
    return ExecEnv::GetInstance()->delete_bitmap_agg_cache();
1258
12.6M
}
1259
1260
8
DeleteBitmapAggCache* DeleteBitmapAggCache::create_instance(size_t capacity) {
1261
8
    return new DeleteBitmapAggCache(capacity);
1262
8
}
1263
1264
2
DeleteBitmap DeleteBitmapAggCache::snapshot(int64_t tablet_id) {
1265
2
    DeleteBitmap ret(tablet_id);
1266
39.0k
    auto collector = [&](const LRUHandle* handle) {
1267
39.0k
        auto key = handle->key().to_string();
1268
39.0k
        int64_t key_tablet_id;
1269
39.0k
        DeleteBitmap::BitmapKey bmk;
1270
39.0k
        decode_agg_cache_key(key, key_tablet_id, bmk);
1271
39.0k
        if (key_tablet_id == tablet_id) {
1272
16
            const auto& dbm = reinterpret_cast<DeleteBitmapAggCache::Value*>(handle->value)->bitmap;
1273
16
            ret.set(bmk, dbm);
1274
16
        }
1275
39.0k
    };
1276
2
    DeleteBitmapAggCache::instance()->for_each_entry(collector);
1277
2
    return ret;
1278
2
}
1279
1280
948k
DeleteBitmap::DeleteBitmap(int64_t tablet_id) : _tablet_id(tablet_id) {}
1281
1282
4.36k
DeleteBitmap::DeleteBitmap(const DeleteBitmap& o) {
1283
4.36k
    std::shared_lock l1(o.lock);
1284
4.36k
    delete_bitmap = o.delete_bitmap;
1285
4.36k
    _tablet_id = o._tablet_id;
1286
4.36k
}
1287
1288
29.0k
DeleteBitmap& DeleteBitmap::operator=(const DeleteBitmap& o) {
1289
29.0k
    if (this == &o) return *this;
1290
27.8k
    if (this < &o) {
1291
27.8k
        std::unique_lock l1(lock);
1292
27.8k
        std::shared_lock l2(o.lock);
1293
27.8k
        delete_bitmap = o.delete_bitmap;
1294
27.8k
        _tablet_id = o._tablet_id;
1295
18.4E
    } else {
1296
18.4E
        std::shared_lock l2(o.lock);
1297
18.4E
        std::unique_lock l1(lock);
1298
18.4E
        delete_bitmap = o.delete_bitmap;
1299
18.4E
        _tablet_id = o._tablet_id;
1300
18.4E
    }
1301
27.8k
    return *this;
1302
29.0k
}
1303
1304
0
DeleteBitmap::DeleteBitmap(DeleteBitmap&& o) noexcept {
1305
0
    std::scoped_lock l(o.lock, o._rowset_cache_version_lock);
1306
0
    delete_bitmap = std::move(o.delete_bitmap);
1307
0
    _tablet_id = std::move(o._tablet_id);
1308
0
    o._rowset_cache_version.clear();
1309
0
}
1310
1311
40
DeleteBitmap& DeleteBitmap::operator=(DeleteBitmap&& o) noexcept {
1312
40
    if (this == &o) return *this;
1313
40
    std::scoped_lock l(lock, o.lock, o._rowset_cache_version_lock);
1314
40
    delete_bitmap = std::move(o.delete_bitmap);
1315
40
    _tablet_id = std::move(o._tablet_id);
1316
40
    o._rowset_cache_version.clear();
1317
40
    return *this;
1318
40
}
1319
1320
0
DeleteBitmap DeleteBitmap::from_pb(const DeleteBitmapPB& pb, int64_t tablet_id) {
1321
0
    size_t len = pb.rowset_ids().size();
1322
0
    DCHECK_EQ(len, pb.segment_ids().size());
1323
0
    DCHECK_EQ(len, pb.versions().size());
1324
0
    DeleteBitmap delete_bitmap(tablet_id);
1325
0
    for (int32_t i = 0; i < len; ++i) {
1326
0
        RowsetId rs_id;
1327
0
        rs_id.init(pb.rowset_ids(i));
1328
0
        BitmapKey key = {rs_id, pb.segment_ids(i), pb.versions(i)};
1329
0
        delete_bitmap.delete_bitmap[key] =
1330
0
                roaring::Roaring::read(pb.segment_delete_bitmaps(i).data());
1331
0
    }
1332
0
    return delete_bitmap;
1333
0
}
1334
1335
0
DeleteBitmapPB DeleteBitmap::to_pb() {
1336
0
    std::shared_lock l(lock);
1337
0
    DeleteBitmapPB ret;
1338
0
    for (const auto& [k, v] : delete_bitmap) {
1339
0
        ret.mutable_rowset_ids()->Add(std::get<0>(k).to_string());
1340
0
        ret.mutable_segment_ids()->Add(std::get<1>(k));
1341
0
        ret.mutable_versions()->Add(std::get<2>(k));
1342
0
        std::string bitmap_data(v.getSizeInBytes(), '\0');
1343
0
        v.write(bitmap_data.data());
1344
0
        ret.mutable_segment_delete_bitmaps()->Add(std::move(bitmap_data));
1345
0
    }
1346
0
    return ret;
1347
0
}
1348
1349
2.66k
DeleteBitmap DeleteBitmap::snapshot() const {
1350
2.66k
    std::shared_lock l(lock);
1351
2.66k
    return DeleteBitmap(*this);
1352
2.66k
}
1353
1354
43
DeleteBitmap DeleteBitmap::snapshot(Version version) const {
1355
    // Take snapshot first, then remove keys greater than given version.
1356
43
    DeleteBitmap snapshot = this->snapshot();
1357
43
    auto it = snapshot.delete_bitmap.begin();
1358
452
    while (it != snapshot.delete_bitmap.end()) {
1359
409
        if (std::get<2>(it->first) > version) {
1360
4
            it = snapshot.delete_bitmap.erase(it);
1361
405
        } else {
1362
405
            it++;
1363
405
        }
1364
409
    }
1365
43
    return snapshot;
1366
43
}
1367
1368
4.78M
void DeleteBitmap::add(const BitmapKey& bmk, uint32_t row_id) {
1369
4.78M
    std::lock_guard l(lock);
1370
4.78M
    delete_bitmap[bmk].add(row_id);
1371
4.78M
}
1372
1373
0
int DeleteBitmap::remove(const BitmapKey& bmk, uint32_t row_id) {
1374
0
    std::lock_guard l(lock);
1375
0
    auto it = delete_bitmap.find(bmk);
1376
0
    if (it == delete_bitmap.end()) return -1;
1377
0
    it->second.remove(row_id);
1378
0
    return 0;
1379
0
}
1380
1381
59.4k
void DeleteBitmap::remove(const BitmapKey& start, const BitmapKey& end) {
1382
59.4k
    std::lock_guard l(lock);
1383
81.7k
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end();) {
1384
73.8k
        auto& [k, _] = *it;
1385
73.8k
        if (k >= end) {
1386
51.5k
            break;
1387
51.5k
        }
1388
22.2k
        it = delete_bitmap.erase(it);
1389
22.2k
    }
1390
59.4k
}
1391
1392
1.25k
void DeleteBitmap::remove(const std::vector<std::tuple<BitmapKey, BitmapKey>>& key_ranges) {
1393
1.25k
    std::lock_guard l(lock);
1394
1.25k
    for (auto& [start, end] : key_ranges) {
1395
2.56k
        for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end();) {
1396
2.56k
            auto& [k, _] = *it;
1397
2.56k
            if (k >= end) {
1398
1.25k
                break;
1399
1.25k
            }
1400
1.31k
            it = delete_bitmap.erase(it);
1401
1.31k
        }
1402
1.25k
    }
1403
1.25k
}
1404
1405
3.80M
bool DeleteBitmap::contains(const BitmapKey& bmk, uint32_t row_id) const {
1406
3.80M
    std::shared_lock l(lock);
1407
3.80M
    auto it = delete_bitmap.find(bmk);
1408
3.80M
    return it != delete_bitmap.end() && it->second.contains(row_id);
1409
3.80M
}
1410
1411
2
bool DeleteBitmap::contains_agg(const BitmapKey& bmk, uint32_t row_id) const {
1412
2
    return get_agg(bmk)->contains(row_id);
1413
2
}
1414
1415
0
bool DeleteBitmap::empty() const {
1416
0
    std::shared_lock l(lock);
1417
0
    return delete_bitmap.empty();
1418
0
}
1419
1420
142k
uint64_t DeleteBitmap::cardinality() const {
1421
142k
    std::shared_lock l(lock);
1422
142k
    uint64_t res = 0;
1423
1.44M
    for (auto entry : delete_bitmap) {
1424
1.44M
        if (std::get<1>(entry.first) != DeleteBitmap::INVALID_SEGMENT_ID) {
1425
62.3k
            res += entry.second.cardinality();
1426
62.3k
        }
1427
1.44M
    }
1428
142k
    return res;
1429
142k
}
1430
1431
6
uint64_t DeleteBitmap::get_size() const {
1432
6
    std::shared_lock l(lock);
1433
6
    uint64_t charge = 0;
1434
44
    for (auto& [k, v] : delete_bitmap) {
1435
44
        if (std::get<1>(k) != DeleteBitmap::INVALID_SEGMENT_ID) {
1436
44
            charge += v.getSizeInBytes();
1437
44
        }
1438
44
    }
1439
6
    return charge;
1440
6
}
1441
1442
bool DeleteBitmap::contains_agg_with_cache_if_eligible(const BitmapKey& bmk,
1443
3.80M
                                                       uint32_t row_id) const {
1444
3.80M
    g_contains_agg_with_cache_if_eligible_total << 1;
1445
3.80M
    int64_t start_version {0};
1446
3.82M
    if (config::enable_mow_get_agg_by_cache) {
1447
3.82M
        auto deleter = [&](Cache::Handle* handle) {
1448
1.41M
            DeleteBitmapAggCache::instance()->release(handle);
1449
1.41M
        };
1450
3.82M
        std::unique_ptr<Cache::Handle, decltype(deleter)> dbm_handle(nullptr, deleter);
1451
3.82M
        int64_t cached_version = 0;
1452
        // 1. try to lookup the desired key directly
1453
3.82M
        dbm_handle.reset(DeleteBitmapAggCache::instance()->lookup(agg_cache_key(_tablet_id, bmk)));
1454
3.82M
        if (dbm_handle != nullptr) {
1455
1.41M
            cached_version = std::get<2>(bmk);
1456
2.41M
        } else {
1457
            // 2. if not found, try to lookup with cached version
1458
2.41M
            cached_version = _get_rowset_cache_version(bmk);
1459
2.41M
            if (cached_version > 0) {
1460
580
                if (cached_version > std::get<2>(bmk)) {
1461
66
                    cached_version = 0;
1462
514
                } else {
1463
514
                    dbm_handle.reset(DeleteBitmapAggCache::instance()->lookup(agg_cache_key(
1464
514
                            _tablet_id, {std::get<0>(bmk), std::get<1>(bmk), cached_version})));
1465
514
                }
1466
580
            }
1467
2.41M
        }
1468
3.82M
        if (dbm_handle != nullptr) {
1469
1.40M
            const auto& cached_dbm =
1470
1.40M
                    reinterpret_cast<DeleteBitmapAggCache::Value*>(
1471
1.40M
                            DeleteBitmapAggCache::instance()->value(dbm_handle.get()))
1472
1.40M
                            ->bitmap;
1473
1.41M
            if (cached_version == std::get<2>(bmk)) {
1474
1.41M
                g_contains_agg_with_cache_if_eligible_full_hit << 1;
1475
18.4E
            } else {
1476
18.4E
                g_contains_agg_with_cache_if_eligible_partial_hit << 1;
1477
18.4E
            }
1478
1.40M
            if (cached_dbm.contains(row_id)) {
1479
299
                return true;
1480
299
            }
1481
1.41M
            if (cached_version == std::get<2>(bmk)) {
1482
1.41M
                return false;
1483
1.41M
            }
1484
18.4E
            start_version = cached_version + 1;
1485
18.4E
        }
1486
3.82M
    }
1487
2.38M
    DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), start_version};
1488
2.38M
    std::shared_lock l(lock);
1489
2.39M
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1490
1.01M
        auto& [k, bm] = *it;
1491
1.01M
        if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) ||
1492
1.01M
            std::get<2>(k) > std::get<2>(bmk)) {
1493
1.01M
            break;
1494
1.01M
        }
1495
993
        if (bm.contains(row_id)) {
1496
20
            return true;
1497
20
        }
1498
993
    }
1499
2.38M
    return false;
1500
2.38M
}
1501
1502
14
void DeleteBitmap::remove_sentinel_marks() {
1503
14
    std::lock_guard l(lock);
1504
130
    for (auto it = delete_bitmap.begin(), end = delete_bitmap.end(); it != end;) {
1505
116
        if (std::get<1>(it->first) == DeleteBitmap::INVALID_SEGMENT_ID) {
1506
113
            it = delete_bitmap.erase(it);
1507
113
        } else {
1508
3
            ++it;
1509
3
        }
1510
116
    }
1511
14
}
1512
1513
11.7k
int DeleteBitmap::set(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) {
1514
11.7k
    std::lock_guard l(lock);
1515
11.7k
    auto [_, inserted] = delete_bitmap.insert_or_assign(bmk, segment_delete_bitmap);
1516
11.7k
    return inserted;
1517
11.7k
}
1518
1519
7
int DeleteBitmap::get(const BitmapKey& bmk, roaring::Roaring* segment_delete_bitmap) const {
1520
7
    std::shared_lock l(lock);
1521
7
    auto it = delete_bitmap.find(bmk);
1522
7
    if (it == delete_bitmap.end()) return -1;
1523
7
    *segment_delete_bitmap = it->second; // copy
1524
7
    return 0;
1525
7
}
1526
1527
54
const roaring::Roaring* DeleteBitmap::get(const BitmapKey& bmk) const {
1528
54
    std::shared_lock l(lock);
1529
54
    auto it = delete_bitmap.find(bmk);
1530
54
    if (it == delete_bitmap.end()) return nullptr;
1531
41
    return &(it->second); // get address
1532
54
}
1533
1534
void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end,
1535
37.4k
                          DeleteBitmap* subset_rowset_map) const {
1536
37.4k
    DCHECK(start < end);
1537
37.4k
    std::shared_lock l(lock);
1538
46.5k
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1539
24.4k
        auto& [k, bm] = *it;
1540
24.4k
        if (k >= end) {
1541
15.3k
            break;
1542
15.3k
        }
1543
9.13k
        subset_rowset_map->set(k, bm);
1544
9.13k
    }
1545
37.4k
}
1546
1547
void DeleteBitmap::subset(std::vector<std::pair<RowsetId, int64_t>>& rowset_ids,
1548
                          int64_t start_version, int64_t end_version,
1549
0
                          DeleteBitmap* subset_delete_map) const {
1550
0
    DCHECK(start_version <= end_version);
1551
0
    for (auto& [rowset_id, _] : rowset_ids) {
1552
0
        BitmapKey start {rowset_id, 0, 0};
1553
0
        BitmapKey end {rowset_id, UINT32_MAX, end_version + 1};
1554
0
        std::shared_lock l(lock);
1555
0
        for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1556
0
            auto& [k, bm] = *it;
1557
0
            if (k >= end) {
1558
0
                break;
1559
0
            }
1560
0
            auto version = std::get<2>(k);
1561
0
            if (version >= start_version && version <= end_version) {
1562
0
                subset_delete_map->merge(k, bm);
1563
0
                VLOG_DEBUG << "subset delete bitmap, tablet=" << _tablet_id << ", version=["
1564
0
                           << start_version << ", " << end_version
1565
0
                           << "]. rowset=" << std::get<0>(k).to_string()
1566
0
                           << ", segment=" << std::get<1>(k) << ", version=" << version
1567
0
                           << ", cardinality=" << bm.cardinality();
1568
0
            }
1569
0
        }
1570
0
    }
1571
0
}
1572
1573
void DeleteBitmap::subset_and_agg(std::vector<std::pair<RowsetId, int64_t>>& rowset_ids,
1574
                                  int64_t start_version, int64_t end_version,
1575
1
                                  DeleteBitmap* subset_delete_map) const {
1576
1
    DCHECK(start_version <= end_version);
1577
2
    for (auto& [rowset_id, segment_num] : rowset_ids) {
1578
6
        for (int64_t seg_id = 0; seg_id < segment_num; ++seg_id) {
1579
4
            BitmapKey end {rowset_id, seg_id, end_version};
1580
4
            auto bm = get_agg_without_cache(end, start_version);
1581
4
            VLOG_DEBUG << "subset delete bitmap, tablet=" << _tablet_id << ", rowset=" << rowset_id
1582
0
                       << ", segment=" << seg_id << ", version=[" << start_version << "-"
1583
0
                       << end_version << "], cardinality=" << bm->cardinality();
1584
4
            if (bm->isEmpty()) {
1585
0
                continue;
1586
0
            }
1587
4
            subset_delete_map->merge(end, *bm);
1588
4
        }
1589
2
    }
1590
1
}
1591
1592
704
size_t DeleteBitmap::get_count_with_range(const BitmapKey& start, const BitmapKey& end) const {
1593
704
    DCHECK(start < end);
1594
704
    size_t count = 0;
1595
704
    std::shared_lock l(lock);
1596
884
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1597
549
        auto& [k, bm] = *it;
1598
549
        if (k >= end) {
1599
369
            break;
1600
369
        }
1601
180
        count++;
1602
180
    }
1603
704
    return count;
1604
704
}
1605
1606
20.6k
void DeleteBitmap::merge(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) {
1607
20.6k
    std::lock_guard l(lock);
1608
20.6k
    auto [iter, succ] = delete_bitmap.emplace(bmk, segment_delete_bitmap);
1609
20.6k
    if (!succ) {
1610
0
        iter->second |= segment_delete_bitmap;
1611
0
    }
1612
20.6k
}
1613
1614
61.6k
void DeleteBitmap::merge(const DeleteBitmap& other) {
1615
61.6k
    std::lock_guard l(lock);
1616
61.6k
    for (auto& i : other.delete_bitmap) {
1617
2.77k
        auto [j, succ] = this->delete_bitmap.insert(i);
1618
2.77k
        if (!succ) j->second |= i.second;
1619
2.77k
    }
1620
61.6k
}
1621
1622
807k
uint64_t DeleteBitmap::get_delete_bitmap_count() {
1623
807k
    std::shared_lock l(lock);
1624
807k
    uint64_t count = 0;
1625
1.38M
    for (auto it = delete_bitmap.begin(); it != delete_bitmap.end(); it++) {
1626
575k
        if (std::get<1>(it->first) != DeleteBitmap::INVALID_SEGMENT_ID) {
1627
57.8k
            count++;
1628
57.8k
        }
1629
575k
    }
1630
807k
    return count;
1631
807k
}
1632
1633
void DeleteBitmap::traverse_rowset_and_version(
1634
0
        const std::function<int(const RowsetId& rowsetId, int64_t version)>& func) const {
1635
0
    std::shared_lock l(lock);
1636
0
    auto it = delete_bitmap.cbegin();
1637
0
    while (it != delete_bitmap.cend()) {
1638
0
        RowsetId rowset_id = std::get<0>(it->first);
1639
0
        int64_t version = std::get<2>(it->first);
1640
0
        int result = func(rowset_id, version);
1641
0
        if (result == -2) {
1642
            // find next <rowset, version>
1643
0
            it++;
1644
0
        } else {
1645
            // find next <rowset>
1646
0
            it = delete_bitmap.upper_bound({rowset_id, std::numeric_limits<SegmentId>::max(),
1647
0
                                            std::numeric_limits<Version>::max()});
1648
0
        }
1649
0
    }
1650
0
}
1651
1652
0
bool DeleteBitmap::has_calculated_for_multi_segments(const RowsetId& rowset_id) const {
1653
0
    return contains({rowset_id, INVALID_SEGMENT_ID, TEMP_VERSION_COMMON}, ROWSET_SENTINEL_MARK);
1654
0
}
1655
1656
78.0k
size_t DeleteBitmap::remove_rowset_cache_version(const RowsetId& rowset_id) {
1657
78.0k
    std::lock_guard l(_rowset_cache_version_lock);
1658
78.0k
    _rowset_cache_version.erase(rowset_id);
1659
18.4E
    VLOG_DEBUG << "remove agg cache version for tablet=" << _tablet_id
1660
18.4E
               << ", rowset=" << rowset_id.to_string();
1661
78.0k
    return _rowset_cache_version.size();
1662
78.0k
}
1663
1664
40
void DeleteBitmap::clear_rowset_cache_version() {
1665
40
    std::lock_guard l(_rowset_cache_version_lock);
1666
40
    _rowset_cache_version.clear();
1667
40
    VLOG_DEBUG << "clear agg cache version for tablet=" << _tablet_id;
1668
40
}
1669
1670
0
std::set<std::string> DeleteBitmap::get_rowset_cache_version() {
1671
0
    std::set<std::string> set;
1672
0
    std::shared_lock l(_rowset_cache_version_lock);
1673
0
    for (auto& [k, _] : _rowset_cache_version) {
1674
0
        set.insert(k.to_string());
1675
0
    }
1676
0
    return set;
1677
0
}
1678
1679
2.48M
DeleteBitmap::Version DeleteBitmap::_get_rowset_cache_version(const BitmapKey& bmk) const {
1680
2.48M
    std::shared_lock l(_rowset_cache_version_lock);
1681
2.48M
    if (auto it = _rowset_cache_version.find(std::get<0>(bmk)); it != _rowset_cache_version.end()) {
1682
18.0k
        auto& segment_cache_version = it->second;
1683
18.0k
        if (auto it1 = segment_cache_version.find(std::get<1>(bmk));
1684
18.0k
            it1 != segment_cache_version.end()) {
1685
18.0k
            return it1->second;
1686
18.0k
        }
1687
18.0k
    }
1688
2.46M
    return 0;
1689
2.48M
}
1690
1691
2
DeleteBitmap DeleteBitmap::agg_cache_snapshot() {
1692
2
    return DeleteBitmapAggCache::instance()->snapshot(_tablet_id);
1693
2
}
1694
1695
734k
void DeleteBitmap::set_tablet_id(int64_t tablet_id) {
1696
734k
    _tablet_id = tablet_id;
1697
734k
}
1698
1699
2.01M
std::shared_ptr<roaring::Roaring> DeleteBitmap::get_agg(const BitmapKey& bmk) const {
1700
2.01M
    std::string key_str = agg_cache_key(_tablet_id, bmk); // Cache key container
1701
2.01M
    CacheKey key(key_str);
1702
2.01M
    Cache::Handle* handle = DeleteBitmapAggCache::instance()->lookup(key);
1703
1704
2.01M
    DeleteBitmapAggCache::Value* val =
1705
2.01M
            handle == nullptr ? nullptr
1706
2.01M
                              : reinterpret_cast<DeleteBitmapAggCache::Value*>(
1707
1.93M
                                        DeleteBitmapAggCache::instance()->value(handle));
1708
    // FIXME: do we need a mutex here to get rid of duplicated initializations
1709
    //        of cache entries in some cases?
1710
2.01M
    if (val == nullptr) { // Renew if needed, put a new Value to cache
1711
72.2k
        val = new DeleteBitmapAggCache::Value();
1712
72.2k
        Version start_version =
1713
72.2k
                config::enable_mow_get_agg_by_cache ? _get_rowset_cache_version(bmk) : 0;
1714
72.2k
        if (start_version > 0) {
1715
17.4k
            Cache::Handle* handle2 = DeleteBitmapAggCache::instance()->lookup(
1716
17.4k
                    agg_cache_key(_tablet_id, {std::get<0>(bmk), std::get<1>(bmk), start_version}));
1717
1718
17.4k
            DBUG_EXECUTE_IF("DeleteBitmap::get_agg.cache_miss", {
1719
17.4k
                if (handle2 != nullptr) {
1720
17.4k
                    auto p = dp->param("percent", 0.3);
1721
17.4k
                    std::mt19937 gen {std::random_device {}()};
1722
17.4k
                    std::bernoulli_distribution inject_fault {p};
1723
17.4k
                    if (inject_fault(gen)) {
1724
17.4k
                        LOG_INFO("injection DeleteBitmap::get_agg.cache_miss, tablet_id={}",
1725
17.4k
                                 _tablet_id);
1726
17.4k
                        handle2 = nullptr;
1727
17.4k
                    }
1728
17.4k
                }
1729
17.4k
            });
1730
17.4k
            if (handle2 == nullptr || start_version > std::get<2>(bmk)) {
1731
125
                start_version = 0;
1732
17.3k
            } else {
1733
17.3k
                val->bitmap |= reinterpret_cast<DeleteBitmapAggCache::Value*>(
1734
17.3k
                                       DeleteBitmapAggCache::instance()->value(handle2))
1735
17.3k
                                       ->bitmap;
1736
17.3k
                VLOG_DEBUG << "get agg cache version=" << start_version
1737
35
                           << " for tablet=" << _tablet_id
1738
35
                           << ", rowset=" << std::get<0>(bmk).to_string()
1739
35
                           << ", segment=" << std::get<1>(bmk);
1740
17.3k
                start_version += 1;
1741
17.3k
            }
1742
17.4k
            if (handle2 != nullptr) {
1743
17.4k
                DeleteBitmapAggCache::instance()->release(handle2);
1744
17.4k
            }
1745
17.4k
        }
1746
72.2k
        {
1747
72.2k
            std::shared_lock l(lock);
1748
72.2k
            DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), start_version};
1749
80.8k
            for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1750
35.4k
                auto& [k, bm] = *it;
1751
35.4k
                if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) ||
1752
35.4k
                    std::get<2>(k) > std::get<2>(bmk)) {
1753
26.8k
                    break;
1754
26.8k
                }
1755
8.59k
                val->bitmap |= bm;
1756
8.59k
            }
1757
72.2k
        }
1758
72.2k
        size_t charge = val->bitmap.getSizeInBytes() + sizeof(DeleteBitmapAggCache::Value);
1759
72.2k
        handle = DeleteBitmapAggCache::instance()->insert(key, val, charge, charge,
1760
72.2k
                                                          CachePriority::NORMAL);
1761
72.2k
        if (config::enable_mow_get_agg_by_cache && !val->bitmap.isEmpty()) {
1762
23.9k
            std::lock_guard l(_rowset_cache_version_lock);
1763
            // this version is already agg
1764
23.9k
            _rowset_cache_version[std::get<0>(bmk)][std::get<1>(bmk)] = std::get<2>(bmk);
1765
23.9k
            VLOG_DEBUG << "set agg cache version=" << std::get<2>(bmk)
1766
33
                       << " for tablet=" << _tablet_id
1767
33
                       << ", rowset=" << std::get<0>(bmk).to_string()
1768
33
                       << ", segment=" << std::get<1>(bmk);
1769
23.9k
        }
1770
72.2k
        if (start_version > 0 && config::enable_mow_get_agg_correctness_check_core) {
1771
0
            std::shared_ptr<roaring::Roaring> bitmap = get_agg_without_cache(bmk);
1772
0
            if (val->bitmap != *bitmap) {
1773
0
                CHECK(false) << ". get agg correctness check failed for tablet=" << _tablet_id
1774
0
                             << ", rowset=" << std::get<0>(bmk).to_string()
1775
0
                             << ", segment=" << std::get<1>(bmk) << ", version=" << std::get<2>(bmk)
1776
0
                             << ". start_version from cache=" << start_version
1777
0
                             << ", delete_bitmap cardinality with cache="
1778
0
                             << val->bitmap.cardinality()
1779
0
                             << ", delete_bitmap cardinality without cache="
1780
0
                             << bitmap->cardinality();
1781
0
            }
1782
0
        }
1783
72.2k
    }
1784
1785
    // It is natural for the cache to reclaim the underlying memory
1786
2.01M
    return std::shared_ptr<roaring::Roaring>(
1787
2.01M
            &val->bitmap, [handle](...) { DeleteBitmapAggCache::instance()->release(handle); });
1788
2.01M
}
1789
1790
std::shared_ptr<roaring::Roaring> DeleteBitmap::get_agg_without_cache(
1791
8.54k
        const BitmapKey& bmk, const int64_t start_version) const {
1792
8.54k
    std::shared_ptr<roaring::Roaring> bitmap = std::make_shared<roaring::Roaring>();
1793
8.54k
    std::shared_lock l(lock);
1794
8.54k
    DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), start_version};
1795
51.2k
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1796
49.8k
        auto& [k, bm] = *it;
1797
49.8k
        if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) ||
1798
49.8k
            std::get<2>(k) > std::get<2>(bmk)) {
1799
7.09k
            break;
1800
7.09k
        }
1801
42.7k
        *bitmap |= bm;
1802
42.7k
    }
1803
8.54k
    return bitmap;
1804
8.54k
}
1805
1806
0
DeleteBitmap DeleteBitmap::diffset(const std::set<BitmapKey>& key_set) const {
1807
0
    std::shared_lock l(lock);
1808
0
    auto diff_key_set_view =
1809
0
            delete_bitmap | std::ranges::views::transform([](const auto& kv) { return kv.first; }) |
1810
0
            std::ranges::views::filter(
1811
0
                    [&key_set](const auto& key) { return !key_set.contains(key); });
1812
1813
0
    DeleteBitmap dbm(_tablet_id);
1814
0
    for (const auto& key : diff_key_set_view) {
1815
0
        const auto* bitmap = get(key);
1816
0
        DCHECK_NE(bitmap, nullptr);
1817
0
        dbm.delete_bitmap[key] = *bitmap;
1818
0
    }
1819
0
    return dbm;
1820
0
}
1821
1822
0
std::string tablet_state_name(TabletState state) {
1823
0
    switch (state) {
1824
0
    case TABLET_NOTREADY:
1825
0
        return "TABLET_NOTREADY";
1826
1827
0
    case TABLET_RUNNING:
1828
0
        return "TABLET_RUNNING";
1829
1830
0
    case TABLET_TOMBSTONED:
1831
0
        return "TABLET_TOMBSTONED";
1832
1833
0
    case TABLET_STOPPED:
1834
0
        return "TABLET_STOPPED";
1835
1836
0
    case TABLET_SHUTDOWN:
1837
0
        return "TABLET_SHUTDOWN";
1838
1839
0
    default:
1840
0
        return "TabletState(" + std::to_string(state) + ")";
1841
0
    }
1842
0
}
1843
1844
#include "common/compile_check_end.h"
1845
} // namespace doris