Coverage Report

Created: 2025-07-27 01:13

/root/doris/be/src/olap/tablet_meta.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "olap/tablet_meta.h"
19
20
#include <gen_cpp/Descriptors_types.h>
21
#include <gen_cpp/Types_types.h>
22
#include <gen_cpp/olap_common.pb.h>
23
#include <gen_cpp/olap_file.pb.h>
24
#include <gen_cpp/segment_v2.pb.h>
25
#include <gen_cpp/types.pb.h>
26
#include <json2pb/pb_to_json.h>
27
#include <time.h>
28
29
#include <cstdint>
30
#include <memory>
31
#include <random>
32
#include <ranges>
33
#include <set>
34
#include <utility>
35
36
#include "cloud/cloud_meta_mgr.h"
37
#include "cloud/cloud_storage_engine.h"
38
#include "cloud/config.h"
39
#include "common/config.h"
40
#include "gutil/integral_types.h"
41
#include "io/fs/file_writer.h"
42
#include "io/fs/local_file_system.h"
43
#include "olap/data_dir.h"
44
#include "olap/file_header.h"
45
#include "olap/olap_common.h"
46
#include "olap/olap_define.h"
47
#include "olap/rowset/rowset.h"
48
#include "olap/rowset/rowset_meta_manager.h"
49
#include "olap/tablet_fwd.h"
50
#include "olap/tablet_meta_manager.h"
51
#include "olap/tablet_schema_cache.h"
52
#include "olap/utils.h"
53
#include "util/debug_points.h"
54
#include "util/mem_info.h"
55
#include "util/parse_util.h"
56
#include "util/string_util.h"
57
#include "util/time.h"
58
#include "util/uid_util.h"
59
60
using std::string;
61
using std::unordered_map;
62
using std::vector;
63
64
namespace doris {
65
#include "common/compile_check_begin.h"
66
using namespace ErrorCode;
67
68
TabletMetaSharedPtr TabletMeta::create(
69
        const TCreateTabletReq& request, const TabletUid& tablet_uid, uint64_t shard_id,
70
        uint32_t next_unique_id,
71
300
        const unordered_map<uint32_t, uint32_t>& col_ordinal_to_unique_id) {
72
300
    std::optional<TBinlogConfig> binlog_config;
73
300
    if (request.__isset.binlog_config) {
74
0
        binlog_config = request.binlog_config;
75
0
    }
76
300
    TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format =
77
300
            request.inverted_index_file_storage_format;
78
79
    // We will discard this format. Don't make any further changes here.
80
300
    if (request.__isset.inverted_index_storage_format) {
81
300
        switch (request.inverted_index_storage_format) {
82
0
        case TInvertedIndexStorageFormat::V1:
83
0
            inverted_index_file_storage_format = TInvertedIndexFileStorageFormat::V1;
84
0
            break;
85
0
        case TInvertedIndexStorageFormat::V2:
86
0
            inverted_index_file_storage_format = TInvertedIndexFileStorageFormat::V2;
87
0
            break;
88
300
        default:
89
300
            break;
90
300
        }
91
300
    }
92
300
    return std::make_shared<TabletMeta>(
93
300
            request.table_id, request.partition_id, request.tablet_id, request.replica_id,
94
300
            request.tablet_schema.schema_hash, shard_id, request.tablet_schema, next_unique_id,
95
300
            col_ordinal_to_unique_id, tablet_uid,
96
300
            request.__isset.tablet_type ? request.tablet_type : TTabletType::TABLET_TYPE_DISK,
97
300
            request.compression_type, request.storage_policy_id,
98
300
            request.__isset.enable_unique_key_merge_on_write
99
300
                    ? request.enable_unique_key_merge_on_write
100
300
                    : false,
101
300
            std::move(binlog_config), request.compaction_policy,
102
300
            request.time_series_compaction_goal_size_mbytes,
103
300
            request.time_series_compaction_file_count_threshold,
104
300
            request.time_series_compaction_time_threshold_seconds,
105
300
            request.time_series_compaction_empty_rowsets_threshold,
106
300
            request.time_series_compaction_level_threshold, inverted_index_file_storage_format);
107
300
}
108
109
1.03k
TabletMeta::~TabletMeta() {
110
1.03k
    if (_handle) {
111
956
        TabletSchemaCache::instance()->release(_handle);
112
956
    }
113
1.03k
}
114
115
TabletMeta::TabletMeta()
116
        : _tablet_uid(0, 0),
117
          _schema(new TabletSchema),
118
541
          _delete_bitmap(new DeleteBitmap(_tablet_id)) {}
119
120
TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id,
121
                       int64_t replica_id, int32_t schema_hash, int32_t shard_id,
122
                       const TTabletSchema& tablet_schema, uint32_t next_unique_id,
123
                       const std::unordered_map<uint32_t, uint32_t>& col_ordinal_to_unique_id,
124
                       TabletUid tablet_uid, TTabletType::type tabletType,
125
                       TCompressionType::type compression_type, int64_t storage_policy_id,
126
                       bool enable_unique_key_merge_on_write,
127
                       std::optional<TBinlogConfig> binlog_config, std::string compaction_policy,
128
                       int64_t time_series_compaction_goal_size_mbytes,
129
                       int64_t time_series_compaction_file_count_threshold,
130
                       int64_t time_series_compaction_time_threshold_seconds,
131
                       int64_t time_series_compaction_empty_rowsets_threshold,
132
                       int64_t time_series_compaction_level_threshold,
133
                       TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format)
134
        : _tablet_uid(0, 0),
135
          _schema(new TabletSchema),
136
466
          _delete_bitmap(new DeleteBitmap(tablet_id)) {
137
466
    TabletMetaPB tablet_meta_pb;
138
466
    tablet_meta_pb.set_table_id(table_id);
139
466
    tablet_meta_pb.set_partition_id(partition_id);
140
466
    tablet_meta_pb.set_tablet_id(tablet_id);
141
466
    tablet_meta_pb.set_replica_id(replica_id);
142
466
    tablet_meta_pb.set_schema_hash(schema_hash);
143
466
    tablet_meta_pb.set_shard_id(shard_id);
144
    // Persist the creation time, but it is not used
145
466
    tablet_meta_pb.set_creation_time(time(nullptr));
146
466
    tablet_meta_pb.set_cumulative_layer_point(-1);
147
466
    tablet_meta_pb.set_tablet_state(PB_RUNNING);
148
466
    *(tablet_meta_pb.mutable_tablet_uid()) = tablet_uid.to_proto();
149
466
    tablet_meta_pb.set_tablet_type(tabletType == TTabletType::TABLET_TYPE_DISK
150
466
                                           ? TabletTypePB::TABLET_TYPE_DISK
151
466
                                           : TabletTypePB::TABLET_TYPE_MEMORY);
152
466
    tablet_meta_pb.set_enable_unique_key_merge_on_write(enable_unique_key_merge_on_write);
153
466
    tablet_meta_pb.set_storage_policy_id(storage_policy_id);
154
466
    tablet_meta_pb.set_compaction_policy(compaction_policy);
155
466
    tablet_meta_pb.set_time_series_compaction_goal_size_mbytes(
156
466
            time_series_compaction_goal_size_mbytes);
157
466
    tablet_meta_pb.set_time_series_compaction_file_count_threshold(
158
466
            time_series_compaction_file_count_threshold);
159
466
    tablet_meta_pb.set_time_series_compaction_time_threshold_seconds(
160
466
            time_series_compaction_time_threshold_seconds);
161
466
    tablet_meta_pb.set_time_series_compaction_empty_rowsets_threshold(
162
466
            time_series_compaction_empty_rowsets_threshold);
163
466
    tablet_meta_pb.set_time_series_compaction_level_threshold(
164
466
            time_series_compaction_level_threshold);
165
466
    TabletSchemaPB* schema = tablet_meta_pb.mutable_schema();
166
466
    schema->set_num_short_key_columns(tablet_schema.short_key_column_count);
167
466
    schema->set_num_rows_per_row_block(config::default_num_rows_per_column_file_block);
168
466
    schema->set_sequence_col_idx(tablet_schema.sequence_col_idx);
169
466
    switch (tablet_schema.keys_type) {
170
40
    case TKeysType::DUP_KEYS:
171
40
        schema->set_keys_type(KeysType::DUP_KEYS);
172
40
        break;
173
301
    case TKeysType::UNIQUE_KEYS:
174
301
        schema->set_keys_type(KeysType::UNIQUE_KEYS);
175
301
        break;
176
64
    case TKeysType::AGG_KEYS:
177
64
        schema->set_keys_type(KeysType::AGG_KEYS);
178
64
        break;
179
61
    default:
180
61
        LOG(WARNING) << "unknown tablet keys type";
181
61
        break;
182
466
    }
183
    // compress_kind used to compress segment files
184
466
    schema->set_compress_kind(COMPRESS_LZ4);
185
186
    // compression_type used to compress segment page
187
466
    switch (compression_type) {
188
0
    case TCompressionType::NO_COMPRESSION:
189
0
        schema->set_compression_type(segment_v2::NO_COMPRESSION);
190
0
        break;
191
0
    case TCompressionType::SNAPPY:
192
0
        schema->set_compression_type(segment_v2::SNAPPY);
193
0
        break;
194
0
    case TCompressionType::LZ4:
195
0
        schema->set_compression_type(segment_v2::LZ4);
196
0
        break;
197
466
    case TCompressionType::LZ4F:
198
466
        schema->set_compression_type(segment_v2::LZ4F);
199
466
        break;
200
0
    case TCompressionType::ZLIB:
201
0
        schema->set_compression_type(segment_v2::ZLIB);
202
0
        break;
203
0
    case TCompressionType::ZSTD:
204
0
        schema->set_compression_type(segment_v2::ZSTD);
205
0
        break;
206
0
    default:
207
0
        schema->set_compression_type(segment_v2::LZ4F);
208
0
        break;
209
466
    }
210
211
466
    switch (inverted_index_file_storage_format) {
212
0
    case TInvertedIndexFileStorageFormat::V1:
213
0
        schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
214
0
        break;
215
466
    case TInvertedIndexFileStorageFormat::V2:
216
466
        schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
217
466
        break;
218
0
    default:
219
0
        schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
220
0
        break;
221
466
    }
222
223
466
    switch (tablet_schema.sort_type) {
224
0
    case TSortType::type::ZORDER:
225
0
        schema->set_sort_type(SortType::ZORDER);
226
0
        break;
227
466
    default:
228
466
        schema->set_sort_type(SortType::LEXICAL);
229
466
    }
230
466
    schema->set_sort_col_num(tablet_schema.sort_col_num);
231
466
    for (const auto& i : tablet_schema.cluster_key_idxes) {
232
2
        schema->add_cluster_key_idxes(i);
233
2
    }
234
466
    tablet_meta_pb.set_in_restore_mode(false);
235
236
    // set column information
237
466
    uint32_t col_ordinal = 0;
238
466
    bool has_bf_columns = false;
239
2.12k
    for (TColumn tcolumn : tablet_schema.columns) {
240
2.12k
        ColumnPB* column = schema->add_column();
241
2.12k
        uint32_t unique_id = -1;
242
2.12k
        if (tcolumn.col_unique_id >= 0) {
243
1
            unique_id = tcolumn.col_unique_id;
244
2.12k
        } else {
245
2.12k
            unique_id = col_ordinal_to_unique_id.at(col_ordinal);
246
2.12k
        }
247
2.12k
        col_ordinal++;
248
2.12k
        init_column_from_tcolumn(unique_id, tcolumn, column);
249
250
2.12k
        if (column->is_bf_column()) {
251
0
            has_bf_columns = true;
252
0
        }
253
254
2.12k
        if (tablet_schema.__isset.indexes) {
255
0
            for (auto& index : tablet_schema.indexes) {
256
0
                if (index.index_type == TIndexType::type::BITMAP) {
257
0
                    DCHECK_EQ(index.columns.size(), 1);
258
0
                    if (iequal(tcolumn.column_name, index.columns[0])) {
259
0
                        column->set_has_bitmap_index(true);
260
0
                        break;
261
0
                    }
262
0
                } else if (index.index_type == TIndexType::type::BLOOMFILTER ||
263
0
                           index.index_type == TIndexType::type::NGRAM_BF) {
264
0
                    DCHECK_EQ(index.columns.size(), 1);
265
0
                    if (iequal(tcolumn.column_name, index.columns[0])) {
266
0
                        column->set_is_bf_column(true);
267
0
                        break;
268
0
                    }
269
0
                }
270
0
            }
271
0
        }
272
2.12k
    }
273
274
    // copy index meta
275
466
    if (tablet_schema.__isset.indexes) {
276
0
        for (auto& index : tablet_schema.indexes) {
277
0
            TabletIndexPB* index_pb = schema->add_index();
278
0
            index_pb->set_index_id(index.index_id);
279
0
            index_pb->set_index_name(index.index_name);
280
            // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side
281
            // get column unique id by name
282
0
            for (auto column_name : index.columns) {
283
0
                for (auto column : schema->column()) {
284
0
                    if (iequal(column.name(), column_name)) {
285
0
                        index_pb->add_col_unique_id(column.unique_id());
286
0
                    }
287
0
                }
288
0
            }
289
0
            switch (index.index_type) {
290
0
            case TIndexType::BITMAP:
291
0
                index_pb->set_index_type(IndexType::BITMAP);
292
0
                break;
293
0
            case TIndexType::INVERTED:
294
0
                index_pb->set_index_type(IndexType::INVERTED);
295
0
                break;
296
0
            case TIndexType::BLOOMFILTER:
297
0
                index_pb->set_index_type(IndexType::BLOOMFILTER);
298
0
                break;
299
0
            case TIndexType::NGRAM_BF:
300
0
                index_pb->set_index_type(IndexType::NGRAM_BF);
301
0
                break;
302
0
            }
303
304
0
            if (index.__isset.properties) {
305
0
                auto properties = index_pb->mutable_properties();
306
0
                for (auto kv : index.properties) {
307
0
                    (*properties)[kv.first] = kv.second;
308
0
                }
309
0
            }
310
0
        }
311
0
    }
312
313
466
    schema->set_next_column_unique_id(next_unique_id);
314
466
    if (has_bf_columns && tablet_schema.__isset.bloom_filter_fpp) {
315
0
        schema->set_bf_fpp(tablet_schema.bloom_filter_fpp);
316
0
    }
317
318
466
    if (tablet_schema.__isset.is_in_memory) {
319
0
        schema->set_is_in_memory(tablet_schema.is_in_memory);
320
0
    }
321
322
466
    if (tablet_schema.__isset.disable_auto_compaction) {
323
0
        schema->set_disable_auto_compaction(tablet_schema.disable_auto_compaction);
324
0
    }
325
326
466
    if (tablet_schema.__isset.variant_enable_flatten_nested) {
327
466
        schema->set_variant_enable_flatten_nested(tablet_schema.variant_enable_flatten_nested);
328
466
    }
329
330
466
    if (tablet_schema.__isset.enable_single_replica_compaction) {
331
466
        schema->set_enable_single_replica_compaction(
332
466
                tablet_schema.enable_single_replica_compaction);
333
466
    }
334
335
466
    if (tablet_schema.__isset.delete_sign_idx) {
336
466
        schema->set_delete_sign_idx(tablet_schema.delete_sign_idx);
337
466
    }
338
466
    if (tablet_schema.__isset.store_row_column) {
339
466
        schema->set_store_row_column(tablet_schema.store_row_column);
340
466
    }
341
466
    if (tablet_schema.__isset.row_store_page_size) {
342
466
        schema->set_row_store_page_size(tablet_schema.row_store_page_size);
343
466
    }
344
466
    if (tablet_schema.__isset.storage_page_size) {
345
466
        schema->set_storage_page_size(tablet_schema.storage_page_size);
346
466
    }
347
466
    if (tablet_schema.__isset.skip_write_index_on_load) {
348
466
        schema->set_skip_write_index_on_load(tablet_schema.skip_write_index_on_load);
349
466
    }
350
466
    if (tablet_schema.__isset.row_store_col_cids) {
351
0
        schema->mutable_row_store_column_unique_ids()->Add(tablet_schema.row_store_col_cids.begin(),
352
0
                                                           tablet_schema.row_store_col_cids.end());
353
0
    }
354
466
    if (binlog_config.has_value()) {
355
0
        BinlogConfig tmp_binlog_config;
356
0
        tmp_binlog_config = binlog_config.value();
357
0
        tmp_binlog_config.to_pb(tablet_meta_pb.mutable_binlog_config());
358
0
    }
359
360
466
    init_from_pb(tablet_meta_pb);
361
466
}
362
363
TabletMeta::TabletMeta(const TabletMeta& b)
364
        : MetadataAdder(b),
365
          _table_id(b._table_id),
366
          _index_id(b._index_id),
367
          _partition_id(b._partition_id),
368
          _tablet_id(b._tablet_id),
369
          _replica_id(b._replica_id),
370
          _schema_hash(b._schema_hash),
371
          _shard_id(b._shard_id),
372
          _creation_time(b._creation_time),
373
          _cumulative_layer_point(b._cumulative_layer_point),
374
          _tablet_uid(b._tablet_uid),
375
          _tablet_type(b._tablet_type),
376
          _tablet_state(b._tablet_state),
377
          _schema(b._schema),
378
          _rs_metas(b._rs_metas),
379
          _stale_rs_metas(b._stale_rs_metas),
380
          _in_restore_mode(b._in_restore_mode),
381
          _preferred_rowset_type(b._preferred_rowset_type),
382
          _storage_policy_id(b._storage_policy_id),
383
          _cooldown_meta_id(b._cooldown_meta_id),
384
          _enable_unique_key_merge_on_write(b._enable_unique_key_merge_on_write),
385
          _delete_bitmap(b._delete_bitmap),
386
          _binlog_config(b._binlog_config),
387
          _compaction_policy(b._compaction_policy),
388
          _time_series_compaction_goal_size_mbytes(b._time_series_compaction_goal_size_mbytes),
389
          _time_series_compaction_file_count_threshold(
390
                  b._time_series_compaction_file_count_threshold),
391
          _time_series_compaction_time_threshold_seconds(
392
                  b._time_series_compaction_time_threshold_seconds),
393
          _time_series_compaction_empty_rowsets_threshold(
394
                  b._time_series_compaction_empty_rowsets_threshold),
395
0
          _time_series_compaction_level_threshold(b._time_series_compaction_level_threshold) {};
396
397
void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn,
398
2.12k
                                          ColumnPB* column) {
399
2.12k
    column->set_unique_id(unique_id);
400
2.12k
    column->set_name(tcolumn.column_name);
401
2.12k
    column->set_has_bitmap_index(tcolumn.has_bitmap_index);
402
2.12k
    column->set_is_auto_increment(tcolumn.is_auto_increment);
403
2.12k
    string data_type;
404
2.12k
    EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type);
405
2.12k
    column->set_type(data_type);
406
407
2.12k
    uint32_t length = TabletColumn::get_field_length_by_type(tcolumn.column_type.type,
408
2.12k
                                                             tcolumn.column_type.len);
409
2.12k
    column->set_length(length);
410
2.12k
    column->set_index_length(length);
411
2.12k
    column->set_precision(tcolumn.column_type.precision);
412
2.12k
    column->set_frac(tcolumn.column_type.scale);
413
414
2.12k
    if (tcolumn.__isset.result_is_nullable) {
415
0
        column->set_result_is_nullable(tcolumn.result_is_nullable);
416
0
    }
417
418
2.12k
    if (tcolumn.__isset.be_exec_version) {
419
2.12k
        column->set_be_exec_version(tcolumn.be_exec_version);
420
2.12k
    }
421
422
2.12k
    if (tcolumn.column_type.type == TPrimitiveType::VARCHAR ||
423
2.12k
        tcolumn.column_type.type == TPrimitiveType::STRING) {
424
101
        if (!tcolumn.column_type.__isset.index_len) {
425
101
            column->set_index_length(10);
426
101
        } else {
427
0
            column->set_index_length(tcolumn.column_type.index_len);
428
0
        }
429
101
    }
430
2.12k
    if (!tcolumn.is_key) {
431
1.14k
        column->set_is_key(false);
432
1.14k
        if (tcolumn.__isset.aggregation) {
433
0
            column->set_aggregation(tcolumn.aggregation);
434
1.14k
        } else {
435
1.14k
            string aggregation_type;
436
1.14k
            EnumToString(TAggregationType, tcolumn.aggregation_type, aggregation_type);
437
1.14k
            column->set_aggregation(aggregation_type);
438
1.14k
        }
439
1.14k
    } else {
440
983
        column->set_is_key(true);
441
983
        column->set_aggregation("NONE");
442
983
    }
443
2.12k
    column->set_is_nullable(tcolumn.is_allow_null);
444
2.12k
    if (tcolumn.__isset.default_value) {
445
0
        column->set_default_value(tcolumn.default_value);
446
0
    }
447
2.12k
    if (tcolumn.__isset.is_bloom_filter_column) {
448
0
        column->set_is_bf_column(tcolumn.is_bloom_filter_column);
449
0
    }
450
2.12k
    for (size_t i = 0; i < tcolumn.children_column.size(); i++) {
451
0
        ColumnPB* children_column = column->add_children_columns();
452
0
        init_column_from_tcolumn(tcolumn.children_column[i].col_unique_id,
453
0
                                 tcolumn.children_column[i], children_column);
454
0
    }
455
2.12k
}
456
457
0
void TabletMeta::remove_rowset_delete_bitmap(const RowsetId& rowset_id, const Version& version) {
458
0
    if (_enable_unique_key_merge_on_write) {
459
0
        delete_bitmap()->remove({rowset_id, 0, 0}, {rowset_id, UINT32_MAX, 0});
460
0
        if (config::enable_mow_verbose_log) {
461
0
            LOG_INFO("delete rowset delete bitmap. tablet={}, rowset={}, version={}", tablet_id(),
462
0
                     rowset_id.to_string(), version.to_string());
463
0
        }
464
0
        size_t rowset_cache_version_size = delete_bitmap()->remove_rowset_cache_version(rowset_id);
465
0
        _check_mow_rowset_cache_version_size(rowset_cache_version_size);
466
0
    }
467
0
}
468
469
4
Status TabletMeta::create_from_file(const string& file_path) {
470
4
    TabletMetaPB tablet_meta_pb;
471
4
    RETURN_IF_ERROR(load_from_file(file_path, &tablet_meta_pb));
472
4
    init_from_pb(tablet_meta_pb);
473
4
    return Status::OK();
474
4
}
475
476
10
Status TabletMeta::load_from_file(const string& file_path, TabletMetaPB* tablet_meta_pb) {
477
10
    FileHeader<TabletMetaPB> file_header(file_path);
478
    // In file_header.deserialize(), it validates file length, signature, checksum of protobuf.
479
10
    RETURN_IF_ERROR(file_header.deserialize());
480
10
    try {
481
10
        tablet_meta_pb->CopyFrom(file_header.message());
482
10
    } catch (...) {
483
0
        return Status::Error<PARSE_PROTOBUF_ERROR>("fail to copy protocol buffer object. file={}",
484
0
                                                   file_path);
485
0
    }
486
10
    return Status::OK();
487
10
}
488
489
std::string TabletMeta::construct_header_file_path(const string& schema_hash_path,
490
3
                                                   int64_t tablet_id) {
491
3
    std::stringstream header_name_stream;
492
3
    header_name_stream << schema_hash_path << "/" << tablet_id << ".hdr";
493
3
    return header_name_stream.str();
494
3
}
495
496
0
Status TabletMeta::save_as_json(const string& file_path) {
497
0
    std::string json_meta;
498
0
    json2pb::Pb2JsonOptions json_options;
499
0
    json_options.pretty_json = true;
500
0
    json_options.bytes_to_base64 = true;
501
0
    to_json(&json_meta, json_options);
502
    // save to file
503
0
    io::FileWriterPtr file_writer;
504
0
    RETURN_IF_ERROR(io::global_local_filesystem()->create_file(file_path, &file_writer));
505
0
    RETURN_IF_ERROR(file_writer->append(json_meta));
506
0
    RETURN_IF_ERROR(file_writer->close());
507
0
    return Status::OK();
508
0
}
509
510
230
Status TabletMeta::save(const string& file_path) {
511
230
    TabletMetaPB tablet_meta_pb;
512
230
    to_meta_pb(&tablet_meta_pb);
513
230
    return TabletMeta::save(file_path, tablet_meta_pb);
514
230
}
515
516
234
Status TabletMeta::save(const string& file_path, const TabletMetaPB& tablet_meta_pb) {
517
234
    DCHECK(!file_path.empty());
518
234
    FileHeader<TabletMetaPB> file_header(file_path);
519
234
    try {
520
234
        file_header.mutable_message()->CopyFrom(tablet_meta_pb);
521
234
    } catch (...) {
522
0
        LOG(WARNING) << "fail to copy protocol buffer object. file='" << file_path;
523
0
        return Status::Error<ErrorCode::INTERNAL_ERROR>(
524
0
                "fail to copy protocol buffer object. file={}", file_path);
525
0
    }
526
234
    RETURN_IF_ERROR(file_header.prepare());
527
234
    RETURN_IF_ERROR(file_header.serialize());
528
234
    return Status::OK();
529
234
}
530
531
565
Status TabletMeta::save_meta(DataDir* data_dir) {
532
565
    std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
533
565
    return _save_meta(data_dir);
534
565
}
535
536
565
Status TabletMeta::_save_meta(DataDir* data_dir) {
537
    // check if tablet uid is valid
538
565
    if (_tablet_uid.hi == 0 && _tablet_uid.lo == 0) {
539
0
        LOG(FATAL) << "tablet_uid is invalid"
540
0
                   << " tablet=" << tablet_id() << " _tablet_uid=" << _tablet_uid.to_string();
541
0
    }
542
565
    string meta_binary;
543
544
565
    auto t1 = MonotonicMicros();
545
565
    serialize(&meta_binary);
546
565
    auto t2 = MonotonicMicros();
547
565
    Status status = TabletMetaManager::save(data_dir, tablet_id(), schema_hash(), meta_binary);
548
565
    if (!status.ok()) {
549
0
        LOG(FATAL) << "fail to save tablet_meta. status=" << status << ", tablet_id=" << tablet_id()
550
0
                   << ", schema_hash=" << schema_hash();
551
0
    }
552
565
    auto t3 = MonotonicMicros();
553
565
    auto cost = t3 - t1;
554
565
    if (cost > 1 * 1000 * 1000) {
555
0
        LOG(INFO) << "save tablet(" << tablet_id() << ") meta too slow. serialize cost " << t2 - t1
556
0
                  << "(us), serialized binary size: " << meta_binary.length()
557
0
                  << "(bytes), write rocksdb cost " << t3 - t2 << "(us)";
558
0
    }
559
565
    return status;
560
565
}
561
562
570
void TabletMeta::serialize(string* meta_binary) {
563
570
    TabletMetaPB tablet_meta_pb;
564
570
    to_meta_pb(&tablet_meta_pb);
565
570
    if (tablet_meta_pb.partition_id() <= 0) {
566
468
        LOG(WARNING) << "invalid partition id " << tablet_meta_pb.partition_id() << " tablet "
567
468
                     << tablet_meta_pb.tablet_id();
568
468
    }
569
570
    DBUG_EXECUTE_IF("TabletMeta::serialize::zero_partition_id", {
570
570
        long partition_id = tablet_meta_pb.partition_id();
571
570
        tablet_meta_pb.set_partition_id(0);
572
570
        LOG(WARNING) << "set debug point TabletMeta::serialize::zero_partition_id old="
573
570
                     << partition_id << " new=" << tablet_meta_pb.DebugString();
574
570
    });
575
570
    bool serialize_success = tablet_meta_pb.SerializeToString(meta_binary);
576
570
    if (!_rs_metas.empty() || !_stale_rs_metas.empty()) {
577
570
        _avg_rs_meta_serialize_size =
578
570
                meta_binary->length() / (_rs_metas.size() + _stale_rs_metas.size());
579
570
        if (meta_binary->length() > config::tablet_meta_serialize_size_limit ||
580
570
            !serialize_success) {
581
0
            int64_t origin_meta_size = meta_binary->length();
582
0
            int64_t stale_rowsets_num = tablet_meta_pb.stale_rs_metas().size();
583
0
            tablet_meta_pb.clear_stale_rs_metas();
584
0
            meta_binary->clear();
585
0
            serialize_success = tablet_meta_pb.SerializeToString(meta_binary);
586
0
            LOG(WARNING) << "tablet meta serialization size exceeds limit: "
587
0
                         << config::tablet_meta_serialize_size_limit
588
0
                         << " clean up stale rowsets, tablet id: " << tablet_id()
589
0
                         << " stale rowset num: " << stale_rowsets_num
590
0
                         << " serialization size before clean " << origin_meta_size
591
0
                         << " serialization size after clean " << meta_binary->length();
592
0
        }
593
570
    }
594
595
570
    if (!serialize_success) {
596
0
        LOG(FATAL) << "failed to serialize meta " << tablet_id();
597
0
    }
598
570
}
599
600
461
Status TabletMeta::deserialize(std::string_view meta_binary) {
601
461
    TabletMetaPB tablet_meta_pb;
602
461
    bool parsed = tablet_meta_pb.ParseFromArray(meta_binary.data(),
603
461
                                                static_cast<int32_t>(meta_binary.size()));
604
461
    if (!parsed) {
605
0
        return Status::Error<INIT_FAILED>("parse tablet meta failed");
606
0
    }
607
461
    init_from_pb(tablet_meta_pb);
608
461
    return Status::OK();
609
461
}
610
611
959
void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) {
612
959
    _table_id = tablet_meta_pb.table_id();
613
959
    _index_id = tablet_meta_pb.index_id();
614
959
    _partition_id = tablet_meta_pb.partition_id();
615
959
    _tablet_id = tablet_meta_pb.tablet_id();
616
959
    _replica_id = tablet_meta_pb.replica_id();
617
959
    _schema_hash = tablet_meta_pb.schema_hash();
618
959
    _shard_id = tablet_meta_pb.shard_id();
619
959
    _creation_time = tablet_meta_pb.creation_time();
620
959
    _cumulative_layer_point = tablet_meta_pb.cumulative_layer_point();
621
959
    _tablet_uid = TabletUid(tablet_meta_pb.tablet_uid());
622
959
    _ttl_seconds = tablet_meta_pb.ttl_seconds();
623
959
    if (tablet_meta_pb.has_tablet_type()) {
624
938
        _tablet_type = tablet_meta_pb.tablet_type();
625
938
    } else {
626
21
        _tablet_type = TabletTypePB::TABLET_TYPE_DISK;
627
21
    }
628
629
    // init _tablet_state
630
959
    switch (tablet_meta_pb.tablet_state()) {
631
27
    case PB_NOTREADY:
632
27
        _tablet_state = TabletState::TABLET_NOTREADY;
633
27
        break;
634
707
    case PB_RUNNING:
635
707
        _tablet_state = TabletState::TABLET_RUNNING;
636
707
        break;
637
0
    case PB_TOMBSTONED:
638
0
        _tablet_state = TabletState::TABLET_TOMBSTONED;
639
0
        break;
640
0
    case PB_STOPPED:
641
0
        _tablet_state = TabletState::TABLET_STOPPED;
642
0
        break;
643
225
    case PB_SHUTDOWN:
644
225
        _tablet_state = TabletState::TABLET_SHUTDOWN;
645
225
        break;
646
0
    default:
647
0
        LOG(WARNING) << "tablet has no state. tablet=" << tablet_id()
648
0
                     << ", schema_hash=" << schema_hash();
649
959
    }
650
651
    // init _schema
652
959
    TabletSchemaSPtr schema = std::make_shared<TabletSchema>();
653
959
    schema->init_from_pb(tablet_meta_pb.schema());
654
959
    if (_handle) {
655
3
        TabletSchemaCache::instance()->release(_handle);
656
3
    }
657
959
    auto pair = TabletSchemaCache::instance()->insert(schema->to_key());
658
959
    _handle = pair.first;
659
959
    _schema = pair.second;
660
661
959
    if (tablet_meta_pb.has_enable_unique_key_merge_on_write()) {
662
938
        _enable_unique_key_merge_on_write = tablet_meta_pb.enable_unique_key_merge_on_write();
663
938
    }
664
665
    // init _rs_metas
666
10.8k
    for (auto& it : tablet_meta_pb.rs_metas()) {
667
10.8k
        RowsetMetaSharedPtr rs_meta(new RowsetMeta());
668
10.8k
        rs_meta->init_from_pb(it);
669
10.8k
        _rs_metas.push_back(std::move(rs_meta));
670
10.8k
    }
671
672
    // For mow table, delete bitmap of stale rowsets has not been persisted.
673
    // When be restart, query should not read the stale rowset, otherwise duplicate keys
674
    // will be read out. Therefore, we don't add them to _stale_rs_meta for mow table.
675
959
    if (!config::skip_loading_stale_rowset_meta && !_enable_unique_key_merge_on_write) {
676
924
        for (auto& it : tablet_meta_pb.stale_rs_metas()) {
677
0
            RowsetMetaSharedPtr rs_meta(new RowsetMeta());
678
0
            rs_meta->init_from_pb(it);
679
0
            _stale_rs_metas.push_back(std::move(rs_meta));
680
0
        }
681
924
    }
682
683
959
    if (tablet_meta_pb.has_in_restore_mode()) {
684
938
        _in_restore_mode = tablet_meta_pb.in_restore_mode();
685
938
    }
686
687
959
    if (tablet_meta_pb.has_preferred_rowset_type()) {
688
472
        _preferred_rowset_type = tablet_meta_pb.preferred_rowset_type();
689
472
    }
690
691
959
    _storage_policy_id = tablet_meta_pb.storage_policy_id();
692
959
    if (tablet_meta_pb.has_cooldown_meta_id()) {
693
0
        _cooldown_meta_id = tablet_meta_pb.cooldown_meta_id();
694
0
    }
695
696
959
    if (tablet_meta_pb.has_delete_bitmap()) {
697
0
        int rst_ids_size = tablet_meta_pb.delete_bitmap().rowset_ids_size();
698
0
        int seg_ids_size = tablet_meta_pb.delete_bitmap().segment_ids_size();
699
0
        int versions_size = tablet_meta_pb.delete_bitmap().versions_size();
700
0
        int seg_maps_size = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps_size();
701
0
        CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size &&
702
0
              seg_maps_size == versions_size);
703
0
        for (int i = 0; i < rst_ids_size; ++i) {
704
0
            RowsetId rst_id;
705
0
            rst_id.init(tablet_meta_pb.delete_bitmap().rowset_ids(i));
706
0
            auto seg_id = tablet_meta_pb.delete_bitmap().segment_ids(i);
707
0
            auto ver = tablet_meta_pb.delete_bitmap().versions(i);
708
0
            auto bitmap = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps(i).data();
709
0
            delete_bitmap()->delete_bitmap[{rst_id, seg_id, ver}] = roaring::Roaring::read(bitmap);
710
0
        }
711
0
    }
712
713
959
    if (tablet_meta_pb.has_binlog_config()) {
714
470
        _binlog_config = tablet_meta_pb.binlog_config();
715
470
    }
716
959
    _compaction_policy = tablet_meta_pb.compaction_policy();
717
959
    _time_series_compaction_goal_size_mbytes =
718
959
            tablet_meta_pb.time_series_compaction_goal_size_mbytes();
719
959
    _time_series_compaction_file_count_threshold =
720
959
            tablet_meta_pb.time_series_compaction_file_count_threshold();
721
959
    _time_series_compaction_time_threshold_seconds =
722
959
            tablet_meta_pb.time_series_compaction_time_threshold_seconds();
723
959
    _time_series_compaction_empty_rowsets_threshold =
724
959
            tablet_meta_pb.time_series_compaction_empty_rowsets_threshold();
725
959
    _time_series_compaction_level_threshold =
726
959
            tablet_meta_pb.time_series_compaction_level_threshold();
727
959
}
728
729
809
// Serializes this tablet meta into `tablet_meta_pb`.
//
// Rowset metas are only embedded when not running in cloud mode. The delete
// bitmap is only written when unique-key merge-on-write is enabled, and entries
// that belong to stale rowsets are left out.
void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) {
    tablet_meta_pb->set_table_id(table_id());
    tablet_meta_pb->set_index_id(index_id());
    tablet_meta_pb->set_partition_id(partition_id());
    tablet_meta_pb->set_tablet_id(tablet_id());
    tablet_meta_pb->set_replica_id(replica_id());
    tablet_meta_pb->set_schema_hash(schema_hash());
    tablet_meta_pb->set_shard_id(shard_id());
    tablet_meta_pb->set_creation_time(creation_time());
    tablet_meta_pb->set_cumulative_layer_point(cumulative_layer_point());
    *(tablet_meta_pb->mutable_tablet_uid()) = tablet_uid().to_proto();
    tablet_meta_pb->set_tablet_type(_tablet_type);
    tablet_meta_pb->set_ttl_seconds(_ttl_seconds);

    // Map the in-memory tablet state onto its protobuf counterpart.
    switch (tablet_state()) {
    case TABLET_NOTREADY:
        tablet_meta_pb->set_tablet_state(PB_NOTREADY);
        break;
    case TABLET_RUNNING:
        tablet_meta_pb->set_tablet_state(PB_RUNNING);
        break;
    case TABLET_TOMBSTONED:
        tablet_meta_pb->set_tablet_state(PB_TOMBSTONED);
        break;
    case TABLET_STOPPED:
        tablet_meta_pb->set_tablet_state(PB_STOPPED);
        break;
    case TABLET_SHUTDOWN:
        tablet_meta_pb->set_tablet_state(PB_SHUTDOWN);
        break;
    }

    // RowsetMetaPB is separated from TabletMetaPB
    if (!config::is_cloud_mode()) {
        for (const auto& rs : _rs_metas) {
            rs->to_rowset_pb(tablet_meta_pb->add_rs_metas());
        }
        // Iterate by reference: copying the shared pointer per element is unnecessary.
        for (const auto& rs : _stale_rs_metas) {
            rs->to_rowset_pb(tablet_meta_pb->add_stale_rs_metas());
        }
    }

    _schema->to_schema_pb(tablet_meta_pb->mutable_schema());

    tablet_meta_pb->set_in_restore_mode(in_restore_mode());

    // Only write the preferred rowset type when it is BETA_ROWSET, so that the
    // persisted tablet meta is modified as little as possible.
    if (_preferred_rowset_type == BETA_ROWSET) {
        tablet_meta_pb->set_preferred_rowset_type(_preferred_rowset_type);
    }
    if (_storage_policy_id > 0) {
        tablet_meta_pb->set_storage_policy_id(_storage_policy_id);
    }
    if (_cooldown_meta_id.initialized()) {
        tablet_meta_pb->mutable_cooldown_meta_id()->CopyFrom(_cooldown_meta_id.to_proto());
    }

    tablet_meta_pb->set_enable_unique_key_merge_on_write(_enable_unique_key_merge_on_write);

    if (_enable_unique_key_merge_on_write) {
        // Delete-bitmap entries of stale rowsets are not serialized.
        std::set<RowsetId> stale_rs_ids;
        for (const auto& rowset : _stale_rs_metas) {
            stale_rs_ids.insert(rowset->rowset_id());
        }
        DeleteBitmapPB* delete_bitmap_pb = tablet_meta_pb->mutable_delete_bitmap();
        // The range expression binds to a member of the temporary snapshot, which
        // keeps the whole snapshot alive for the duration of the loop.
        for (auto& [id, bitmap] : delete_bitmap()->snapshot().delete_bitmap) {
            auto& [rowset_id, segment_id, ver] = id;
            if (stale_rs_ids.count(rowset_id) != 0) {
                continue;
            }
            delete_bitmap_pb->add_rowset_ids(rowset_id.to_string());
            delete_bitmap_pb->add_segment_ids(segment_id);
            delete_bitmap_pb->add_versions(ver);
            // Serialize the roaring bitmap into a buffer of exactly its encoded size.
            std::string bitmap_data(bitmap.getSizeInBytes(), '\0');
            bitmap.write(bitmap_data.data());
            *(delete_bitmap_pb->add_segment_delete_bitmaps()) = std::move(bitmap_data);
        }
    }
    _binlog_config.to_pb(tablet_meta_pb->mutable_binlog_config());
    tablet_meta_pb->set_compaction_policy(compaction_policy());
    tablet_meta_pb->set_time_series_compaction_goal_size_mbytes(
            time_series_compaction_goal_size_mbytes());
    tablet_meta_pb->set_time_series_compaction_file_count_threshold(
            time_series_compaction_file_count_threshold());
    tablet_meta_pb->set_time_series_compaction_time_threshold_seconds(
            time_series_compaction_time_threshold_seconds());
    tablet_meta_pb->set_time_series_compaction_empty_rowsets_threshold(
            time_series_compaction_empty_rowsets_threshold());
    tablet_meta_pb->set_time_series_compaction_level_threshold(
            time_series_compaction_level_threshold());
}
819
820
2
// Converts this tablet meta to its protobuf form and renders that message as
// JSON into `json_string`, using the given json2pb `options`.
void TabletMeta::to_json(string* json_string, json2pb::Pb2JsonOptions& options) {
    TabletMetaPB meta_pb;
    to_meta_pb(&meta_pb);
    json2pb::ProtoMessageToJson(meta_pb, json_string, options);
}
825
826
124
// Returns the version of the rowset meta in _rs_metas with the largest end
// version. Starts from {-1, 0}, so that value is returned when no rowset has an
// end version greater than 0.
Version TabletMeta::max_version() const {
    Version newest = {-1, 0};
    for (const auto& meta : _rs_metas) {
        // Strictly-greater comparison: the first rowset seen wins on ties.
        if (meta->end_version() <= newest.second) {
            continue;
        }
        newest = meta->version();
    }
    return newest;
}
835
836
0
size_t TabletMeta::version_count_cross_with_range(const Version& range) const {
837
0
    size_t count = 0;
838
0
    for (const auto& rs_meta : _rs_metas) {
839
0
        if (!(range.first > rs_meta->version().second || range.second < rs_meta->version().first)) {
840
0
            count++;
841
0
        }
842
0
    }
843
0
    return count;
844
0
}
845
846
11.1k
// Appends `rs_meta` to _rs_metas unless a rowset with the same version exists.
//
// Returns OK when the rowset was appended, or when an entry with the same
// version AND the same rowset id is already present (duplicate request).
// Returns PUSH_VERSION_ALREADY_EXIST when the version is taken by a rowset with
// a different id.
Status TabletMeta::add_rs_meta(const RowsetMetaSharedPtr& rs_meta) {
    // check RowsetMeta is valid
    for (auto& existing : _rs_metas) {
        if (!(existing->version() == rs_meta->version())) {
            continue;
        }
        if (existing->rowset_id() != rs_meta->rowset_id()) {
            return Status::Error<PUSH_VERSION_ALREADY_EXIST>(
                    "version already exist. rowset_id={}, version={}, tablet={}",
                    existing->rowset_id().to_string(), existing->version().to_string(),
                    tablet_id());
        }
        // rowsetid,version is equal, it is a duplicate req, skip it
        return Status::OK();
    }
    _rs_metas.push_back(rs_meta);
    return Status::OK();
}
863
864
0
void TabletMeta::add_rowsets_unchecked(const std::vector<RowsetSharedPtr>& to_add) {
865
0
    for (const auto& rs : to_add) {
866
0
        _rs_metas.push_back(rs->rowset_meta());
867
0
    }
868
0
}
869
870
void TabletMeta::delete_rs_meta_by_version(const Version& version,
871
0
                                           std::vector<RowsetMetaSharedPtr>* deleted_rs_metas) {
872
0
    size_t rowset_cache_version_size = 0;
873
0
    auto it = _rs_metas.begin();
874
0
    while (it != _rs_metas.end()) {
875
0
        if ((*it)->version() == version) {
876
0
            if (deleted_rs_metas != nullptr) {
877
0
                deleted_rs_metas->push_back(*it);
878
0
            }
879
0
            _rs_metas.erase(it);
880
0
            if (_enable_unique_key_merge_on_write) {
881
0
                rowset_cache_version_size =
882
0
                        _delete_bitmap->remove_rowset_cache_version((*it)->rowset_id());
883
0
            }
884
0
            return;
885
0
        } else {
886
0
            ++it;
887
0
        }
888
0
    }
889
0
    _check_mow_rowset_cache_version_size(rowset_cache_version_size);
890
0
}
891
892
void TabletMeta::modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add,
893
                                 const std::vector<RowsetMetaSharedPtr>& to_delete,
894
24
                                 bool same_version) {
895
24
    size_t rowset_cache_version_size = 0;
896
    // Remove to_delete rowsets from _rs_metas
897
24
    for (auto rs_to_del : to_delete) {
898
18
        auto it = _rs_metas.begin();
899
20
        while (it != _rs_metas.end()) {
900
20
            if (rs_to_del->version() == (*it)->version()) {
901
18
                _rs_metas.erase(it);
902
18
                if (_enable_unique_key_merge_on_write) {
903
1
                    rowset_cache_version_size =
904
1
                            _delete_bitmap->remove_rowset_cache_version((*it)->rowset_id());
905
1
                }
906
                // there should be only one rowset match the version
907
18
                break;
908
18
            } else {
909
2
                ++it;
910
2
            }
911
20
        }
912
18
    }
913
24
    if (!same_version) {
914
        // put to_delete rowsets in _stale_rs_metas.
915
6
        _stale_rs_metas.insert(_stale_rs_metas.end(), to_delete.begin(), to_delete.end());
916
6
    }
917
    // put to_add rowsets in _rs_metas.
918
24
    _rs_metas.insert(_rs_metas.end(), to_add.begin(), to_add.end());
919
24
    _check_mow_rowset_cache_version_size(rowset_cache_version_size);
920
24
}
921
922
// Use the passing "rs_metas" to replace the rs meta in this tablet meta
923
// Also clear the _stale_rs_metas because this tablet meta may have been copied from
924
// an existing tablet before. Add after revise, only the passing "rs_metas"
925
// is needed.
926
5
void TabletMeta::revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas) {
927
5
    {
928
5
        std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
929
5
        _rs_metas = std::move(rs_metas);
930
5
        _stale_rs_metas.clear();
931
5
    }
932
5
    if (_enable_unique_key_merge_on_write) {
933
0
        _delete_bitmap->clear_rowset_cache_version();
934
0
    }
935
5
}
936
937
// This method should call after revise_rs_metas, since new rs_metas might be a subset
938
// of original tablet, we should revise the delete_bitmap according to current rowset.
939
//
940
// Delete bitmap is protected by Tablet::_meta_lock, we don't need to acquire the
941
// TabletMeta's _meta_lock
942
1
void TabletMeta::revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap) {
943
1
    _delete_bitmap = std::make_unique<DeleteBitmap>(tablet_id());
944
2
    for (auto rs : _rs_metas) {
945
2
        DeleteBitmap rs_bm(tablet_id());
946
2
        delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX},
947
2
                             &rs_bm);
948
2
        _delete_bitmap->merge(rs_bm);
949
2
    }
950
1
    for (auto rs : _stale_rs_metas) {
951
0
        DeleteBitmap rs_bm(tablet_id());
952
0
        delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX},
953
0
                             &rs_bm);
954
0
        _delete_bitmap->merge(rs_bm);
955
0
    }
956
1
}
957
958
0
void TabletMeta::delete_stale_rs_meta_by_version(const Version& version) {
959
0
    auto it = _stale_rs_metas.begin();
960
0
    while (it != _stale_rs_metas.end()) {
961
0
        if ((*it)->version() == version) {
962
0
            it = _stale_rs_metas.erase(it);
963
0
        } else {
964
0
            it++;
965
0
        }
966
0
    }
967
0
}
968
969
0
// Returns the first rowset meta in _rs_metas whose version equals `version`,
// or nullptr if there is none.
RowsetMetaSharedPtr TabletMeta::acquire_rs_meta_by_version(const Version& version) const {
    // Iterate by reference; only the match that is returned needs to be copied.
    for (const auto& rs_meta : _rs_metas) {
        if (rs_meta->version() == version) {
            return rs_meta;
        }
    }
    return nullptr;
}
977
978
8
// Returns the first rowset meta in _stale_rs_metas whose version equals
// `version`, or nullptr if there is none.
RowsetMetaSharedPtr TabletMeta::acquire_stale_rs_meta_by_version(const Version& version) const {
    // Iterate by reference; only the match that is returned needs to be copied.
    for (const auto& rs_meta : _stale_rs_metas) {
        if (rs_meta->version() == version) {
            return rs_meta;
        }
    }
    return nullptr;
}
986
987
23
// Stores `partition_id` and always returns OK. A warning is logged (but the
// value is still stored) when a positive partition id is already set and
// differs from the new one, or when the new id is smaller than 1.
Status TabletMeta::set_partition_id(int64_t partition_id) {
    const bool differs_from_current = _partition_id > 0 && _partition_id != partition_id;
    const bool below_minimum = partition_id < 1;
    if (differs_from_current || below_minimum) {
        LOG(WARNING) << "cur partition id=" << _partition_id << " new partition id=" << partition_id
                     << " not equal";
    }
    _partition_id = partition_id;
    return Status::OK();
}
995
996
0
void TabletMeta::clear_stale_rowset() {
997
0
    _stale_rs_metas.clear();
998
0
    if (_enable_unique_key_merge_on_write) {
999
0
        _delete_bitmap->clear_rowset_cache_version();
1000
0
    }
1001
0
}
1002
1003
0
void TabletMeta::clear_rowsets() {
1004
0
    _rs_metas.clear();
1005
0
    if (_enable_unique_key_merge_on_write) {
1006
0
        _delete_bitmap->clear_rowset_cache_version();
1007
0
    }
1008
0
}
1009
1010
24
void TabletMeta::_check_mow_rowset_cache_version_size(size_t rowset_cache_version_size) {
1011
24
    if (_enable_unique_key_merge_on_write && config::enable_mow_verbose_log &&
1012
24
        rowset_cache_version_size > _rs_metas.size() + _stale_rs_metas.size()) {
1013
0
        std::stringstream ss;
1014
0
        auto rowset_ids = _delete_bitmap->get_rowset_cache_version();
1015
0
        for (const auto& rowset_id : rowset_ids) {
1016
0
            bool found = false;
1017
0
            for (auto& rs_meta : _rs_metas) {
1018
0
                if (rs_meta->rowset_id() == rowset_id) {
1019
0
                    found = true;
1020
0
                    break;
1021
0
                }
1022
0
            }
1023
0
            if (found) {
1024
0
                continue;
1025
0
            }
1026
0
            for (auto& rs_meta : _stale_rs_metas) {
1027
0
                if (rs_meta->rowset_id() == rowset_id) {
1028
0
                    found = true;
1029
0
                    break;
1030
0
                }
1031
0
            }
1032
0
            if (!found) {
1033
0
                ss << rowset_id.to_string() << ", ";
1034
0
            }
1035
0
        }
1036
        // size(rowset_cache_version) <= size(_rs_metas) + size(_stale_rs_metas) + size(_unused_rs)
1037
0
        std::string msg = fmt::format(
1038
0
                "tablet: {}, rowset_cache_version size: {}, "
1039
0
                "_rs_metas size: {}, _stale_rs_metas size: {}, delta: {}. rowset only in cache: {}",
1040
0
                _tablet_id, rowset_cache_version_size, _rs_metas.size(), _stale_rs_metas.size(),
1041
0
                rowset_cache_version_size - _rs_metas.size() - _stale_rs_metas.size(), ss.str());
1042
0
        LOG(INFO) << msg;
1043
0
    }
1044
24
}
1045
1046
1
// Field-by-field equality of two tablet metas over the members listed below.
// NOTE(review): the _rs_metas elements are compared with `!=` on the
// RowsetMetaSharedPtr handles themselves, not on the pointed-to metas — confirm
// that this is intended.
bool operator==(const TabletMeta& a, const TabletMeta& b) {
    // Identity and placement.
    if (a._table_id != b._table_id || a._index_id != b._index_id ||
        a._partition_id != b._partition_id || a._tablet_id != b._tablet_id ||
        a._replica_id != b._replica_id || a._schema_hash != b._schema_hash ||
        a._shard_id != b._shard_id) {
        return false;
    }
    // Lifecycle attributes.
    if (a._creation_time != b._creation_time ||
        a._cumulative_layer_point != b._cumulative_layer_point ||
        a._tablet_uid != b._tablet_uid || a._tablet_type != b._tablet_type ||
        a._tablet_state != b._tablet_state) {
        return false;
    }
    if (*a._schema != *b._schema) {
        return false;
    }
    // Rowset lists must have the same length and pairwise-equal entries.
    if (a._rs_metas.size() != b._rs_metas.size()) {
        return false;
    }
    for (int idx = 0; idx < a._rs_metas.size(); ++idx) {
        if (a._rs_metas[idx] != b._rs_metas[idx]) {
            return false;
        }
    }
    if (a._in_restore_mode != b._in_restore_mode ||
        a._preferred_rowset_type != b._preferred_rowset_type ||
        a._storage_policy_id != b._storage_policy_id ||
        a._compaction_policy != b._compaction_policy) {
        return false;
    }
    // Time-series compaction tuning knobs.
    if (a._time_series_compaction_goal_size_mbytes != b._time_series_compaction_goal_size_mbytes ||
        a._time_series_compaction_file_count_threshold !=
                b._time_series_compaction_file_count_threshold ||
        a._time_series_compaction_time_threshold_seconds !=
                b._time_series_compaction_time_threshold_seconds ||
        a._time_series_compaction_empty_rowsets_threshold !=
                b._time_series_compaction_empty_rowsets_threshold ||
        a._time_series_compaction_level_threshold != b._time_series_compaction_level_threshold) {
        return false;
    }
    return true;
}
1083
1084
0
// Negation of operator==(const TabletMeta&, const TabletMeta&).
bool operator!=(const TabletMeta& a, const TabletMeta& b) {
    const bool equal = (a == b);
    return !equal;
}
1087
1088
// Builds a size-based LRU cache of the given `capacity` for aggregated delete
// bitmaps. The stale sweep time comes from
// config::delete_bitmap_agg_cache_stale_sweep_time_sec; the trailing 256 is
// forwarded unchanged to LRUCachePolicy.
DeleteBitmapAggCache::DeleteBitmapAggCache(size_t capacity)
        : LRUCachePolicy(CachePolicy::CacheType::DELETE_BITMAP_AGG_CACHE, capacity,
                         LRUCacheType::SIZE,
                         config::delete_bitmap_agg_cache_stale_sweep_time_sec, 256) {}
1092
1093
135
// Returns the delete-bitmap aggregation cache held by the global ExecEnv.
DeleteBitmapAggCache* DeleteBitmapAggCache::instance() {
    auto* exec_env = ExecEnv::GetInstance();
    return exec_env->delete_bitmap_agg_cache();
}
1096
1097
1
// Heap-allocates a new cache with the given `capacity` and returns the raw
// pointer; the caller is responsible for the returned object.
DeleteBitmapAggCache* DeleteBitmapAggCache::create_instance(size_t capacity) {
    auto* cache = new DeleteBitmapAggCache(capacity);
    return cache;
}
1100
1101
1.03k
// Creates an empty delete bitmap associated with `tablet_id`.
DeleteBitmap::DeleteBitmap(int64_t tablet_id)
        : _tablet_id(tablet_id) {}
1102
1103
7
// Copy constructor: copies the bitmap map and the tablet id from `o` while
// holding a shared lock on `o.lock`. The rowset cache-version state of `o` is
// not copied here.
DeleteBitmap::DeleteBitmap(const DeleteBitmap& o) {
    std::shared_lock source_guard(o.lock);
    _tablet_id = o._tablet_id;
    delete_bitmap = o.delete_bitmap;
}
1108
1109
0
// Copy assignment: copies the bitmap map and tablet id from `o`.
// This object's lock is taken exclusively and `o.lock` shared; the two locks
// are always acquired in ascending address order so that concurrent
// cross-assignments cannot deadlock.
DeleteBitmap& DeleteBitmap::operator=(const DeleteBitmap& o) {
    if (this == &o) {
        return *this;
    }
    // Performs the actual member copy; must be called with both locks held.
    auto copy_members = [this, &o] {
        delete_bitmap = o.delete_bitmap;
        _tablet_id = o._tablet_id;
    };
    if (this < &o) {
        std::unique_lock self_guard(lock);
        std::shared_lock other_guard(o.lock);
        copy_members();
    } else {
        std::shared_lock other_guard(o.lock);
        std::unique_lock self_guard(lock);
        copy_members();
    }
    return *this;
}
1124
1125
0
// Move constructor: steals the bitmap map from `o`, copies its tablet id and
// clears `o`'s rowset cache versions, all while holding both of `o`'s locks.
DeleteBitmap::DeleteBitmap(DeleteBitmap&& o) noexcept {
    std::scoped_lock source_guard(o.lock, o._rowset_cache_version_lock);
    delete_bitmap = std::move(o.delete_bitmap);
    _tablet_id = o._tablet_id;
    o._rowset_cache_version.clear();
}
1131
1132
0
// Move assignment: steals the bitmap map from `o`, copies its tablet id and
// clears `o`'s rowset cache versions. This object's `lock` and both of `o`'s
// locks are acquired together through std::scoped_lock.
DeleteBitmap& DeleteBitmap::operator=(DeleteBitmap&& o) noexcept {
    if (this != &o) {
        std::scoped_lock guard(lock, o.lock, o._rowset_cache_version_lock);
        delete_bitmap = std::move(o.delete_bitmap);
        _tablet_id = o._tablet_id;
        o._rowset_cache_version.clear();
    }
    return *this;
}
1140
1141
0
// Builds a DeleteBitmap for `tablet_id` from its protobuf form.
//
// `pb` stores four parallel repeated fields (rowset ids, segment ids, versions,
// serialized roaring bitmaps); entry i of each field together describes one
// (rowset, segment, version) -> bitmap mapping.
DeleteBitmap DeleteBitmap::from_pb(const DeleteBitmapPB& pb, int64_t tablet_id) {
    const int len = pb.rowset_ids_size();
    // All four parallel arrays must have the same length; the bitmap payload
    // array is indexed below and therefore has to be checked as well.
    DCHECK_EQ(len, pb.segment_ids_size());
    DCHECK_EQ(len, pb.versions_size());
    DCHECK_EQ(len, pb.segment_delete_bitmaps_size());
    DeleteBitmap delete_bitmap(tablet_id);
    for (int i = 0; i < len; ++i) {
        RowsetId rs_id;
        rs_id.init(pb.rowset_ids(i));
        BitmapKey key = {rs_id, pb.segment_ids(i), pb.versions(i)};
        delete_bitmap.delete_bitmap[key] =
                roaring::Roaring::read(pb.segment_delete_bitmaps(i).data());
    }
    return delete_bitmap;
}
1155
1156
0
// Serializes every entry of this delete bitmap into a DeleteBitmapPB while
// holding a shared lock. Each entry contributes one element to each of the four
// parallel repeated fields (rowset id, segment id, version, bitmap bytes).
DeleteBitmapPB DeleteBitmap::to_pb() {
    std::shared_lock read_guard(lock);
    DeleteBitmapPB pb;
    for (const auto& entry : delete_bitmap) {
        const auto& key = entry.first;
        const auto& bitmap = entry.second;
        pb.mutable_rowset_ids()->Add(std::get<0>(key).to_string());
        pb.mutable_segment_ids()->Add(std::get<1>(key));
        pb.mutable_versions()->Add(std::get<2>(key));
        // Serialize the roaring bitmap into a buffer of exactly its encoded size.
        std::string encoded(bitmap.getSizeInBytes(), '\0');
        bitmap.write(encoded.data());
        pb.mutable_segment_delete_bitmaps()->Add(std::move(encoded));
    }
    return pb;
}
1169
1170
7
// Returns a copy of this delete bitmap.
//
// No lock is taken here on purpose: the copy constructor already acquires a
// shared lock on the source's `lock`. Taking it here as well would make the
// same thread lock the shared mutex twice, which is undefined behaviour for
// std::shared_mutex and can deadlock when a writer queues between the two.
DeleteBitmap DeleteBitmap::snapshot() const {
    return DeleteBitmap(*this);
}
1174
1175
3
// Returns a copy of this delete bitmap restricted to entries whose version is
// less than or equal to `version`.
DeleteBitmap DeleteBitmap::snapshot(Version version) const {
    // Take snapshot first, then remove keys greater than given version.
    DeleteBitmap copy = this->snapshot();
    auto& entries = copy.delete_bitmap;
    for (auto pos = entries.begin(); pos != entries.end();) {
        if (std::get<2>(pos->first) > version) {
            pos = entries.erase(pos);
            continue;
        }
        pos++;
    }
    return copy;
}
1188
1189
459k
void DeleteBitmap::add(const BitmapKey& bmk, uint32_t row_id) {
1190
459k
    std::lock_guard l(lock);
1191
459k
    delete_bitmap[bmk].add(row_id);
1192
459k
}
1193
1194
0
int DeleteBitmap::remove(const BitmapKey& bmk, uint32_t row_id) {
1195
0
    std::lock_guard l(lock);
1196
0
    auto it = delete_bitmap.find(bmk);
1197
0
    if (it == delete_bitmap.end()) return -1;
1198
0
    it->second.remove(row_id);
1199
0
    return 0;
1200
0
}
1201
1202
8
void DeleteBitmap::remove(const BitmapKey& start, const BitmapKey& end) {
1203
8
    std::lock_guard l(lock);
1204
107
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end();) {
1205
101
        auto& [k, _] = *it;
1206
101
        if (k >= end) {
1207
2
            break;
1208
2
        }
1209
99
        it = delete_bitmap.erase(it);
1210
99
    }
1211
8
}
1212
1213
6
bool DeleteBitmap::contains(const BitmapKey& bmk, uint32_t row_id) const {
1214
6
    std::shared_lock l(lock);
1215
6
    auto it = delete_bitmap.find(bmk);
1216
6
    return it != delete_bitmap.end() && it->second.contains(row_id);
1217
6
}
1218
1219
2
bool DeleteBitmap::contains_agg(const BitmapKey& bmk, uint32_t row_id) const {
1220
2
    return get_agg(bmk)->contains(row_id);
1221
2
}
1222
1223
0
bool DeleteBitmap::empty() const {
1224
0
    std::shared_lock l(lock);
1225
0
    return delete_bitmap.empty();
1226
0
}
1227
1228
63
uint64_t DeleteBitmap::cardinality() const {
1229
63
    std::shared_lock l(lock);
1230
63
    uint64_t res = 0;
1231
314
    for (auto entry : delete_bitmap) {
1232
314
        res += entry.second.cardinality();
1233
314
    }
1234
63
    return res;
1235
63
}
1236
1237
0
uint64_t DeleteBitmap::get_size() const {
1238
0
    std::shared_lock l(lock);
1239
0
    uint64_t charge = 0;
1240
0
    for (auto& [k, v] : delete_bitmap) {
1241
0
        charge += v.getSizeInBytes();
1242
0
    }
1243
0
    return charge;
1244
0
}
1245
1246
1
bool DeleteBitmap::contains_agg_without_cache(const BitmapKey& bmk, uint32_t row_id) const {
1247
1
    std::shared_lock l(lock);
1248
1
    DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), 0};
1249
1
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1250
0
        auto& [k, bm] = *it;
1251
0
        if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) ||
1252
0
            std::get<2>(k) > std::get<2>(bmk)) {
1253
0
            break;
1254
0
        }
1255
0
        if (bm.contains(row_id)) {
1256
0
            return true;
1257
0
        }
1258
0
    }
1259
1
    return false;
1260
1
}
1261
1262
0
void DeleteBitmap::remove_sentinel_marks() {
1263
0
    std::lock_guard l(lock);
1264
0
    for (auto it = delete_bitmap.begin(), end = delete_bitmap.end(); it != end;) {
1265
0
        if (std::get<1>(it->first) == DeleteBitmap::INVALID_SEGMENT_ID) {
1266
0
            it = delete_bitmap.erase(it);
1267
0
        } else {
1268
0
            ++it;
1269
0
        }
1270
0
    }
1271
0
}
1272
1273
38
// Stores a copy of `segment_delete_bitmap` under `bmk`, replacing any existing
// bitmap for that key. Returns 1 when a new entry was inserted and 0 when an
// existing one was overwritten.
int DeleteBitmap::set(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) {
    std::lock_guard write_guard(lock);
    const auto outcome = delete_bitmap.insert_or_assign(bmk, segment_delete_bitmap);
    return outcome.second;
}
1278
1279
3
int DeleteBitmap::get(const BitmapKey& bmk, roaring::Roaring* segment_delete_bitmap) const {
1280
3
    std::shared_lock l(lock);
1281
3
    auto it = delete_bitmap.find(bmk);
1282
3
    if (it == delete_bitmap.end()) return -1;
1283
3
    *segment_delete_bitmap = it->second; // copy
1284
3
    return 0;
1285
3
}
1286
1287
54
// Returns a pointer to the bitmap stored under `bmk`, or nullptr when there is
// none. The shared lock is only held for the lookup, so the returned pointer is
// used by the caller without this function's lock being held.
const roaring::Roaring* DeleteBitmap::get(const BitmapKey& bmk) const {
    std::shared_lock read_guard(lock);
    const auto pos = delete_bitmap.find(bmk);
    const bool found = pos != delete_bitmap.end();
    if (!found) {
        return nullptr;
    }
    return &(pos->second); // get address
}
1293
1294
void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end,
1295
3
                          DeleteBitmap* subset_rowset_map) const {
1296
3
    roaring::Roaring roaring;
1297
3
    DCHECK(start < end);
1298
3
    std::shared_lock l(lock);
1299
26
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1300
25
        auto& [k, bm] = *it;
1301
25
        if (k >= end) {
1302
2
            break;
1303
2
        }
1304
23
        subset_rowset_map->set(k, bm);
1305
23
    }
1306
3
}
1307
1308
0
size_t DeleteBitmap::get_count_with_range(const BitmapKey& start, const BitmapKey& end) const {
1309
0
    DCHECK(start < end);
1310
0
    size_t count = 0;
1311
0
    std::shared_lock l(lock);
1312
0
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1313
0
        auto& [k, bm] = *it;
1314
0
        if (k >= end) {
1315
0
            break;
1316
0
        }
1317
0
        count++;
1318
0
    }
1319
0
    return count;
1320
0
}
1321
1322
2
// Unions `segment_delete_bitmap` into the bitmap stored under `bmk`; if no
// bitmap exists for that key yet, a copy of `segment_delete_bitmap` is stored.
void DeleteBitmap::merge(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) {
    std::lock_guard write_guard(lock);
    const auto outcome = delete_bitmap.emplace(bmk, segment_delete_bitmap);
    if (outcome.second) {
        // Freshly inserted: nothing to merge.
        return;
    }
    outcome.first->second |= segment_delete_bitmap;
}
1329
1330
9
void DeleteBitmap::merge(const DeleteBitmap& other) {
1331
9
    std::lock_guard l(lock);
1332
29
    for (auto& i : other.delete_bitmap) {
1333
29
        auto [j, succ] = this->delete_bitmap.insert(i);
1334
29
        if (!succ) j->second |= i.second;
1335
29
    }
1336
9
}
1337
1338
void DeleteBitmap::add_to_remove_queue(
1339
        const std::string& version_str,
1340
        const std::vector<std::tuple<int64_t, DeleteBitmap::BitmapKey, DeleteBitmap::BitmapKey>>&
1341
0
                vector) {
1342
0
    std::shared_lock l(stale_delete_bitmap_lock);
1343
0
    _stale_delete_bitmap.emplace(version_str, vector);
1344
0
}
1345
1346
0
void DeleteBitmap::remove_stale_delete_bitmap_from_queue(const std::vector<std::string>& vector) {
1347
0
    if (!config::enable_delete_bitmap_merge_on_compaction) {
1348
0
        return;
1349
0
    }
1350
0
    std::shared_lock l(stale_delete_bitmap_lock);
1351
    //<rowset_id, start_version, end_version>
1352
0
    std::vector<std::tuple<std::string, uint64_t, uint64_t>> to_delete;
1353
0
    int64_t tablet_id = -1;
1354
0
    for (auto& version_str : vector) {
1355
0
        auto it = _stale_delete_bitmap.find(version_str);
1356
0
        if (it != _stale_delete_bitmap.end()) {
1357
0
            auto delete_bitmap_vector = it->second;
1358
0
            for (auto& delete_bitmap_tuple : it->second) {
1359
0
                if (tablet_id < 0) {
1360
0
                    tablet_id = std::get<0>(delete_bitmap_tuple);
1361
0
                }
1362
0
                auto start_bmk = std::get<1>(delete_bitmap_tuple);
1363
0
                auto end_bmk = std::get<2>(delete_bitmap_tuple);
1364
                // the key range of to be removed is [start_bmk,end_bmk),
1365
                // due to the different definitions of the right boundary,
1366
                // so use end_bmk as right boundary when removing local delete bitmap,
1367
                // use (end_bmk - 1) as right boundary when removing ms delete bitmap
1368
0
                remove(start_bmk, end_bmk);
1369
0
                to_delete.emplace_back(std::make_tuple(std::get<0>(start_bmk).to_string(), 0,
1370
0
                                                       std::get<2>(end_bmk) - 1));
1371
0
            }
1372
0
            _stale_delete_bitmap.erase(version_str);
1373
0
        }
1374
0
    }
1375
0
    if (tablet_id == -1 || to_delete.empty()) {
1376
0
        return;
1377
0
    }
1378
0
    CloudStorageEngine& engine = ExecEnv::GetInstance()->storage_engine().to_cloud();
1379
0
    auto st = engine.meta_mgr().remove_old_version_delete_bitmap(tablet_id, to_delete);
1380
0
    if (!st.ok()) {
1381
0
        LOG(WARNING) << "fail to remove_stale_delete_bitmap_from_queue for tablet=" << tablet_id
1382
0
                     << ",st=" << st;
1383
0
    }
1384
0
}
1385
1386
63
uint64_t DeleteBitmap::get_delete_bitmap_count() {
1387
63
    std::shared_lock l(lock);
1388
63
    uint64_t count = 0;
1389
377
    for (auto it = delete_bitmap.begin(); it != delete_bitmap.end(); it++) {
1390
314
        if (std::get<1>(it->first) != DeleteBitmap::INVALID_SEGMENT_ID) {
1391
314
            count++;
1392
314
        }
1393
314
    }
1394
63
    return count;
1395
63
}
1396
1397
0
bool DeleteBitmap::has_calculated_for_multi_segments(const RowsetId& rowset_id) const {
1398
0
    return contains({rowset_id, INVALID_SEGMENT_ID, TEMP_VERSION_COMMON}, ROWSET_SENTINEL_MARK);
1399
0
}
1400
1401
1
size_t DeleteBitmap::remove_rowset_cache_version(const RowsetId& rowset_id) {
1402
1
    std::lock_guard l(_rowset_cache_version_lock);
1403
1
    _rowset_cache_version.erase(rowset_id);
1404
1
    VLOG_DEBUG << "remove agg cache version for tablet=" << _tablet_id
1405
0
               << ", rowset=" << rowset_id.to_string();
1406
1
    return _rowset_cache_version.size();
1407
1
}
1408
1409
0
void DeleteBitmap::clear_rowset_cache_version() {
1410
0
    std::lock_guard l(_rowset_cache_version_lock);
1411
0
    _rowset_cache_version.clear();
1412
0
    VLOG_DEBUG << "clear agg cache version for tablet=" << _tablet_id;
1413
0
}
1414
1415
0
std::set<RowsetId> DeleteBitmap::get_rowset_cache_version() {
1416
0
    std::set<RowsetId> set;
1417
0
    std::shared_lock l(_rowset_cache_version_lock);
1418
0
    for (auto& [k, _] : _rowset_cache_version) {
1419
0
        set.insert(k);
1420
0
    }
1421
0
    return set;
1422
0
}
1423
1424
38
// Looks up the cached version recorded for the rowset and segment of `bmk`.
// Returns 0 when either the rowset or the segment has no entry.
DeleteBitmap::Version DeleteBitmap::_get_rowset_cache_version(const BitmapKey& bmk) const {
    std::shared_lock cache_guard(_rowset_cache_version_lock);
    auto rowset_pos = _rowset_cache_version.find(std::get<0>(bmk));
    if (rowset_pos == _rowset_cache_version.end()) {
        return 0;
    }
    auto& per_segment = rowset_pos->second;
    auto segment_pos = per_segment.find(std::get<1>(bmk));
    if (segment_pos == per_segment.end()) {
        return 0;
    }
    return segment_pos->second;
}
1435
1436
// We cannot just copy the underlying memory to construct a string
1437
// because equivalent objects may have different padding bytes.
1438
// Reading padding bytes is undefined behavior, neither copy nor
1439
// placement new will help simplify the code.
1440
// Refer to C11 standards §6.2.6.1/6 and §6.7.9/21 for more info.
1441
45
static std::string agg_cache_key(int64_t tablet_id, const DeleteBitmap::BitmapKey& bmk) {
1442
45
    std::string ret(sizeof(tablet_id) + sizeof(bmk), '\0');
1443
45
    *reinterpret_cast<int64_t*>(ret.data()) = tablet_id;
1444
45
    auto t = reinterpret_cast<DeleteBitmap::BitmapKey*>(ret.data() + sizeof(tablet_id));
1445
45
    std::get<RowsetId>(*t).version = std::get<RowsetId>(bmk).version;
1446
45
    std::get<RowsetId>(*t).hi = std::get<RowsetId>(bmk).hi;
1447
45
    std::get<RowsetId>(*t).mi = std::get<RowsetId>(bmk).mi;
1448
45
    std::get<RowsetId>(*t).lo = std::get<RowsetId>(bmk).lo;
1449
45
    std::get<1>(*t) = std::get<1>(bmk);
1450
45
    std::get<2>(*t) = std::get<2>(bmk);
1451
45
    return ret;
1452
45
}
1453
1454
44
std::shared_ptr<roaring::Roaring> DeleteBitmap::get_agg(const BitmapKey& bmk) const {
1455
44
    std::string key_str = agg_cache_key(_tablet_id, bmk); // Cache key container
1456
44
    CacheKey key(key_str);
1457
44
    Cache::Handle* handle = DeleteBitmapAggCache::instance()->lookup(key);
1458
1459
44
    DeleteBitmapAggCache::Value* val =
1460
44
            handle == nullptr ? nullptr
1461
44
                              : reinterpret_cast<DeleteBitmapAggCache::Value*>(
1462
6
                                        DeleteBitmapAggCache::instance()->value(handle));
1463
    // FIXME: do we need a mutex here to get rid of duplicated initializations
1464
    //        of cache entries in some cases?
1465
44
    if (val == nullptr) { // Renew if needed, put a new Value to cache
1466
38
        val = new DeleteBitmapAggCache::Value();
1467
38
        Version start_version =
1468
38
                config::enable_mow_get_agg_by_cache ? _get_rowset_cache_version(bmk) : 0;
1469
38
        if (start_version > 0) {
1470
1
            Cache::Handle* handle2 = DeleteBitmapAggCache::instance()->lookup(
1471
1
                    agg_cache_key(_tablet_id, {std::get<0>(bmk), std::get<1>(bmk), start_version}));
1472
1473
1
            DBUG_EXECUTE_IF("DeleteBitmap::get_agg.cache_miss", {
1474
1
                if (handle2 != nullptr) {
1475
1
                    auto p = dp->param("percent", 0.3);
1476
1
                    std::mt19937 gen {std::random_device {}()};
1477
1
                    std::bernoulli_distribution inject_fault {p};
1478
1
                    if (inject_fault(gen)) {
1479
1
                        LOG_INFO("injection DeleteBitmap::get_agg.cache_miss, tablet_id={}",
1480
1
                                 _tablet_id);
1481
1
                        handle2 = nullptr;
1482
1
                    }
1483
1
                }
1484
1
            });
1485
1
            if (handle2 == nullptr || start_version > std::get<2>(bmk)) {
1486
0
                start_version = 0;
1487
1
            } else {
1488
1
                val->bitmap |= reinterpret_cast<DeleteBitmapAggCache::Value*>(
1489
1
                                       DeleteBitmapAggCache::instance()->value(handle2))
1490
1
                                       ->bitmap;
1491
1
                VLOG_DEBUG << "get agg cache version=" << start_version
1492
0
                           << " for tablet=" << _tablet_id
1493
0
                           << ", rowset=" << std::get<0>(bmk).to_string()
1494
0
                           << ", segment=" << std::get<1>(bmk);
1495
1
                start_version += 1;
1496
1
            }
1497
1
            if (handle2 != nullptr) {
1498
1
                DeleteBitmapAggCache::instance()->release(handle2);
1499
1
            }
1500
1
        }
1501
38
        {
1502
38
            std::shared_lock l(lock);
1503
38
            DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), start_version};
1504
66
            for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1505
63
                auto& [k, bm] = *it;
1506
63
                if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) ||
1507
63
                    std::get<2>(k) > std::get<2>(bmk)) {
1508
35
                    break;
1509
35
                }
1510
28
                val->bitmap |= bm;
1511
28
            }
1512
38
        }
1513
38
        size_t charge = val->bitmap.getSizeInBytes() + sizeof(DeleteBitmapAggCache::Value);
1514
38
        handle = DeleteBitmapAggCache::instance()->insert(key, val, charge, charge,
1515
38
                                                          CachePriority::NORMAL);
1516
38
        if (config::enable_mow_get_agg_by_cache && !val->bitmap.isEmpty()) {
1517
27
            std::lock_guard l(_rowset_cache_version_lock);
1518
            // this version is already agg
1519
27
            _rowset_cache_version[std::get<0>(bmk)][std::get<1>(bmk)] = std::get<2>(bmk);
1520
27
            VLOG_DEBUG << "set agg cache version=" << std::get<2>(bmk)
1521
0
                       << " for tablet=" << _tablet_id
1522
0
                       << ", rowset=" << std::get<0>(bmk).to_string()
1523
0
                       << ", segment=" << std::get<1>(bmk);
1524
27
        }
1525
38
        if (start_version > 0 && config::enable_mow_get_agg_correctness_check_core) {
1526
0
            std::shared_ptr<roaring::Roaring> bitmap = get_agg_without_cache(bmk);
1527
0
            if (val->bitmap != *bitmap) {
1528
0
                CHECK(false) << ". get agg correctness check failed for tablet=" << _tablet_id
1529
0
                             << ", rowset=" << std::get<0>(bmk).to_string()
1530
0
                             << ", segment=" << std::get<1>(bmk) << ", version=" << std::get<2>(bmk)
1531
0
                             << ". start_version from cache=" << start_version
1532
0
                             << ", delete_bitmap cardinality with cache="
1533
0
                             << val->bitmap.cardinality()
1534
0
                             << ", delete_bitmap cardinality without cache="
1535
0
                             << bitmap->cardinality();
1536
0
            }
1537
0
        }
1538
38
    }
1539
1540
    // It is natural for the cache to reclaim the underlying memory
1541
44
    return std::shared_ptr<roaring::Roaring>(
1542
44
            &val->bitmap, [handle](...) { DeleteBitmapAggCache::instance()->release(handle); });
1543
44
}
1544
1545
0
std::shared_ptr<roaring::Roaring> DeleteBitmap::get_agg_without_cache(const BitmapKey& bmk) const {
1546
0
    std::shared_ptr<roaring::Roaring> bitmap = std::make_shared<roaring::Roaring>();
1547
0
    std::shared_lock l(lock);
1548
0
    DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), 0};
1549
0
    for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) {
1550
0
        auto& [k, bm] = *it;
1551
0
        if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) ||
1552
0
            std::get<2>(k) > std::get<2>(bmk)) {
1553
0
            break;
1554
0
        }
1555
0
        *bitmap |= bm;
1556
0
    }
1557
0
    return bitmap;
1558
0
}
1559
1560
0
// Returns a new DeleteBitmap (for the same tablet id) holding a copy of every
// entry of this delete bitmap whose key is NOT in `key_set`.
//
// Entries are copied straight from the map iteration under one shared lock,
// rather than collecting the keys first and calling get() again for each of
// them while `lock` is already held.
DeleteBitmap DeleteBitmap::diffset(const std::set<BitmapKey>& key_set) const {
    std::shared_lock l(lock);

    DeleteBitmap dbm(_tablet_id);
    for (const auto& [key, bitmap] : delete_bitmap) {
        if (key_set.contains(key)) {
            continue;
        }
        dbm.delete_bitmap.insert_or_assign(key, bitmap);
    }
    return dbm;
}
1575
1576
0
// Maps a TabletState to its enumerator name. Values without a dedicated case
// are rendered as "TabletState(<numeric value>)".
std::string tablet_state_name(TabletState state) {
    const char* known_name = nullptr;
    switch (state) {
    case TABLET_NOTREADY:
        known_name = "TABLET_NOTREADY";
        break;
    case TABLET_RUNNING:
        known_name = "TABLET_RUNNING";
        break;
    case TABLET_TOMBSTONED:
        known_name = "TABLET_TOMBSTONED";
        break;
    case TABLET_STOPPED:
        known_name = "TABLET_STOPPED";
        break;
    case TABLET_SHUTDOWN:
        known_name = "TABLET_SHUTDOWN";
        break;
    default:
        break;
    }

    if (known_name != nullptr) {
        return known_name;
    }
    return "TabletState(" + std::to_string(state) + ")";
}
1597
1598
#include "common/compile_check_end.h"
1599
} // namespace doris