Coverage Report

Created: 2026-05-18 13:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/olap_common.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/Types_types.h>
21
#include <netinet/in.h>
22
23
#include <atomic>
24
#include <charconv>
25
#include <cstdint>
26
#include <functional>
27
#include <list>
28
#include <map>
29
#include <memory>
30
#include <ostream>
31
#include <sstream>
32
#include <string>
33
#include <typeinfo>
34
#include <unordered_map>
35
#include <unordered_set>
36
#include <utility>
37
38
#include "common/cast_set.h"
39
#include "common/config.h"
40
#include "common/exception.h"
41
#include "io/io_common.h"
42
#include "storage/index/inverted/inverted_index_stats.h"
43
#include "storage/olap_define.h"
44
#include "storage/rowset/rowset_fwd.h"
45
#include "util/hash_util.hpp"
46
#include "util/time.h"
47
#include "util/uid_util.h"
48
49
namespace doris {
50
// Exclusive upper bound (2^56) of the 56-bit incrementing part of a rowset id.
// INT64_C keeps the shifted operand 64 bits wide on every platform; the former
// `1L << 56` overflowed wherever `long` is only 32 bits.
static constexpr int64_t MAX_ROWSET_ID = INT64_C(1) << 56;
// Mask that selects the low 56 bits of a 64-bit value (equals MAX_ROWSET_ID - 1).
static constexpr int64_t LOW_56_BITS = 0x00ffffffffffffff;
52
53
// Schema hash values are 32-bit signed integers.
using SchemaHash = int32_t;
// 128-bit integer aliases built on the compiler's __int128 extension.
using int128_t = __int128;
using uint128_t = unsigned __int128;

// A tablet uid is represented by a UniqueId.
using TabletUid = UniqueId;
58
59
// Kinds of compaction. The numeric values are assigned explicitly and start at 1.
enum CompactionType { BASE_COMPACTION = 1, CUMULATIVE_COMPACTION = 2, FULL_COMPACTION = 3 };
60
61
// Role of a data directory. Enumerators take the implicit values 0, 1, 2 in
// declaration order.
enum DataDirType {
    SPILL_DISK_DIR,
    OLAP_DATA_DIR,
    DATA_CACHE_DIR,
};
66
67
// Plain description of one data directory: its path, capacity/usage counters,
// storage medium and directory type. Every scalar member has an in-class default.
struct DataDirInfo {
    std::string path;
    size_t path_hash = 0;
    // NOTE(review): defaults to 1 rather than 0 — presumably so that usage ratios never
    // divide by zero; confirm against callers.
    int64_t disk_capacity = 1; // actual disk capacity
    int64_t available = 0;     // available space, in bytes unit
    int64_t local_used_capacity = 0;
    int64_t remote_used_capacity = 0;
    int64_t trash_used_capacity = 0;
    bool is_used = false;                                      // whether available mark
    TStorageMedium::type storage_medium = TStorageMedium::HDD; // Storage medium type: SSD|HDD
    DataDirType data_dir_type = DataDirType::OLAP_DATA_DIR;
    std::string metric_name;
};
80
81
// Sort DataDirInfo by available space.
82
struct DataDirInfoLessAvailability {
83
23
    bool operator()(const DataDirInfo& left, const DataDirInfo& right) const {
84
23
        return left.available < right.available;
85
23
    }
86
};
87
88
struct TabletInfo {
89
    TabletInfo(TTabletId in_tablet_id, UniqueId in_uid)
90
1.86M
            : tablet_id(in_tablet_id), tablet_uid(in_uid) {}
91
92
10.8M
    bool operator<(const TabletInfo& right) const {
93
10.8M
        if (tablet_id != right.tablet_id) {
94
10.6M
            return tablet_id < right.tablet_id;
95
10.6M
        } else {
96
198k
            return tablet_uid < right.tablet_uid;
97
198k
        }
98
10.8M
    }
99
100
88
    std::string to_string() const {
101
88
        std::stringstream ss;
102
88
        ss << tablet_id << "." << tablet_uid.to_string();
103
88
        return ss.str();
104
88
    }
105
106
    TTabletId tablet_id;
107
    UniqueId tablet_uid;
108
};
109
110
// Pairs a tablet id with a size value for that tablet.
// NOTE(review): the unit of tablet_size is not visible here — presumably bytes; confirm.
struct TabletSize {
    TabletSize(TTabletId in_tablet_id, size_t in_tablet_size)
            : tablet_id(in_tablet_id), tablet_size(in_tablet_size) {}

    TTabletId tablet_id;
    size_t tablet_size;
};
117
118
// Define all data types supported by StorageField.
// If a new field type is defined, not only may a new TypeInfo need to be defined,
// but some functions like get_type_info in types.cpp also need to be changed.
// Every enumerator carries an explicit value; values run from 1 to 40 without gaps.
enum class FieldType {
    OLAP_FIELD_TYPE_TINYINT = 1, // MYSQL_TYPE_TINY
    OLAP_FIELD_TYPE_UNSIGNED_TINYINT = 2,
    OLAP_FIELD_TYPE_SMALLINT = 3, // MYSQL_TYPE_SHORT
    OLAP_FIELD_TYPE_UNSIGNED_SMALLINT = 4,
    OLAP_FIELD_TYPE_INT = 5, // MYSQL_TYPE_LONG
    OLAP_FIELD_TYPE_UNSIGNED_INT = 6,
    OLAP_FIELD_TYPE_BIGINT = 7, // MYSQL_TYPE_LONGLONG
    OLAP_FIELD_TYPE_UNSIGNED_BIGINT = 8,
    OLAP_FIELD_TYPE_LARGEINT = 9,
    OLAP_FIELD_TYPE_FLOAT = 10,  // MYSQL_TYPE_FLOAT
    OLAP_FIELD_TYPE_DOUBLE = 11, // MYSQL_TYPE_DOUBLE
    OLAP_FIELD_TYPE_DISCRETE_DOUBLE = 12,
    OLAP_FIELD_TYPE_CHAR = 13,     // MYSQL_TYPE_STRING
    OLAP_FIELD_TYPE_DATE = 14,     // MySQL_TYPE_NEWDATE
    OLAP_FIELD_TYPE_DATETIME = 15, // MySQL_TYPE_DATETIME
    OLAP_FIELD_TYPE_DECIMAL = 16,  // DECIMAL, using different store format against MySQL
    OLAP_FIELD_TYPE_VARCHAR = 17,

    OLAP_FIELD_TYPE_STRUCT = 18,  // Struct
    OLAP_FIELD_TYPE_ARRAY = 19,   // ARRAY
    OLAP_FIELD_TYPE_MAP = 20,     // Map
    OLAP_FIELD_TYPE_UNKNOWN = 21, // UNKNOWN OLAP_FIELD_TYPE_STRING
    OLAP_FIELD_TYPE_NONE = 22,
    OLAP_FIELD_TYPE_HLL = 23,
    OLAP_FIELD_TYPE_BOOL = 24,
    OLAP_FIELD_TYPE_BITMAP = 25,
    OLAP_FIELD_TYPE_STRING = 26,
    OLAP_FIELD_TYPE_QUANTILE_STATE = 27,
    OLAP_FIELD_TYPE_DATEV2 = 28,
    OLAP_FIELD_TYPE_DATETIMEV2 = 29,
    OLAP_FIELD_TYPE_TIMEV2 = 30,
    OLAP_FIELD_TYPE_DECIMAL32 = 31,
    OLAP_FIELD_TYPE_DECIMAL64 = 32,
    OLAP_FIELD_TYPE_DECIMAL128I = 33,
    OLAP_FIELD_TYPE_JSONB = 34,
    OLAP_FIELD_TYPE_VARIANT = 35,
    OLAP_FIELD_TYPE_AGG_STATE = 36,
    OLAP_FIELD_TYPE_DECIMAL256 = 37,
    OLAP_FIELD_TYPE_IPV4 = 38,
    OLAP_FIELD_TYPE_IPV6 = 39,
    OLAP_FIELD_TYPE_TIMESTAMPTZ = 40,
};
164
165
// Define all aggregation methods supported by StorageField.
// Note that in practice, not all types can use all of the following aggregation methods.
// For example, it is meaningless to use SUM for the string type (but it will not cause
// the program to crash).
// The implementation of the StorageField class does not perform such checks; they should
// be enforced when the table is created.
// Every enumerator carries an explicit value (0 to 10).
enum class FieldAggregationMethod {
    OLAP_FIELD_AGGREGATION_NONE = 0,
    OLAP_FIELD_AGGREGATION_SUM = 1,
    OLAP_FIELD_AGGREGATION_MIN = 2,
    OLAP_FIELD_AGGREGATION_MAX = 3,
    OLAP_FIELD_AGGREGATION_REPLACE = 4,
    OLAP_FIELD_AGGREGATION_HLL_UNION = 5,
    OLAP_FIELD_AGGREGATION_UNKNOWN = 6,
    OLAP_FIELD_AGGREGATION_BITMAP_UNION = 7,
    // Replace if and only if added value is not null
    OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL = 8,
    OLAP_FIELD_AGGREGATION_QUANTILE_UNION = 9,
    OLAP_FIELD_AGGREGATION_GENERIC = 10
};
183
184
// Kinds of push task. Values are assigned explicitly; per the inline notes, two of the
// four enumerators are no longer used.
enum class PushType {
    PUSH_NORMAL = 1,          // for broker/hadoop load, not used any more
    PUSH_FOR_DELETE = 2,      // for delete
    PUSH_FOR_LOAD_DELETE = 3, // not used any more
    PUSH_NORMAL_V2 = 4,       // for spark load
};
190
191
3.83M
constexpr bool field_is_slice_type(const FieldType& field_type) {
192
3.83M
    return field_type == FieldType::OLAP_FIELD_TYPE_VARCHAR ||
193
3.83M
           field_type == FieldType::OLAP_FIELD_TYPE_CHAR ||
194
3.83M
           field_type == FieldType::OLAP_FIELD_TYPE_STRING;
195
3.83M
}
196
197
22
constexpr bool field_is_numeric_type(const FieldType& field_type) {
198
22
    return field_type == FieldType::OLAP_FIELD_TYPE_INT ||
199
22
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT ||
200
22
           field_type == FieldType::OLAP_FIELD_TYPE_BIGINT ||
201
22
           field_type == FieldType::OLAP_FIELD_TYPE_SMALLINT ||
202
22
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_TINYINT ||
203
22
           field_type == FieldType::OLAP_FIELD_TYPE_UNSIGNED_SMALLINT ||
204
22
           field_type == FieldType::OLAP_FIELD_TYPE_TINYINT ||
205
22
           field_type == FieldType::OLAP_FIELD_TYPE_DOUBLE ||
206
22
           field_type == FieldType::OLAP_FIELD_TYPE_FLOAT ||
207
22
           field_type == FieldType::OLAP_FIELD_TYPE_DATE ||
208
22
           field_type == FieldType::OLAP_FIELD_TYPE_DATEV2 ||
209
22
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIME ||
210
22
           field_type == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 ||
211
22
           field_type == FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ ||
212
22
           field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT ||
213
22
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL ||
214
22
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL32 ||
215
22
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL64 ||
216
22
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL128I ||
217
22
           field_type == FieldType::OLAP_FIELD_TYPE_DECIMAL256 ||
218
22
           field_type == FieldType::OLAP_FIELD_TYPE_BOOL ||
219
22
           field_type == FieldType::OLAP_FIELD_TYPE_IPV4 ||
220
22
           field_type == FieldType::OLAP_FIELD_TYPE_IPV6;
221
22
}
222
223
// <start_version_id, end_version_id>, such as <100, 110>
224
//using Version = std::pair<TupleVersion, TupleVersion>;
225
226
struct Version {
227
    int64_t first;
228
    int64_t second;
229
230
22.5M
    Version(int64_t first_, int64_t second_) : first(first_), second(second_) {}
231
5.45M
    Version() : first(0), second(0) {}
232
233
0
    static Version mock() {
234
        // Every time SchemaChange is used for external rowing, some temporary versions (such as 999, 1000, 1001) will be written, in order to avoid Cache conflicts, temporary
235
        // The version number takes a BIG NUMBER plus the version number of the current SchemaChange
236
0
        return Version(1 << 28, 1 << 29);
237
0
    }
238
239
    friend std::ostream& operator<<(std::ostream& os, const Version& version);
240
241
7.48k
    bool operator!=(const Version& rhs) const { return first != rhs.first || second != rhs.second; }
242
243
3.41M
    bool operator==(const Version& rhs) const { return first == rhs.first && second == rhs.second; }
244
245
757k
    bool contains(const Version& other) const {
246
757k
        return first <= other.first && second >= other.second;
247
757k
    }
248
249
58.6k
    std::string to_string() const { return fmt::format("[{}-{}]", first, second); }
250
};
251
252
struct TsoRange : public Version {
253
2.52M
    TsoRange() : Version(-1, -1) {}
254
663k
    TsoRange(int64_t start_tso, int64_t end_tso) : Version(start_tso, end_tso) {}
255
256
31.9k
    int64_t start_tso() const { return first; }
257
31.9k
    int64_t end_tso() const { return second; }
258
259
0
    bool contains(const TsoRange& other) const { return Version::contains(other); }
260
};
261
262
using Versions = std::vector<Version>;
263
264
3.88k
inline std::ostream& operator<<(std::ostream& os, const Version& version) {
265
3.88k
    return os << version.to_string();
266
3.88k
}
267
268
0
inline std::ostream& operator<<(std::ostream& os, const Versions& versions) {
269
0
    for (auto& version : versions) {
270
0
        os << version;
271
0
    }
272
0
    return os;
273
0
}
274
275
// used for hash-struct of hash_map<Version, Rowset*>.
276
struct HashOfVersion {
277
3.79M
    size_t operator()(const Version& version) const {
278
3.79M
        size_t seed = 0;
279
3.79M
        seed = HashUtil::hash64(&version.first, sizeof(version.first), seed);
280
3.79M
        seed = HashUtil::hash64(&version.second, sizeof(version.second), seed);
281
3.79M
        return seed;
282
3.79M
    }
283
};
284
285
// It is used to represent a graph vertex.
// `value` identifies the vertex; `edges` is a list of int64_t values — presumably the
// values of the adjacent vertices; confirm against the graph code that uses it.
struct Vertex {
    int64_t value = 0;
    std::list<int64_t> edges;

    // Creates a vertex with no edges. The constructor is not marked explicit, so an
    // int64_t converts implicitly to a Vertex.
    Vertex(int64_t v) : value(v) {}
};
292
293
class StorageField;
294
295
// ReaderStatistics used to collect statistics when scan data from storage.
// A flat bag of counters and timers. Every counter starts at 0 except
// adaptive_batch_size_predict_min_rows, which starts at INT64_MAX. Fields suffixed `_ns`
// are named as nanosecond timers; the unit of the `_timer` fields is not visible here.
struct OlapReaderStatistics {
    int64_t io_ns = 0;
    int64_t compressed_bytes_read = 0;

    int64_t decompress_ns = 0;
    int64_t uncompressed_bytes_read = 0;

    // total read bytes in memory
    int64_t bytes_read = 0;

    int64_t block_fetch_ns = 0; // time of rowset reader's `next_batch()` call
    int64_t block_load_ns = 0;
    int64_t blocks_load = 0;
    // Not used any more, will be removed after non-vectorized code is removed
    int64_t block_seek_num = 0;
    // Not used any more, will be removed after non-vectorized code is removed
    int64_t block_seek_ns = 0;

    // Nesting of the block load timers:
    // block_load_ns
    //      block_init_ns
    //          block_init_seek_ns
    //          generate_row_ranges_ns
    //      predicate_column_read_ns
    //          predicate_column_read_seek_ns
    //      lazy_read_ns
    //          block_lazy_read_seek_ns
    int64_t block_init_ns = 0;
    int64_t block_init_seek_num = 0;
    int64_t block_init_seek_ns = 0;
    int64_t predicate_column_read_ns = 0;
    int64_t non_predicate_read_ns = 0;
    int64_t predicate_column_read_seek_num = 0;
    int64_t predicate_column_read_seek_ns = 0;
    int64_t lazy_read_ns = 0;
    int64_t block_lazy_read_seek_num = 0;
    int64_t block_lazy_read_seek_ns = 0;

    int64_t raw_rows_read = 0;

    int64_t rows_vec_cond_filtered = 0;
    int64_t rows_short_circuit_cond_filtered = 0;
    int64_t rows_expr_cond_filtered = 0;
    int64_t vec_cond_input_rows = 0;
    int64_t short_circuit_cond_input_rows = 0;
    int64_t expr_cond_input_rows = 0;
    int64_t rows_vec_del_cond_filtered = 0;
    int64_t vec_cond_ns = 0;
    int64_t short_cond_ns = 0;
    int64_t expr_filter_ns = 0;
    int64_t output_col_ns = 0;
    int64_t rows_key_range_filtered = 0;
    int64_t rows_stats_filtered = 0;
    int64_t rows_stats_rp_filtered = 0;
    int64_t rows_bf_filtered = 0;
    int64_t segment_dict_filtered = 0;
    // Including the number of rows filtered out according to the Delete information in the Tablet,
    // and the number of rows filtered for marked deleted rows under the unique key model.
    // This metric is mainly used to record the number of rows filtered by the delete condition in Segment V1,
    // and it is also used to record the replaced rows in the Unique key model in the "Reader" class.
    // In segmentv2, if you want to get all filtered rows, you need the sum of "rows_del_filtered" and "rows_conditions_filtered".
    int64_t rows_del_filtered = 0;
    int64_t rows_del_by_bitmap = 0;
    // the number of rows filtered by various column indexes.
    int64_t rows_conditions_filtered = 0;
    int64_t generate_row_ranges_by_keys_ns = 0;
    int64_t generate_row_ranges_by_column_conditions_ns = 0;
    int64_t generate_row_ranges_by_bf_ns = 0;
    int64_t generate_row_ranges_by_zonemap_ns = 0;
    int64_t generate_row_ranges_by_dict_ns = 0;

    int64_t index_load_ns = 0;

    int64_t total_pages_num = 0;
    int64_t cached_pages_num = 0;

    // Inverted index counters and timers.
    int64_t rows_inverted_index_filtered = 0;
    int64_t inverted_index_filter_timer = 0;
    int64_t inverted_index_query_timer = 0;
    int64_t inverted_index_query_cache_hit = 0;
    int64_t inverted_index_query_cache_miss = 0;
    int64_t inverted_index_query_null_bitmap_timer = 0;
    int64_t inverted_index_query_bitmap_copy_timer = 0;
    int64_t inverted_index_searcher_open_timer = 0;
    int64_t inverted_index_searcher_search_timer = 0;
    int64_t inverted_index_searcher_search_init_timer = 0;
    int64_t inverted_index_searcher_search_exec_timer = 0;
    int64_t inverted_index_searcher_cache_hit = 0;
    int64_t inverted_index_searcher_cache_miss = 0;
    int64_t inverted_index_downgrade_count = 0;
    int64_t inverted_index_analyzer_timer = 0;
    int64_t inverted_index_lookup_timer = 0;
    InvertedIndexStatistics inverted_index_stats;

    // ANN index counters and timers.
    int64_t ann_index_load_ns = 0;
    int64_t ann_topn_search_ns = 0;
    int64_t ann_index_topn_search_cnt = 0;
    int64_t ann_ivf_on_disk_load_ns = 0;
    int64_t ann_ivf_on_disk_cache_hit_cnt = 0;
    int64_t ann_ivf_on_disk_cache_miss_cnt = 0;
    int64_t ann_index_cache_hits = 0;

    // Detailed timing for ANN operations
    // NOTE(review): the next comment says "range search" although the field name says
    // TopN — looks like a copy-paste from ann_range_engine_search_ns; confirm.
    int64_t ann_index_topn_engine_search_ns = 0;  // time spent in engine for range search
    int64_t ann_index_topn_result_process_ns = 0; // time spent processing TopN results
    int64_t ann_index_topn_engine_convert_ns = 0; // time spent on FAISS-side conversions (TopN)
    int64_t ann_index_topn_engine_prepare_ns =
            0; // time spent preparing before engine search (TopN)
    int64_t rows_ann_index_topn_filtered = 0;

    int64_t ann_index_range_search_ns = 0;
    int64_t ann_index_range_search_cnt = 0;
    // Detailed timing for ANN Range search
    int64_t ann_range_engine_search_ns = 0; // time spent in engine for range search
    int64_t ann_range_pre_process_ns = 0;   // time spent preparing before engine search

    int64_t ann_range_result_convert_ns = 0; // time spent processing range results
    int64_t ann_range_engine_convert_ns = 0; // time spent on FAISS-side conversions (Range)
    int64_t rows_ann_index_range_filtered = 0;
    int64_t ann_index_range_cache_hits = 0;
    int64_t ann_fall_back_brute_force_cnt = 0;

    int64_t output_index_result_column_timer = 0;
    // number of segment filtered by column stat when creating seg iterator
    int64_t filtered_segment_number = 0;
    // number of segment with condition cache hit
    int64_t condition_cache_hit_seg_nums = 0;
    // number of rows filtered by condition cache hit
    int64_t condition_cache_filtered_rows = 0;
    // total number of segment
    int64_t total_segment_number = 0;

    io::FileCacheStatistics file_cache_stats;
    int64_t load_segments_timer = 0;

    int64_t collect_iterator_merge_next_timer = 0;
    int64_t collect_iterator_normal_next_timer = 0;
    int64_t delete_bitmap_get_agg_ns = 0;

    // Initialization timers, grouped by the component named in each prefix.
    int64_t tablet_reader_init_timer_ns = 0;
    int64_t tablet_reader_capture_rs_readers_timer_ns = 0;
    int64_t tablet_reader_init_return_columns_timer_ns = 0;
    int64_t tablet_reader_init_keys_param_timer_ns = 0;
    int64_t tablet_reader_init_orderby_keys_param_timer_ns = 0;
    int64_t tablet_reader_init_conditions_param_timer_ns = 0;
    int64_t tablet_reader_init_delete_condition_param_timer_ns = 0;
    int64_t block_reader_vcollect_iter_init_timer_ns = 0;
    int64_t block_reader_rs_readers_init_timer_ns = 0;
    int64_t block_reader_build_heap_init_timer_ns = 0;

    int64_t rowset_reader_get_segment_iterators_timer_ns = 0;
    int64_t rowset_reader_create_iterators_timer_ns = 0;
    int64_t rowset_reader_init_iterators_timer_ns = 0;
    int64_t rowset_reader_load_segments_timer_ns = 0;

    int64_t segment_iterator_init_timer_ns = 0;
    int64_t segment_iterator_init_return_column_iterators_timer_ns = 0;
    int64_t segment_iterator_init_index_iterators_timer_ns = 0;
    int64_t segment_iterator_init_segment_prefetchers_timer_ns = 0;

    int64_t segment_create_column_readers_timer_ns = 0;
    int64_t segment_load_index_timer_ns = 0;

    // The minimum starts at INT64_MAX and the maximum at 0 — presumably so the first
    // recorded sample replaces both; confirm against the code that updates them.
    int64_t adaptive_batch_size_predict_min_rows = INT64_MAX;
    int64_t adaptive_batch_size_predict_max_rows = 0;

    // Variant column counters and timers.
    int64_t variant_scan_sparse_column_timer_ns = 0;
    int64_t variant_scan_sparse_column_bytes = 0;
    int64_t variant_fill_path_from_sparse_column_timer_ns = 0;
    int64_t variant_subtree_default_iter_count = 0;
    int64_t variant_subtree_leaf_iter_count = 0;
    int64_t variant_subtree_hierarchical_iter_count = 0;
    int64_t variant_subtree_sparse_iter_count = 0;
    int64_t variant_doc_value_column_iter_count = 0;
};
470
471
using ColumnId = uint32_t;
472
// Column unique id set
473
using UniqueIdSet = std::set<uint32_t>;
474
// Column unique Id -> column id map
475
using UniqueIdToColumnIdMap = std::map<ColumnId, ColumnId>;
476
477
// 8 bit rowset id version
478
// 56 bit, inc number from 1
479
// 128 bit backend uid, it is a uuid bit, id version
480
struct RowsetId {
481
    int8_t version = 0;
482
    int64_t hi = 0;
483
    int64_t mi = 0;
484
    int64_t lo = 0;
485
486
665k
    void init(std::string_view rowset_id_str) {
487
        // for new rowsetid its a 48 hex string
488
        // if the len < 48, then it is an old format rowset id
489
665k
        if (rowset_id_str.length() < 48) [[unlikely]] {
490
115
            int64_t high;
491
115
            auto [_, ec] = std::from_chars(rowset_id_str.data(),
492
115
                                           rowset_id_str.data() + rowset_id_str.length(), high);
493
115
            if (ec != std::errc {}) [[unlikely]] {
494
1
                if (config::force_regenerate_rowsetid_on_start_error) {
495
1
                    LOG(WARNING) << "failed to init rowset id: " << rowset_id_str;
496
1
                    high = MAX_ROWSET_ID - 1;
497
1
                } else {
498
0
                    throw Exception(
499
0
                            Status::FatalError("failed to init rowset id: {}", rowset_id_str));
500
0
                }
501
1
            }
502
115
            init(1, high, 0, 0);
503
665k
        } else {
504
665k
            int64_t high = 0;
505
665k
            int64_t middle = 0;
506
665k
            int64_t low = 0;
507
665k
            from_hex(&high, rowset_id_str.substr(0, 16));
508
665k
            from_hex(&middle, rowset_id_str.substr(16, 16));
509
665k
            from_hex(&low, rowset_id_str.substr(32, 16));
510
665k
            init(high >> 56, high & LOW_56_BITS, middle, low);
511
665k
        }
512
665k
    }
513
514
    // to compatible with old version
515
4.26k
    void init(int64_t rowset_id) { init(1, rowset_id, 0, 0); }
516
517
720k
    void init(int64_t id_version, int64_t high, int64_t middle, int64_t low) {
518
720k
        version = cast_set<int8_t>(id_version);
519
720k
        if (UNLIKELY(high >= MAX_ROWSET_ID)) {
520
0
            throw Exception(Status::FatalError("inc rowsetid is too large:{}", high));
521
0
        }
522
720k
        hi = (id_version << 56) + (high & LOW_56_BITS);
523
720k
        mi = middle;
524
720k
        lo = low;
525
720k
    }
526
527
2.32M
    std::string to_string() const {
528
2.32M
        if (version < 2) {
529
14.6k
            return std::to_string(hi & LOW_56_BITS);
530
2.31M
        } else {
531
2.31M
            char buf[48];
532
2.31M
            to_hex(hi, buf);
533
2.31M
            to_hex(mi, buf + 16);
534
2.31M
            to_hex(lo, buf + 32);
535
2.31M
            return {buf, 48};
536
2.31M
        }
537
2.32M
    }
538
539
    // std::unordered_map need this api
540
1.28M
    bool operator==(const RowsetId& rhs) const {
541
1.28M
        return hi == rhs.hi && mi == rhs.mi && lo == rhs.lo;
542
1.28M
    }
543
544
24.6k
    bool operator!=(const RowsetId& rhs) const {
545
24.6k
        return hi != rhs.hi || mi != rhs.mi || lo != rhs.lo;
546
24.6k
    }
547
548
57.6M
    bool operator<(const RowsetId& rhs) const {
549
57.6M
        if (hi != rhs.hi) {
550
15.8M
            return hi < rhs.hi;
551
41.8M
        } else if (mi != rhs.mi) {
552
205
            return mi < rhs.mi;
553
41.8M
        } else {
554
41.8M
            return lo < rhs.lo;
555
41.8M
        }
556
57.6M
    }
557
558
8.18k
    friend std::ostream& operator<<(std::ostream& out, const RowsetId& rowset_id) {
559
8.18k
        out << rowset_id.to_string();
560
8.18k
        return out;
561
8.18k
    }
562
};
563
564
using RowsetIdUnorderedSet = std::unordered_set<RowsetId>;
565
566
// Extract rowset id from filename, return uninitialized rowset id if filename is invalid
567
79.0k
inline RowsetId extract_rowset_id(std::string_view filename) {
568
79.0k
    RowsetId rowset_id;
569
79.0k
    if (filename.ends_with(".dat")) {
570
        // filename format: {rowset_id}_{segment_num}.dat
571
69.9k
        auto end = filename.find('_');
572
69.9k
        if (end == std::string::npos) {
573
0
            return rowset_id;
574
0
        }
575
69.9k
        rowset_id.init(filename.substr(0, end));
576
69.9k
        return rowset_id;
577
69.9k
    }
578
9.03k
    if (filename.ends_with(".idx")) {
579
        // filename format: {rowset_id}_{segment_num}_{index_id}.idx
580
8.79k
        auto end = filename.find('_');
581
8.79k
        if (end == std::string::npos) {
582
0
            return rowset_id;
583
0
        }
584
8.79k
        rowset_id.init(filename.substr(0, end));
585
8.79k
        return rowset_id;
586
8.79k
    }
587
244
    return rowset_id;
588
9.03k
}
589
590
class DeleteBitmap;
591
// merge on write context
// Bundles a max version, a txn id, a shared set of rowset ids, the rowset pointers and a
// shared delete bitmap.
struct MowContext {
    // The by-value arguments are moved into the members. The `rowset_ptrs` parameter has
    // the same name as the member it initializes; inside the initializer list the
    // parenthesised name refers to the parameter.
    MowContext(int64_t version, int64_t txnid, std::shared_ptr<RowsetIdUnorderedSet> ids,
               std::vector<RowsetSharedPtr> rowset_ptrs, std::shared_ptr<DeleteBitmap> db)
            : max_version(version),
              txn_id(txnid),
              rowset_ids(std::move(ids)),
              rowset_ptrs(std::move(rowset_ptrs)),
              delete_bitmap(std::move(db)) {}
    int64_t max_version;
    int64_t txn_id;
    std::shared_ptr<RowsetIdUnorderedSet> rowset_ids;
    std::vector<RowsetSharedPtr> rowset_ptrs;
    std::shared_ptr<DeleteBitmap> delete_bitmap;
};
606
607
// used for controll compaction
608
struct VersionWithTime {
609
    std::atomic<int64_t> version;
610
    int64_t update_ts;
611
612
31.9k
    VersionWithTime() : version(0), update_ts(MonotonicMillis()) {}
613
614
31.6k
    void update_version_monoto(int64_t new_version) {
615
31.6k
        int64_t cur_version = version.load(std::memory_order_relaxed);
616
31.6k
        while (cur_version < new_version) {
617
31.6k
            if (version.compare_exchange_strong(cur_version, new_version, std::memory_order_relaxed,
618
31.6k
                                                std::memory_order_relaxed)) {
619
31.6k
                update_ts = MonotonicMillis();
620
31.6k
                break;
621
31.6k
            }
622
31.6k
        }
623
31.6k
    }
624
};
625
} // namespace doris
626
627
// This intended to be a "good" hash function.  It may change from time to time.
628
template <>
629
struct std::hash<doris::RowsetId> {
630
1.07M
    size_t operator()(const doris::RowsetId& rowset_id) const {
631
1.07M
        size_t seed = 0;
632
1.07M
        seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.hi, sizeof(rowset_id.hi),
633
1.07M
                                                 seed);
634
1.07M
        seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.mi, sizeof(rowset_id.mi),
635
1.07M
                                                 seed);
636
1.07M
        seed = doris::HashUtil::xxHash64WithSeed((const char*)&rowset_id.lo, sizeof(rowset_id.lo),
637
1.07M
                                                 seed);
638
1.07M
        return seed;
639
1.07M
    }
640
};