Coverage Report

Created: 2026-03-17 00:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/rowset/rowset_meta.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_META_H
19
#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_META_H
20
21
#include <gen_cpp/olap_file.pb.h>
22
#include <glog/logging.h>
23
24
#include <atomic>
25
#include <chrono>
26
#include <cstdint>
27
#include <memory>
28
#include <string>
29
#include <vector>
30
31
#include "common/cast_set.h"
32
#include "common/config.h"
33
#include "common/status.h"
34
#include "io/fs/encrypted_fs_factory.h"
35
#include "io/fs/file_system.h"
36
#include "runtime/memory/lru_cache_policy.h"
37
#include "storage/metadata_adder.h"
38
#include "storage/olap_common.h"
39
#include "storage/rowset/rowset_fwd.h"
40
#include "storage/storage_policy.h"
41
#include "storage/tablet/tablet_fwd.h"
42
#include "util/once.h"
43
44
namespace doris {
45
46
#include "common/compile_check_begin.h"
47
48
class RowsetMeta : public MetadataAdder<RowsetMeta> {
49
public:
50
27.9k
    RowsetMeta() = default;
51
    ~RowsetMeta();
52
53
    bool init(std::string_view pb_rowset_meta);
54
55
    bool init(const RowsetMeta* rowset_meta);
56
57
    bool init_from_pb(const RowsetMetaPB& rowset_meta_pb);
58
59
    bool init_from_json(const std::string& json_rowset_meta);
60
61
0
    bool serialize(std::string* value) { return _serialize_to_pb(value); }
62
63
    bool json_rowset_meta(std::string* json_rowset_meta);
64
65
    // If the rowset is a local rowset, return the global local file system.
66
    // Otherwise, return the remote file system corresponding to rowset's resource id.
67
    // Note that if the resource id cannot be found for the corresponding remote file system, nullptr will be returned.
68
    MOCK_FUNCTION io::FileSystemSPtr fs();
69
70
    io::FileSystemSPtr physical_fs();
71
72
    Result<const StorageResource*> remote_storage_resource();
73
74
    void set_remote_storage_resource(StorageResource resource);
75
76
25
    const std::string& resource_id() const { return _rowset_meta_pb.resource_id(); }
77
78
17
    void set_resource_id(const std::string& resource_id) {
79
17
        _rowset_meta_pb.set_resource_id(resource_id);
80
17
    }
81
82
27.1k
    bool is_local() const { return !_rowset_meta_pb.has_resource_id(); }
83
84
    bool has_variant_type_in_schema() const;
85
86
2.75M
    RowsetId rowset_id() const { return _rowset_id; }
87
88
12.5k
    void set_rowset_id(const RowsetId& rowset_id) {
89
        // rowset id is a required field, just set it to 0
90
12.5k
        _rowset_meta_pb.set_rowset_id(0);
91
12.5k
        _rowset_id = rowset_id;
92
12.5k
        _rowset_meta_pb.set_rowset_id_v2(rowset_id.to_string());
93
12.5k
    }
94
95
13.7k
    int64_t tablet_id() const { return _rowset_meta_pb.tablet_id(); }
96
97
11.6k
    void set_tablet_id(int64_t tablet_id) { _rowset_meta_pb.set_tablet_id(tablet_id); }
98
99
1
    int64_t index_id() const { return _rowset_meta_pb.index_id(); }
100
101
38
    void set_index_id(int64_t index_id) { _rowset_meta_pb.set_index_id(index_id); }
102
103
21
    TabletUid tablet_uid() const { return _rowset_meta_pb.tablet_uid(); }
104
105
11.6k
    void set_tablet_uid(TabletUid tablet_uid) {
106
11.6k
        *(_rowset_meta_pb.mutable_tablet_uid()) = tablet_uid.to_proto();
107
11.6k
    }
108
109
6
    int64_t txn_id() const { return _rowset_meta_pb.txn_id(); }
110
111
138
    void set_txn_id(int64_t txn_id) { _rowset_meta_pb.set_txn_id(txn_id); }
112
113
29
    int32_t tablet_schema_hash() const { return _rowset_meta_pb.tablet_schema_hash(); }
114
115
1.09k
    void set_tablet_schema_hash(int32_t tablet_schema_hash) {
116
1.09k
        _rowset_meta_pb.set_tablet_schema_hash(tablet_schema_hash);
117
1.09k
    }
118
119
12.2k
    RowsetTypePB rowset_type() const { return _rowset_meta_pb.rowset_type(); }
120
121
2.76k
    void set_rowset_type(RowsetTypePB rowset_type) { _rowset_meta_pb.set_rowset_type(rowset_type); }
122
123
15.9k
    RowsetStatePB rowset_state() const { return _rowset_meta_pb.rowset_state(); }
124
125
2.10k
    void set_rowset_state(RowsetStatePB rowset_state) {
126
2.10k
        _rowset_meta_pb.set_rowset_state(rowset_state);
127
2.10k
    }
128
129
2.61M
    Version version() const {
130
2.61M
        return {_rowset_meta_pb.start_version(), _rowset_meta_pb.end_version()};
131
2.61M
    }
132
133
12.3k
    void set_version(Version version) {
134
12.3k
        _rowset_meta_pb.set_start_version(version.first);
135
12.3k
        _rowset_meta_pb.set_end_version(version.second);
136
12.3k
    }
137
138
17.7k
    bool has_version() const {
139
17.7k
        return _rowset_meta_pb.has_start_version() && _rowset_meta_pb.has_end_version();
140
17.7k
    }
141
142
28.7k
    int64_t start_version() const { return _rowset_meta_pb.start_version(); }
143
144
49.7k
    int64_t end_version() const { return _rowset_meta_pb.end_version(); }
145
146
1.43k
    int64_t num_rows() const { return _rowset_meta_pb.num_rows(); }
147
148
1.50k
    void set_num_rows(int64_t num_rows) { _rowset_meta_pb.set_num_rows(num_rows); }
149
150
1.07k
    void set_num_segment_rows(const std::vector<uint32_t>& num_segment_rows) {
151
1.07k
        _rowset_meta_pb.mutable_num_segment_rows()->Assign(num_segment_rows.cbegin(),
152
1.07k
                                                           num_segment_rows.cend());
153
1.07k
    }
154
155
86
    void get_num_segment_rows(std::vector<uint32_t>* num_segment_rows) const {
156
86
        num_segment_rows->assign(_rowset_meta_pb.num_segment_rows().cbegin(),
157
86
                                 _rowset_meta_pb.num_segment_rows().cend());
158
86
    }
159
160
1.58k
    auto& get_num_segment_rows() const { return _rowset_meta_pb.num_segment_rows(); }
161
162
7.92k
    int64_t total_disk_size() const { return _rowset_meta_pb.total_disk_size(); }
163
164
8.32k
    void set_total_disk_size(int64_t total_disk_size) {
165
8.32k
        _rowset_meta_pb.set_total_disk_size(total_disk_size);
166
8.32k
    }
167
168
314
    int64_t data_disk_size() const { return _rowset_meta_pb.data_disk_size(); }
169
170
1.19k
    void set_data_disk_size(int64_t data_disk_size) {
171
1.19k
        _rowset_meta_pb.set_data_disk_size(data_disk_size);
172
1.19k
    }
173
174
305
    int64_t index_disk_size() const { return _rowset_meta_pb.index_disk_size(); }
175
176
1.19k
    void set_index_disk_size(int64_t index_disk_size) {
177
1.19k
        _rowset_meta_pb.set_index_disk_size(index_disk_size);
178
1.19k
    }
179
180
0
    void zone_maps(std::vector<ZoneMap>* zone_maps) {
181
0
        for (const ZoneMap& zone_map : _rowset_meta_pb.zone_maps()) {
182
0
            zone_maps->push_back(zone_map);
183
0
        }
184
0
    }
185
186
0
    void set_zone_maps(const std::vector<ZoneMap>& zone_maps) {
187
0
        for (const ZoneMap& zone_map : zone_maps) {
188
0
            ZoneMap* new_zone_map = _rowset_meta_pb.add_zone_maps();
189
0
            *new_zone_map = zone_map;
190
0
        }
191
0
    }
192
193
0
    void add_zone_map(const ZoneMap& zone_map) {
194
0
        ZoneMap* new_zone_map = _rowset_meta_pb.add_zone_maps();
195
0
        *new_zone_map = zone_map;
196
0
    }
197
198
5.89k
    bool has_delete_predicate() const { return _rowset_meta_pb.has_delete_predicate(); }
199
200
171
    const DeletePredicatePB& delete_predicate() const { return _rowset_meta_pb.delete_predicate(); }
201
202
0
    DeletePredicatePB* mutable_delete_predicate() {
203
0
        return _rowset_meta_pb.mutable_delete_predicate();
204
0
    }
205
206
83
    void set_delete_predicate(DeletePredicatePB delete_predicate) {
207
83
        DeletePredicatePB* new_delete_condition = _rowset_meta_pb.mutable_delete_predicate();
208
83
        *new_delete_condition = std::move(delete_predicate);
209
83
    }
210
211
51
    bool empty() const { return _rowset_meta_pb.empty(); }
212
213
1.19k
    void set_empty(bool empty) { _rowset_meta_pb.set_empty(empty); }
214
215
0
    PUniqueId load_id() const { return _rowset_meta_pb.load_id(); }
216
217
47
    void set_load_id(PUniqueId load_id) {
218
47
        PUniqueId* new_load_id = _rowset_meta_pb.mutable_load_id();
219
47
        new_load_id->set_hi(load_id.hi());
220
47
        new_load_id->set_lo(load_id.lo());
221
47
    }
222
223
1
    void set_job_id(const std::string& job_id) { _rowset_meta_pb.set_job_id(job_id); }
224
225
0
    const std::string& job_id() const { return _rowset_meta_pb.job_id(); }
226
227
0
    bool delete_flag() const { return _rowset_meta_pb.delete_flag(); }
228
229
30
    int64_t creation_time() const { return _rowset_meta_pb.creation_time(); }
230
231
1.17k
    void set_creation_time(int64_t creation_time) {
232
1.17k
        return _rowset_meta_pb.set_creation_time(creation_time);
233
1.17k
    }
234
235
650
    int64_t stale_at() const {
236
650
        int64_t stale_time = _stale_at_s.load();
237
650
        return stale_time > 0 ? stale_time : _rowset_meta_pb.creation_time();
238
650
    }
239
240
2
    bool has_stale_at() const { return _stale_at_s.load() > 0; }
241
242
565
    void set_stale_at(int64_t stale_at) { _stale_at_s.store(stale_at); }
243
244
10
    int64_t partition_id() const { return _rowset_meta_pb.partition_id(); }
245
246
1.02k
    void set_partition_id(int64_t partition_id) {
247
1.02k
        return _rowset_meta_pb.set_partition_id(partition_id);
248
1.02k
    }
249
250
85.1k
    int64_t num_segments() const { return _rowset_meta_pb.num_segments(); }
251
252
5.11k
    void set_num_segments(int64_t num_segments) { _rowset_meta_pb.set_num_segments(num_segments); }
253
254
    // Convert to RowsetMetaPB, skip_schema is only used by cloud to separate schema from rowset meta.
255
    void to_rowset_pb(RowsetMetaPB* rs_meta_pb, bool skip_schema = false) const;
256
257
    // Convert to RowsetMetaPB, skip_schema is only used by cloud to separate schema from rowset meta.
258
    RowsetMetaPB get_rowset_pb(bool skip_schema = false) const;
259
260
0
    inline DeletePredicatePB* mutable_delete_pred_pb() {
261
0
        return _rowset_meta_pb.mutable_delete_predicate();
262
0
    }
263
264
935
    bool is_singleton_delta() const {
265
935
        return has_version() && _rowset_meta_pb.start_version() == _rowset_meta_pb.end_version();
266
935
    }
267
268
    // Sometimes we need to check whether this rowset is in rowset meta manager's meta by using RowsetMetaManager::check_rowset_meta.
269
    // But this check may cost a lot of time when it is performed frequently.
270
    // If we explicitly remove this rowset from rowset meta manager's meta, we can set _is_removed_from_rowset_meta to true,
271
    // And next time when we want to check if this rowset is in rowset meta manager's meta, we can
272
    // check is_remove_from_rowset_meta() first.
273
0
    void set_remove_from_rowset_meta() { _is_removed_from_rowset_meta = true; }
274
275
0
    bool is_remove_from_rowset_meta() const { return _is_removed_from_rowset_meta; }
276
277
1.53k
    SegmentsOverlapPB segments_overlap() const { return _rowset_meta_pb.segments_overlap_pb(); }
278
279
5.64k
    void set_segments_overlap(SegmentsOverlapPB segments_overlap) {
280
5.64k
        _rowset_meta_pb.set_segments_overlap_pb(segments_overlap);
281
5.64k
    }
282
283
17.1k
    static bool comparator(const RowsetMetaSharedPtr& left, const RowsetMetaSharedPtr& right) {
284
17.1k
        return left->end_version() < right->end_version();
285
17.1k
    }
286
287
    // return true if segments in this rowset have overlapping data.
288
    // this is not the same as the `segments_overlap()` method.
289
    // `segments_overlap()` only returns the value of the "segments_overlap" field in rowset meta,
290
    // but "segments_overlap" may be UNKNOWN.
291
    //
292
    // Returns true if all of the following conditions are met
293
    // 1. the rowset contains more than one segment
294
    // 2. the rowset's start version == end version (non-singleton rowset was generated by compaction process
295
    //    which always produces non-overlapped segments)
296
    // 3. segments_overlap() flag is not NONOVERLAPPING (OVERLAP_UNKNOWN and OVERLAPPING are OK)
297
30.7k
    bool is_segments_overlapping() const {
298
30.7k
        return num_segments() > 1 && is_singleton_delta() && segments_overlap() != NONOVERLAPPING;
299
30.7k
    }
300
301
0
    bool produced_by_compaction() const {
302
0
        return has_version() &&
303
0
               (start_version() < end_version() ||
304
0
                (start_version() == end_version() && segments_overlap() == NONOVERLAPPING));
305
0
    }
306
307
    // get the compaction score of this rowset.
308
    // if segments are overlapping, the score equals to the number of segments,
309
    // otherwise, score is 1.
310
28.9k
    uint32_t get_compaction_score() const {
311
28.9k
        uint32_t score = 0;
312
28.9k
        if (!is_segments_overlapping()) {
313
28.8k
            score = 1;
314
28.8k
        } else {
315
96
            auto num_seg = num_segments();
316
96
            DCHECK_GT(num_seg, 0);
317
96
            score = cast_set<uint32_t>(num_seg);
318
96
            CHECK(score > 0);
319
96
        }
320
28.9k
        return score;
321
28.9k
    }
322
323
0
    uint32_t get_merge_way_num() const {
324
0
        uint32_t way_num = 0;
325
0
        if (!is_segments_overlapping()) {
326
0
            if (num_segments() == 0) {
327
0
                way_num = 0;
328
0
            } else {
329
0
                way_num = 1;
330
0
            }
331
0
        } else {
332
0
            auto num_seg = num_segments();
333
0
            DCHECK_GT(num_seg, 0);
334
335
0
            way_num = cast_set<uint32_t>(num_seg);
336
0
            CHECK(way_num > 0);
337
0
        }
338
0
        return way_num;
339
0
    }
340
341
204
    void get_segments_key_bounds(std::vector<KeyBoundsPB>* segments_key_bounds) const {
342
247
        for (const KeyBoundsPB& key_range : _rowset_meta_pb.segments_key_bounds()) {
343
247
            segments_key_bounds->push_back(key_range);
344
247
        }
345
204
    }
346
347
3
    auto& get_segments_key_bounds() const { return _rowset_meta_pb.segments_key_bounds(); }
348
349
1.12k
    bool is_segments_key_bounds_truncated() const {
350
1.12k
        return _rowset_meta_pb.has_segments_key_bounds_truncated() &&
351
1.12k
               _rowset_meta_pb.segments_key_bounds_truncated();
352
1.12k
    }
353
354
1.12k
    void set_segments_key_bounds_truncated(bool truncated) {
355
1.12k
        _rowset_meta_pb.set_segments_key_bounds_truncated(truncated);
356
1.12k
    }
357
358
200
    bool get_first_segment_key_bound(KeyBoundsPB* key_bounds) {
359
        // for compatibility: old versions have no segment key bounds
360
200
        if (_rowset_meta_pb.segments_key_bounds_size() == 0) {
361
0
            return false;
362
0
        }
363
200
        *key_bounds = *_rowset_meta_pb.segments_key_bounds().begin();
364
200
        return true;
365
200
    }
366
367
137
    bool get_last_segment_key_bound(KeyBoundsPB* key_bounds) {
368
137
        if (_rowset_meta_pb.segments_key_bounds_size() == 0) {
369
0
            return false;
370
0
        }
371
137
        *key_bounds = *_rowset_meta_pb.segments_key_bounds().rbegin();
372
137
        return true;
373
137
    }
374
375
    void set_segments_key_bounds(const std::vector<KeyBoundsPB>& segments_key_bounds);
376
377
0
    void add_segment_key_bounds(KeyBoundsPB segments_key_bounds) {
378
0
        *_rowset_meta_pb.add_segments_key_bounds() = std::move(segments_key_bounds);
379
0
        set_segments_overlap(OVERLAPPING);
380
0
    }
381
382
1.59k
    void set_newest_write_timestamp(int64_t timestamp) {
383
1.59k
        _rowset_meta_pb.set_newest_write_timestamp(timestamp);
384
1.59k
    }
385
386
1.05k
    int64_t newest_write_timestamp() const { return _rowset_meta_pb.newest_write_timestamp(); }
387
388
    // for cloud only
389
249
    bool has_visible_ts_ms() const { return _rowset_meta_pb.has_visible_ts_ms(); }
390
248
    int64_t visible_ts_ms() const { return _rowset_meta_pb.visible_ts_ms(); }
391
249
    std::chrono::time_point<std::chrono::system_clock> visible_timestamp() const {
392
249
        using namespace std::chrono;
393
249
        if (has_visible_ts_ms()) {
394
248
            return time_point<system_clock>(milliseconds(visible_ts_ms()));
395
248
        }
396
1
        return system_clock::from_time_t(newest_write_timestamp());
397
249
    }
398
#ifdef BE_TEST
399
689
    void set_visible_ts_ms(int64_t visible_ts_ms) {
400
689
        _rowset_meta_pb.set_visible_ts_ms(visible_ts_ms);
401
689
    }
402
#endif
403
404
    void set_tablet_schema(const TabletSchemaSPtr& tablet_schema);
405
    void set_tablet_schema(const TabletSchemaPB& tablet_schema);
406
407
34.4k
    const TabletSchemaSPtr& tablet_schema() const { return _schema; }
408
409
1
    void set_txn_expiration(int64_t expiration) { _rowset_meta_pb.set_txn_expiration(expiration); }
410
411
1
    void set_compaction_level(int64_t compaction_level) {
412
1
        _rowset_meta_pb.set_compaction_level(compaction_level);
413
1
    }
414
415
11
    int64_t compaction_level() { return _rowset_meta_pb.compaction_level(); }
416
417
    // `seg_file_size` MUST be ordered by segment id
418
    void add_segments_file_size(const std::vector<size_t>& seg_file_size);
419
420
    // Return -1 if segment file size is unknown
421
    int64_t segment_file_size(int seg_id) const;
422
423
0
    const auto& segments_file_size() const { return _rowset_meta_pb.segments_file_size(); }
424
425
    // Used for partial update, when publish, partial update may add a new rowset and we should update rowset meta
426
    void merge_rowset_meta(const RowsetMeta& other);
427
428
    InvertedIndexFileInfo inverted_index_file_info(int seg_id);
429
430
0
    const auto& inverted_index_file_info() const {
431
0
        return _rowset_meta_pb.inverted_index_file_info();
432
0
    }
433
434
    void add_inverted_index_files_info(
435
            const std::vector<const InvertedIndexFileInfo*>& idx_file_info);
436
437
    int64_t get_metadata_size() const override;
438
439
    // Because the member field '_handle' is a raw pointer, use member func 'init' to replace copy ctor
440
    RowsetMeta(const RowsetMeta&) = delete;
441
    RowsetMeta operator=(const RowsetMeta&) = delete;
442
443
    void add_packed_slice_location(const std::string& segment_path,
444
                                   const std::string& packed_file_path, int64_t offset,
445
0
                                   int64_t size, int64_t packed_file_size) {
446
0
        auto* index_map = _rowset_meta_pb.mutable_packed_slice_locations();
447
0
        auto& index_pb = (*index_map)[segment_path];
448
0
        index_pb.set_packed_file_path(packed_file_path);
449
0
        index_pb.set_offset(offset);
450
0
        index_pb.set_size(size);
451
0
        index_pb.set_packed_file_size(packed_file_size);
452
0
    }
453
454
81
    int32_t schema_version() const { return _rowset_meta_pb.schema_version(); }
455
456
0
    std::string debug_string() const { return _rowset_meta_pb.ShortDebugString(); }
457
458
    // Pre-set the encryption algorithm to avoid re-entrant get_tablet calls
459
    // that can cause SingleFlight deadlock during tablet loading.
460
0
    void set_encryption_algorithm(EncryptionAlgorithmPB algorithm) {
461
0
        _determine_encryption_once.call(
462
0
                [algorithm]() -> Result<EncryptionAlgorithmPB> { return algorithm; });
463
0
    }
464
465
private:
466
    bool _deserialize_from_pb(std::string_view value);
467
468
    bool _serialize_to_pb(std::string* value);
469
470
    void _init();
471
472
    friend bool operator==(const RowsetMeta& a, const RowsetMeta& b);
473
474
0
    friend bool operator!=(const RowsetMeta& a, const RowsetMeta& b) { return !(a == b); }
475
476
private:
477
    RowsetMetaPB _rowset_meta_pb;
478
    TabletSchemaSPtr _schema;
479
    Cache::Handle* _handle = nullptr;
480
    RowsetId _rowset_id;
481
    StorageResource _storage_resource;
482
    bool _is_removed_from_rowset_meta = false;
483
    DorisCallOnce<Result<EncryptionAlgorithmPB>> _determine_encryption_once;
484
    std::atomic<int64_t> _stale_at_s {0};
485
};
486
487
using RowsetMetaMapContainer = std::unordered_map<Version, RowsetMetaSharedPtr, HashOfVersion>;
488
489
#include "common/compile_check_end.h"
490
} // namespace doris
491
492
#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_META_H