Coverage Report

Created: 2025-10-31 19:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/root/doris/be/src/olap/base_tablet.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <gen_cpp/olap_common.pb.h>
21
22
#include <memory>
23
#include <mutex>
24
#include <shared_mutex>
25
#include <string>
26
27
#include "common/status.h"
28
#include "olap/iterators.h"
29
#include "olap/olap_common.h"
30
#include "olap/partial_update_info.h"
31
#include "olap/rowset/segment_v2/segment.h"
32
#include "olap/tablet_fwd.h"
33
#include "olap/tablet_meta.h"
34
#include "olap/tablet_schema.h"
35
#include "olap/version_graph.h"
36
#include "util/metrics.h"
37
38
namespace doris {
39
struct RowSetSplits;
40
struct RowsetWriterContext;
41
class RowsetWriter;
42
class CalcDeleteBitmapToken;
43
class SegmentCacheHandle;
44
class RowIdConversion;
45
struct PartialUpdateInfo;
46
class PartialUpdateReadPlan;
47
struct CaptureRowsetOps;
48
struct CaptureRowsetResult;
49
struct TabletReadSource;
50
class FixedReadPlan;
51
52
struct TabletWithVersion {
53
    BaseTabletSPtr tablet;
54
    int64_t version;
55
};
56
57
enum class CompactionStage { NOT_SCHEDULED, PENDING, EXECUTING };
58
59
// Base class for all tablet classes
60
class BaseTablet : public std::enable_shared_from_this<BaseTablet> {
61
public:
62
    explicit BaseTablet(TabletMetaSharedPtr tablet_meta);
63
    virtual ~BaseTablet();
64
    BaseTablet(const BaseTablet&) = delete;
65
    BaseTablet& operator=(const BaseTablet&) = delete;
66
67
9.41k
    TabletState tablet_state() const { return _tablet_meta->tablet_state(); }
68
    Status set_tablet_state(TabletState state);
69
14
    int64_t table_id() const { return _tablet_meta->table_id(); }
70
0
    size_t row_size() const { return _tablet_meta->tablet_schema()->row_size(); }
71
28
    int64_t index_id() const { return _tablet_meta->index_id(); }
72
977
    int64_t partition_id() const { return _tablet_meta->partition_id(); }
73
18.3k
    int64_t tablet_id() const { return _tablet_meta->tablet_id(); }
74
1.51k
    int32_t schema_hash() const { return _tablet_meta->schema_hash(); }
75
0
    CompressKind compress_kind() const { return _tablet_meta->tablet_schema()->compress_kind(); }
76
1.31k
    KeysType keys_type() const { return _tablet_meta->tablet_schema()->keys_type(); }
77
128
    size_t num_key_columns() const { return _tablet_meta->tablet_schema()->num_key_columns(); }
78
337
    int64_t ttl_seconds() const { return _tablet_meta->ttl_seconds(); }
79
    // currently used by schema change, inverted index building, and cooldown
80
18
    std::timed_mutex& get_schema_change_lock() { return _schema_change_lock; }
81
1.21k
    bool enable_unique_key_merge_on_write() const {
82
1.21k
#ifdef BE_TEST
83
1.21k
        if (_tablet_meta == nullptr) {
84
0
            return false;
85
0
        }
86
1.21k
#endif
87
1.21k
        return _tablet_meta->enable_unique_key_merge_on_write();
88
1.21k
    }
89
90
    // Property encapsulated in TabletMeta
91
2.87k
    const TabletMetaSharedPtr& tablet_meta() { return _tablet_meta; }
92
93
    int32_t max_version_config();
94
95
    // FIXME(plat1ko): It is not appropriate to expose this lock
96
352
    std::shared_mutex& get_header_lock() { return _meta_lock; }
97
98
    void update_max_version_schema(const TabletSchemaSPtr& tablet_schema);
99
100
11.6k
    TabletSchemaSPtr tablet_schema() const {
101
11.6k
        std::shared_lock rlock(_meta_lock);
102
11.6k
        return _max_version_schema;
103
11.6k
    }
104
105
0
    void set_alter_failed(bool alter_failed) { _alter_failed = alter_failed; }
106
0
    bool is_alter_failed() { return _alter_failed; }
107
108
    virtual std::string tablet_path() const = 0;
109
110
    virtual bool exceed_version_limit(int32_t limit) = 0;
111
112
    virtual Result<std::unique_ptr<RowsetWriter>> create_rowset_writer(RowsetWriterContext& context,
113
                                                                       bool vertical) = 0;
114
115
    virtual Status capture_rs_readers(const Version& spec_version,
116
                                      std::vector<RowSetSplits>* rs_splits,
117
                                      const CaptureRowsetOps& opts) = 0;
118
119
    virtual size_t tablet_footprint() = 0;
120
121
    // this method just returns the compaction sum on each rowset
122
    // note(tsy): we should eventually unify the compaction score calculation
123
    uint32_t get_real_compaction_score() const;
124
125
    // MUST hold shared meta lock
126
    Status capture_rs_readers_unlocked(const Versions& version_path,
127
                                       std::vector<RowSetSplits>* rs_splits) const;
128
129
    // _rs_version_map and _stale_rs_version_map should be protected by _meta_lock
130
    // The caller must hold _meta_lock when calling these three functions.
131
    RowsetSharedPtr get_rowset_by_version(const Version& version, bool find_is_stale = false) const;
132
    RowsetSharedPtr get_stale_rowset_by_version(const Version& version) const;
133
    RowsetSharedPtr get_rowset_with_max_version() const;
134
135
    Status get_all_rs_id(int64_t max_version, RowsetIdUnorderedSet* rowset_ids) const;
136
    Status get_all_rs_id_unlocked(int64_t max_version, RowsetIdUnorderedSet* rowset_ids) const;
137
138
    // Get the missed versions until the spec_version.
139
    Versions get_missed_versions(int64_t spec_version) const;
140
    Versions get_missed_versions_unlocked(int64_t spec_version) const;
141
142
    void generate_tablet_meta_copy(TabletMeta& new_tablet_meta, bool cloud_get_rowset_meta) const;
143
    void generate_tablet_meta_copy_unlocked(TabletMeta& new_tablet_meta,
144
                                            bool cloud_get_rowset_meta) const;
145
146
36
    virtual int64_t max_version_unlocked() const { return _tablet_meta->max_version().second; }
147
148
    static TabletSchemaSPtr tablet_schema_with_merged_max_schema_version(
149
            const std::vector<RowsetMetaSharedPtr>& rowset_metas);
150
151
    ////////////////////////////////////////////////////////////////////////////
152
    // begin MoW functions
153
    ////////////////////////////////////////////////////////////////////////////
154
    std::vector<RowsetSharedPtr> get_rowset_by_ids(
155
            const RowsetIdUnorderedSet* specified_rowset_ids);
156
157
    // Lookup a row with TupleDescriptor and fill Block
158
    Status lookup_row_data(const Slice& encoded_key, const RowLocation& row_location,
159
                           RowsetSharedPtr rowset, OlapReaderStatistics& stats, std::string& values,
160
                           bool write_to_cache = false);
161
    // Lookup the row location of `encoded_key`, the function sets `row_location` on success.
162
    // NOTE: the method only works in unique key model with primary key index; you will get a
163
    //       not supported error in other data models.
164
    Status lookup_row_key(const Slice& encoded_key, TabletSchema* latest_schema, bool with_seq_col,
165
                          const std::vector<RowsetSharedPtr>& specified_rowsets,
166
                          RowLocation* row_location, int64_t version,
167
                          std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches,
168
                          RowsetSharedPtr* rowset = nullptr, bool with_rowid = true,
169
                          std::string* encoded_seq_value = nullptr,
170
                          OlapReaderStatistics* stats = nullptr,
171
                          DeleteBitmapPtr tablet_delete_bitmap = nullptr);
172
173
    // calc delete bitmap when flush memtable, use a fake version to calc
174
    // For example, cur max version is 5, and we use version 6 to calc but
175
    // the rowset is finally published with version 8, we should make up data
176
    // for rowset 6-7. Also, if a compaction happens between commit_txn and
177
    // publish_txn, we should remove compaction input rowsets' delete_bitmap
178
    // and build newly generated rowset's delete_bitmap
179
    static Status calc_delete_bitmap(
180
            const BaseTabletSPtr& tablet, RowsetSharedPtr rowset,
181
            const std::vector<segment_v2::SegmentSharedPtr>& segments,
182
            const std::vector<RowsetSharedPtr>& specified_rowsets, DeleteBitmapPtr delete_bitmap,
183
            int64_t version, CalcDeleteBitmapToken* token, RowsetWriter* rowset_writer = nullptr,
184
            DeleteBitmapPtr tablet_delete_bitmap = nullptr,
185
            std::function<void(segment_v2::SegmentSharedPtr, Status)> callback =
186
1
                    [](segment_v2::SegmentSharedPtr, Status) {});
187
188
    Status calc_segment_delete_bitmap(RowsetSharedPtr rowset,
189
                                      const segment_v2::SegmentSharedPtr& seg,
190
                                      const std::vector<RowsetSharedPtr>& specified_rowsets,
191
                                      DeleteBitmapPtr delete_bitmap, int64_t end_version,
192
                                      RowsetWriter* rowset_writer,
193
                                      DeleteBitmapPtr tablet_delete_bitmap = nullptr);
194
195
    Status calc_delete_bitmap_between_segments(
196
            TabletSchemaSPtr schema, RowsetId rowset_id,
197
            const std::vector<segment_v2::SegmentSharedPtr>& segments,
198
            DeleteBitmapPtr delete_bitmap);
199
200
    static Status commit_phase_update_delete_bitmap(
201
            const BaseTabletSPtr& tablet, const RowsetSharedPtr& rowset,
202
            RowsetIdUnorderedSet& pre_rowset_ids, DeleteBitmapPtr delete_bitmap,
203
            const std::vector<segment_v2::SegmentSharedPtr>& segments, int64_t txn_id,
204
            CalcDeleteBitmapToken* token, RowsetWriter* rowset_writer = nullptr);
205
206
    static void add_sentinel_mark_to_delete_bitmap(DeleteBitmap* delete_bitmap,
207
                                                   const RowsetIdUnorderedSet& rowsetids);
208
209
    Status check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap, int64_t max_version,
210
                                           int64_t txn_id, const RowsetIdUnorderedSet& rowset_ids,
211
                                           std::vector<RowsetSharedPtr>* rowsets = nullptr);
212
213
    static const signed char* get_delete_sign_column_data(const vectorized::Block& block,
214
                                                          size_t rows_at_least = 0);
215
216
    static Status generate_default_value_block(const TabletSchema& schema,
217
                                               const std::vector<uint32_t>& cids,
218
                                               const std::vector<std::string>& default_values,
219
                                               const vectorized::Block& ref_block,
220
                                               vectorized::Block& default_value_block);
221
222
    static Status generate_new_block_for_partial_update(
223
            TabletSchemaSPtr rowset_schema, const PartialUpdateInfo* partial_update_info,
224
            const FixedReadPlan& read_plan_ori, const FixedReadPlan& read_plan_update,
225
            const std::map<RowsetId, RowsetSharedPtr>& rsid_to_rowset,
226
            vectorized::Block* output_block);
227
228
    static Status generate_new_block_for_flexible_partial_update(
229
            TabletSchemaSPtr rowset_schema, const PartialUpdateInfo* partial_update_info,
230
            std::set<uint32_t>& rids_be_overwritten, const FixedReadPlan& read_plan_ori,
231
            const FixedReadPlan& read_plan_update,
232
            const std::map<RowsetId, RowsetSharedPtr>& rsid_to_rowset,
233
            vectorized::Block* output_block);
234
235
    // We use the TabletSchema from the caller because the TabletSchema in the rowset's meta
236
    // may be outdated due to schema change. Also note that the cids should indicate the indexes
237
    // of the columns in the TabletSchema passed in.
238
    static Status fetch_value_through_row_column(RowsetSharedPtr input_rowset,
239
                                                 const TabletSchema& tablet_schema, uint32_t segid,
240
                                                 const std::vector<uint32_t>& rowids,
241
                                                 const std::vector<uint32_t>& cids,
242
                                                 vectorized::Block& block);
243
244
    static Status fetch_value_by_rowids(RowsetSharedPtr input_rowset, uint32_t segid,
245
                                        const std::vector<uint32_t>& rowids,
246
                                        const TabletColumn& tablet_column,
247
                                        vectorized::MutableColumnPtr& dst);
248
249
    virtual Result<std::unique_ptr<RowsetWriter>> create_transient_rowset_writer(
250
            const Rowset& rowset, std::shared_ptr<PartialUpdateInfo> partial_update_info,
251
            int64_t txn_expiration = 0) = 0;
252
253
    static Status update_delete_bitmap(const BaseTabletSPtr& self, TabletTxnInfo* txn_info,
254
                                       int64_t txn_id, int64_t txn_expiration = 0,
255
                                       DeleteBitmapPtr tablet_delete_bitmap = nullptr);
256
    virtual Status save_delete_bitmap(const TabletTxnInfo* txn_info, int64_t txn_id,
257
                                      DeleteBitmapPtr delete_bitmap, RowsetWriter* rowset_writer,
258
                                      const RowsetIdUnorderedSet& cur_rowset_ids,
259
                                      int64_t lock_id = -1, int64_t next_visible_version = -1) = 0;
260
    virtual CalcDeleteBitmapExecutor* calc_delete_bitmap_executor() = 0;
261
262
    void calc_compaction_output_rowset_delete_bitmap(
263
            const std::vector<RowsetSharedPtr>& input_rowsets,
264
            const RowIdConversion& rowid_conversion, uint64_t start_version, uint64_t end_version,
265
            std::set<RowLocation>* missed_rows,
266
            std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>* location_map,
267
            const DeleteBitmap& input_delete_bitmap, DeleteBitmap* output_rowset_delete_bitmap);
268
269
    Status check_rowid_conversion(
270
            RowsetSharedPtr dst_rowset,
271
            const std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>>&
272
                    location_map);
273
274
    static Status update_delete_bitmap_without_lock(
275
            const BaseTabletSPtr& self, const RowsetSharedPtr& rowset,
276
            const std::vector<RowsetSharedPtr>* specified_base_rowsets = nullptr);
277
278
    using DeleteBitmapKeyRanges =
279
            std::vector<std::tuple<DeleteBitmap::BitmapKey, DeleteBitmap::BitmapKey>>;
280
    void agg_delete_bitmap_for_stale_rowsets(
281
            Version version, DeleteBitmapKeyRanges& remove_delete_bitmap_key_ranges);
282
    void check_agg_delete_bitmap_for_stale_rowsets(int64_t& useless_rowset_count,
283
                                                   int64_t& useless_rowset_version_count);
284
    ////////////////////////////////////////////////////////////////////////////
285
    // end MoW functions
286
    ////////////////////////////////////////////////////////////////////////////
287
288
    RowsetSharedPtr get_rowset(const RowsetId& rowset_id);
289
290
    std::vector<RowsetSharedPtr> get_snapshot_rowset(bool include_stale_rowset = false) const;
291
292
    virtual void clear_cache() = 0;
293
294
    // Find the first consecutive empty rowsets. output->size() >= limit
295
    void calc_consecutive_empty_rowsets(std::vector<RowsetSharedPtr>* empty_rowsets,
296
                                        const std::vector<RowsetSharedPtr>& candidate_rowsets,
297
                                        int64_t limit);
298
299
    void traverse_rowsets(std::function<void(const RowsetSharedPtr&)> visitor,
300
11
                          bool include_stale = false) {
301
11
        std::shared_lock rlock(_meta_lock);
302
11
        traverse_rowsets_unlocked(visitor, include_stale);
303
11
    }
304
305
    void traverse_rowsets_unlocked(std::function<void(const RowsetSharedPtr&)> visitor,
306
16
                                   bool include_stale = false) const {
307
85
        for (auto& [v, rs] : _rs_version_map) {
308
85
            visitor(rs);
309
85
        }
310
16
        if (!include_stale) return;
311
81
        for (auto& [v, rs] : _stale_rs_version_map) {
312
81
            visitor(rs);
313
81
        }
314
15
    }
315
316
    Status calc_file_crc(uint32_t* crc_value, int64_t start_version, int64_t end_version,
317
                         uint32_t* rowset_count, int64_t* file_count);
318
319
    Status show_nested_index_file(std::string* json_meta);
320
321
12.4k
    TabletUid tablet_uid() const { return _tablet_meta->tablet_uid(); }
322
561
    TabletInfo get_tablet_info() const { return TabletInfo(tablet_id(), tablet_uid()); }
323
324
    void get_base_rowset_delete_bitmap_count(
325
            uint64_t* max_base_rowset_delete_bitmap_score,
326
            int64_t* max_base_rowset_delete_bitmap_score_tablet_id);
327
328
3
    virtual Status check_delete_bitmap_cache(int64_t txn_id, DeleteBitmap* expected_delete_bitmap) {
329
3
        return Status::OK();
330
3
    }
331
332
    void prefill_dbm_agg_cache(const RowsetSharedPtr& rowset, int64_t version);
333
    void prefill_dbm_agg_cache_after_compaction(const RowsetSharedPtr& output_rowset);
334
335
    [[nodiscard]] Result<CaptureRowsetResult> capture_consistent_rowsets_unlocked(
336
            const Version& version_range, const CaptureRowsetOps& options) const;
337
338
    [[nodiscard]] virtual Result<std::vector<Version>> capture_consistent_versions_unlocked(
339
            const Version& version_range, const CaptureRowsetOps& options) const;
340
341
    [[nodiscard]] Result<std::vector<RowSetSplits>> capture_rs_readers_unlocked(
342
            const Version& version_range, const CaptureRowsetOps& options) const;
343
344
    [[nodiscard]] Result<TabletReadSource> capture_read_source(const Version& version_range,
345
                                                               const CaptureRowsetOps& options);
346
347
protected:
348
    // Find the missed versions until the spec_version.
349
    //
350
    // for example:
351
    //     [0-4][5-5][8-8][9-9][14-14]
352
    // for cloud, if spec_version = 12, it will return [6-7],[10-12]
353
    // for local, if spec_version = 12, it will return [6, 6], [7, 7], [10, 10], [11, 11], [12, 12]
354
    virtual Versions calc_missed_versions(int64_t spec_version,
355
                                          Versions existing_versions) const = 0;
356
357
    void _print_missed_versions(const Versions& missed_versions) const;
358
    bool _reconstruct_version_tracker_if_necessary();
359
360
    static void _rowset_ids_difference(const RowsetIdUnorderedSet& cur,
361
                                       const RowsetIdUnorderedSet& pre,
362
                                       RowsetIdUnorderedSet* to_add, RowsetIdUnorderedSet* to_del);
363
364
    Status sort_block(vectorized::Block& in_block, vectorized::Block& output_block);
365
366
    Result<CaptureRowsetResult> _remote_capture_rowsets(const Version& version_range) const;
367
368
    mutable std::shared_mutex _meta_lock;
369
    TimestampedVersionTracker _timestamped_version_tracker;
370
    // After version 0.13, all newly created rowsets are saved in _rs_version_map.
371
    // And if rowsets are compacted, the old rowsets will be saved in _stale_rs_version_map;
372
    std::unordered_map<Version, RowsetSharedPtr, HashOfVersion> _rs_version_map;
373
    // This variable _stale_rs_version_map is used to record the rowsets which have been compacted.
374
    // These _stale rowsets are removed when the rowsets' pathVersion is expired,
375
    // this policy is judged and computed by TimestampedVersionTracker.
376
    std::unordered_map<Version, RowsetSharedPtr, HashOfVersion> _stale_rs_version_map;
377
    const TabletMetaSharedPtr _tablet_meta;
378
    TabletSchemaSPtr _max_version_schema;
379
380
    // `_alter_failed` is used to indicate whether the tablet failed to perform a schema change
381
    std::atomic<bool> _alter_failed = false;
382
383
    // metrics of this tablet
384
    std::shared_ptr<MetricEntity> _metric_entity;
385
386
protected:
387
    std::timed_mutex _schema_change_lock;
388
389
public:
390
    IntCounter* query_scan_bytes = nullptr;
391
    IntCounter* query_scan_rows = nullptr;
392
    IntCounter* query_scan_count = nullptr;
393
    IntCounter* flush_bytes = nullptr;
394
    IntCounter* flush_finish_count = nullptr;
395
    std::atomic<int64_t> published_count = 0;
396
    std::atomic<int64_t> read_block_count = 0;
397
    std::atomic<int64_t> write_count = 0;
398
    std::atomic<int64_t> compaction_count = 0;
399
400
    CompactionStage compaction_stage = CompactionStage::NOT_SCHEDULED;
401
    std::mutex sample_info_lock;
402
    std::vector<CompactionSampleInfo> sample_infos;
403
    Status last_compaction_status = Status::OK();
404
};
405
406
struct CaptureRowsetOps {
407
    bool skip_missing_versions = false;
408
    bool quiet = false;
409
    bool include_stale_rowsets = true;
410
    bool enable_fetch_rowsets_from_peers = false;
411
412
    // ======== only take effect in cloud mode ========
413
414
    // Enable preference for cached/warmed-up rowsets when building version paths.
415
    // When enabled, the capture process will prioritize already cached rowsets
416
    // to avoid cold data reads and improve query performance.
417
    bool enable_prefer_cached_rowset {false};
418
419
    // Query freshness tolerance in milliseconds.
420
    // Defines the time window for considering data as "fresh enough".
421
    // Rowsets that became visible within this time range can be skipped if not warmed up,
422
    // but older rowsets (before current_time - query_freshness_tolerance_ms) that are
423
    // not warmed up will trigger fallback to normal capture.
424
    // Set to -1 to disable freshness tolerance checking.
425
    int64_t query_freshness_tolerance_ms {-1};
426
};
427
428
struct CaptureRowsetResult {
429
    std::vector<RowsetSharedPtr> rowsets;
430
    std::shared_ptr<DeleteBitmap> delete_bitmap;
431
};
432
433
struct TabletReadSource {
434
    std::vector<RowSetSplits> rs_splits;
435
    std::vector<RowsetMetaSharedPtr> delete_predicates;
436
    std::shared_ptr<DeleteBitmap> delete_bitmap;
437
    // Fill delete predicates with `rs_splits`
438
    void fill_delete_predicates();
439
};
440
441
} /* namespace doris */