Coverage Report

Created: 2026-03-14 20:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/compaction/cumulative_compaction_policy.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <stddef.h>
21
#include <stdint.h>
22
23
#include <memory>
24
#include <string>
25
#include <vector>
26
27
#include "common/config.h"
28
#include "storage/rowset/rowset.h"
29
#include "storage/rowset/rowset_meta.h"
30
31
namespace doris {
32
33
class Tablet;
34
struct Version;
35
36
inline constexpr std::string_view CUMULATIVE_SIZE_BASED_POLICY = "size_based";
37
38
/// This class CumulativeCompactionPolicy is the base class of cumulative compaction policy.
39
/// It defines the policy to do cumulative compaction. It has different derived classes, which implements
40
/// concrete cumulative compaction algorithm. The policy is configured by conf::cumulative_compaction_policy.
41
/// The policy functions is the main steps to do cumulative compaction. For example, how to pick candidate
42
/// rowsets from tablet using current policy, how to calculate the cumulative point and how to calculate
43
/// the tablet cumulative compaction score and so on.
44
class CumulativeCompactionPolicy {
45
public:
46
    /// Constructor function of CumulativeCompactionPolicy,
47
    /// it needs tablet pointer to access tablet method.
48
    /// param tablet, the shared pointer of tablet
49
595
    CumulativeCompactionPolicy() {}
50
51
    /// Destructor function of CumulativeCompactionPolicy.
52
595
    virtual ~CumulativeCompactionPolicy() {}
53
54
    /// Calculate the cumulative compaction score of the tablet. This function uses rowsets meta and current
55
    /// cumulative point to calculative the score of tablet. The score depends on the concrete algorithm of policy.
56
    /// In general, the score represents the segments nums to do cumulative compaction in total rowsets. The more
57
    /// score tablet gets, the earlier it can do  cumulative compaction.
58
    /// param all_rowsets, all rowsets in tablet.
59
    /// param current_cumulative_point, current cumulative point value.
60
    /// return score, the result score after calculate.
61
    virtual uint32_t calc_cumulative_compaction_score(Tablet* tablet) = 0;
62
63
    /// Pick input rowsets from candidate rowsets for compaction. This function is pure virtual function.
64
    /// Its implementation depends on concrete compaction policy.
65
    /// param candidate_rowsets, the candidate_rowsets vector container to pick input rowsets
66
    /// return input_rowsets, the vector container as return
67
    /// return last_delete_version, if has delete rowset, record the delete version from input_rowsets
68
    /// return compaction_score, calculate the compaction score of picked input rowset
69
    virtual int pick_input_rowsets(Tablet* tablet,
70
                                   const std::vector<RowsetSharedPtr>& candidate_rowsets,
71
                                   const int64_t max_compaction_score,
72
                                   const int64_t min_compaction_score,
73
                                   std::vector<RowsetSharedPtr>* input_rowsets,
74
                                   Version* last_delete_version, size_t* compaction_score,
75
                                   bool allow_delete = false) = 0;
76
77
    /// Update tablet's cumulative point after cumulative compaction finished. This function is pure virtual function.
78
    /// Each derived has its own update policy which depends on its concrete algorithm. When the cumulative point moves
79
    /// after output rowset, then output rowset will do base compaction next time.
80
    /// param input_rowsets, the picked input rowset to do compaction just now
81
    /// param output_rowset, the result rowset after compaction
82
    virtual void update_cumulative_point(Tablet* tablet,
83
                                         const std::vector<RowsetSharedPtr>& input_rowsets,
84
                                         RowsetSharedPtr output_rowset,
85
                                         Version& last_delete_version) = 0;
86
87
    /// Calculate tablet's cumulative point before compaction. This calculation just executes once when the tablet compacts
88
    /// first time after BE initialization and then motion of cumulative point depends on update_cumulative_point policy.
89
    /// This function is pure virtual function. In general, the cumulative point splits the rowsets into two parts:
90
    /// base rowsets, cumulative rowsets.
91
    /// param all_rowsets, all rowsets in the tablet
92
    /// param current_cumulative_point, current cumulative position
93
    /// return cumulative_point, the result of calculating cumulative point position
94
    virtual void calculate_cumulative_point(Tablet* tablet,
95
                                            const RowsetMetaMapContainer& all_rowsets,
96
                                            int64_t current_cumulative_point,
97
                                            int64_t* cumulative_point) = 0;
98
99
    // Updates the compaction level of a tablet after a compaction operation.
100
    virtual int64_t get_compaction_level(Tablet* tablet,
101
                                         const std::vector<RowsetSharedPtr>& input_rowsets,
102
                                         RowsetSharedPtr output_rowset) = 0;
103
104
    /// Fetch cumulative policy name
105
    virtual std::string_view name() = 0;
106
};
107
108
/// SizeBased cumulative compaction policy implementation. SizeBased policy which derives CumulativeCompactionPolicy is a optimized
109
/// version of num based cumulative compaction policy. This policy also uses linear structure to compact rowsets. The cumulative rowsets
110
/// can do compaction when they are in same level size. And when output rowset exceeds the promotion radio of base size or min promotion
111
/// size, it will do base compaction. This policy is targeting the use cases requiring lower write amplification, trading off read
112
/// amplification and space amplification.
113
class SizeBasedCumulativeCompactionPolicy final : public CumulativeCompactionPolicy {
114
public:
115
    /// Constructor function of SizeBasedCumulativeCompactionPolicy,
116
    /// it needs tablet pointer to access tablet method.
117
    /// param tablet, the shared pointer of tablet
118
    SizeBasedCumulativeCompactionPolicy(
119
            int64_t promotion_size = config::compaction_promotion_size_mbytes * 1024 * 1024,
120
            double promotion_ratio = config::compaction_promotion_ratio,
121
            int64_t promotion_min_size = config::compaction_promotion_min_size_mbytes * 1024 * 1024,
122
            int64_t promotion_version_count = config::compaction_promotion_version_count,
123
            int64_t compaction_min_size = config::compaction_min_size_mbytes * 1024 * 1024);
124
125
    /// Destructor function of SizeBasedCumulativeCompactionPolicy.
126
580
    ~SizeBasedCumulativeCompactionPolicy() {}
127
128
    /// SizeBased cumulative compaction policy implements calculate cumulative point function.
129
    /// When the first time the tablet does compact, this calculation is executed. Its main policy is to find first rowset
130
    /// which does not satisfied the promotion conditions.
131
    void calculate_cumulative_point(Tablet* tablet, const RowsetMetaMapContainer& all_rowsets,
132
                                    int64_t current_cumulative_point,
133
                                    int64_t* cumulative_point) override;
134
135
    /// SizeBased cumulative compaction policy implements pick input rowsets function.
136
    /// Its main policy is picking rowsets from candidate rowsets by comparing accumulative compaction_score,
137
    /// max_cumulative_compaction_num_singleton_deltas or checking whether there is delete version rowset,
138
    /// and choose those rowset in the same level to do cumulative compaction.
139
    int pick_input_rowsets(Tablet* tablet, const std::vector<RowsetSharedPtr>& candidate_rowsets,
140
                           const int64_t max_compaction_score, const int64_t min_compaction_score,
141
                           std::vector<RowsetSharedPtr>* input_rowsets,
142
                           Version* last_delete_version, size_t* compaction_score,
143
                           bool allow_delete = false) override;
144
145
    /// SizeBased cumulative compaction policy implements update cumulative point function.
146
    /// Its main policy is judging the output rowset size whether satisfied the promotion size.
147
    /// If it satisfied, this policy will update the cumulative point.
148
    void update_cumulative_point(Tablet* tablet, const std::vector<RowsetSharedPtr>& input_rowsets,
149
                                 RowsetSharedPtr _output_rowset,
150
                                 Version& last_delete_version) override;
151
152
    /// Num based cumulative compaction policy implements calc cumulative compaction score function.
153
    /// Its main policy is calculating the accumulative compaction score after current cumulative_point in tablet.
154
    uint32_t calc_cumulative_compaction_score(Tablet* tablet) override;
155
156
    int64_t get_compaction_level(Tablet* tablet, const std::vector<RowsetSharedPtr>& input_rowsets,
157
0
                                 RowsetSharedPtr output_rowset) override {
158
0
        return 0;
159
0
    }
160
161
0
    std::string_view name() override { return CUMULATIVE_SIZE_BASED_POLICY; }
162
163
private:
164
    /// calculate promotion size using current base rowset meta size and promotion configs
165
    void _calc_promotion_size(Tablet* tablet, RowsetMetaSharedPtr base_rowset_meta,
166
                              int64_t* promotion_size);
167
168
    /// calculate the disk size belong to which level, the level is divide by power of 2
169
    /// between compaction_promotion_size_mbytes and 1KB
170
    int64_t _level_size(const int64_t size);
171
172
    /// when policy calculate cumulative_compaction_score, update promotion size at the same time
173
    void _refresh_tablet_promotion_size(Tablet* tablet, int64_t promotion_size);
174
175
private:
176
    /// cumulative compaction promotion size, unit is byte.
177
    int64_t _promotion_size;
178
    /// cumulative compaction promotion ratio of base rowset total disk size.
179
    double _promotion_ratio;
180
    /// cumulative compaction promotion min size, unit is byte.
181
    int64_t _promotion_min_size;
182
    // cululative compaction promotion version count, only works for unique key MoW table
183
    int64_t _promotion_version_count;
184
    /// lower bound size to do compaction compaction.
185
    int64_t _compaction_min_size;
186
};
187
188
/// The factory of CumulativeCompactionPolicy, it can product different policy according to the `policy` parameter.
189
class CumulativeCompactionPolicyFactory {
190
public:
191
    /// Static factory function. It can product different policy according to the `policy` parameter and use tablet ptr
192
    /// to construct the policy. Now it can product size based and num based policies.
193
    static std::shared_ptr<CumulativeCompactionPolicy> create_cumulative_compaction_policy(
194
            const std::string_view& compaction_policy);
195
};
196
197
} // namespace doris