be/src/storage/compaction/cumulative_compaction_policy.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <stddef.h> |
21 | | #include <stdint.h> |
22 | | |
23 | | #include <memory> |
24 | | #include <string> |
25 | | #include <vector> |
26 | | |
27 | | #include "common/config.h" |
28 | | #include "storage/rowset/rowset.h" |
29 | | #include "storage/rowset/rowset_meta.h" |
30 | | |
31 | | namespace doris { |
32 | | |
33 | | class Tablet; |
34 | | struct Version; |
35 | | |
36 | | inline constexpr std::string_view CUMULATIVE_SIZE_BASED_POLICY = "size_based"; |
37 | | |
38 | | /// This class CumulativeCompactionPolicy is the base class of cumulative compaction policy. |
39 | | /// It defines the policy to do cumulative compaction. It has different derived classes, which implements |
40 | | /// concrete cumulative compaction algorithm. The policy is configured by conf::cumulative_compaction_policy. |
41 | | /// The policy functions is the main steps to do cumulative compaction. For example, how to pick candidate |
42 | | /// rowsets from tablet using current policy, how to calculate the cumulative point and how to calculate |
43 | | /// the tablet cumulative compaction score and so on. |
44 | | class CumulativeCompactionPolicy { |
45 | | public: |
46 | | /// Constructor function of CumulativeCompactionPolicy, |
47 | | /// it needs tablet pointer to access tablet method. |
48 | | /// param tablet, the shared pointer of tablet |
49 | 595 | CumulativeCompactionPolicy() {} |
50 | | |
51 | | /// Destructor function of CumulativeCompactionPolicy. |
52 | 595 | virtual ~CumulativeCompactionPolicy() {} |
53 | | |
54 | | /// Calculate the cumulative compaction score of the tablet. This function uses rowsets meta and current |
55 | | /// cumulative point to calculative the score of tablet. The score depends on the concrete algorithm of policy. |
56 | | /// In general, the score represents the segments nums to do cumulative compaction in total rowsets. The more |
57 | | /// score tablet gets, the earlier it can do cumulative compaction. |
58 | | /// param all_rowsets, all rowsets in tablet. |
59 | | /// param current_cumulative_point, current cumulative point value. |
60 | | /// return score, the result score after calculate. |
61 | | virtual uint32_t calc_cumulative_compaction_score(Tablet* tablet) = 0; |
62 | | |
63 | | /// Pick input rowsets from candidate rowsets for compaction. This function is pure virtual function. |
64 | | /// Its implementation depends on concrete compaction policy. |
65 | | /// param candidate_rowsets, the candidate_rowsets vector container to pick input rowsets |
66 | | /// return input_rowsets, the vector container as return |
67 | | /// return last_delete_version, if has delete rowset, record the delete version from input_rowsets |
68 | | /// return compaction_score, calculate the compaction score of picked input rowset |
69 | | virtual int pick_input_rowsets(Tablet* tablet, |
70 | | const std::vector<RowsetSharedPtr>& candidate_rowsets, |
71 | | const int64_t max_compaction_score, |
72 | | const int64_t min_compaction_score, |
73 | | std::vector<RowsetSharedPtr>* input_rowsets, |
74 | | Version* last_delete_version, size_t* compaction_score, |
75 | | bool allow_delete = false) = 0; |
76 | | |
77 | | /// Update tablet's cumulative point after cumulative compaction finished. This function is pure virtual function. |
78 | | /// Each derived has its own update policy which depends on its concrete algorithm. When the cumulative point moves |
79 | | /// after output rowset, then output rowset will do base compaction next time. |
80 | | /// param input_rowsets, the picked input rowset to do compaction just now |
81 | | /// param output_rowset, the result rowset after compaction |
82 | | virtual void update_cumulative_point(Tablet* tablet, |
83 | | const std::vector<RowsetSharedPtr>& input_rowsets, |
84 | | RowsetSharedPtr output_rowset, |
85 | | Version& last_delete_version) = 0; |
86 | | |
87 | | /// Calculate tablet's cumulative point before compaction. This calculation just executes once when the tablet compacts |
88 | | /// first time after BE initialization and then motion of cumulative point depends on update_cumulative_point policy. |
89 | | /// This function is pure virtual function. In general, the cumulative point splits the rowsets into two parts: |
90 | | /// base rowsets, cumulative rowsets. |
91 | | /// param all_rowsets, all rowsets in the tablet |
92 | | /// param current_cumulative_point, current cumulative position |
93 | | /// return cumulative_point, the result of calculating cumulative point position |
94 | | virtual void calculate_cumulative_point(Tablet* tablet, |
95 | | const RowsetMetaMapContainer& all_rowsets, |
96 | | int64_t current_cumulative_point, |
97 | | int64_t* cumulative_point) = 0; |
98 | | |
99 | | // Updates the compaction level of a tablet after a compaction operation. |
100 | | virtual int64_t get_compaction_level(Tablet* tablet, |
101 | | const std::vector<RowsetSharedPtr>& input_rowsets, |
102 | | RowsetSharedPtr output_rowset) = 0; |
103 | | |
104 | | /// Fetch cumulative policy name |
105 | | virtual std::string_view name() = 0; |
106 | | }; |
107 | | |
108 | | /// SizeBased cumulative compaction policy implementation. SizeBased policy which derives CumulativeCompactionPolicy is a optimized |
109 | | /// version of num based cumulative compaction policy. This policy also uses linear structure to compact rowsets. The cumulative rowsets |
110 | | /// can do compaction when they are in same level size. And when output rowset exceeds the promotion radio of base size or min promotion |
111 | | /// size, it will do base compaction. This policy is targeting the use cases requiring lower write amplification, trading off read |
112 | | /// amplification and space amplification. |
113 | | class SizeBasedCumulativeCompactionPolicy final : public CumulativeCompactionPolicy { |
114 | | public: |
115 | | /// Constructor function of SizeBasedCumulativeCompactionPolicy, |
116 | | /// it needs tablet pointer to access tablet method. |
117 | | /// param tablet, the shared pointer of tablet |
118 | | SizeBasedCumulativeCompactionPolicy( |
119 | | int64_t promotion_size = config::compaction_promotion_size_mbytes * 1024 * 1024, |
120 | | double promotion_ratio = config::compaction_promotion_ratio, |
121 | | int64_t promotion_min_size = config::compaction_promotion_min_size_mbytes * 1024 * 1024, |
122 | | int64_t promotion_version_count = config::compaction_promotion_version_count, |
123 | | int64_t compaction_min_size = config::compaction_min_size_mbytes * 1024 * 1024); |
124 | | |
125 | | /// Destructor function of SizeBasedCumulativeCompactionPolicy. |
126 | 580 | ~SizeBasedCumulativeCompactionPolicy() {} |
127 | | |
128 | | /// SizeBased cumulative compaction policy implements calculate cumulative point function. |
129 | | /// When the first time the tablet does compact, this calculation is executed. Its main policy is to find first rowset |
130 | | /// which does not satisfied the promotion conditions. |
131 | | void calculate_cumulative_point(Tablet* tablet, const RowsetMetaMapContainer& all_rowsets, |
132 | | int64_t current_cumulative_point, |
133 | | int64_t* cumulative_point) override; |
134 | | |
135 | | /// SizeBased cumulative compaction policy implements pick input rowsets function. |
136 | | /// Its main policy is picking rowsets from candidate rowsets by comparing accumulative compaction_score, |
137 | | /// max_cumulative_compaction_num_singleton_deltas or checking whether there is delete version rowset, |
138 | | /// and choose those rowset in the same level to do cumulative compaction. |
139 | | int pick_input_rowsets(Tablet* tablet, const std::vector<RowsetSharedPtr>& candidate_rowsets, |
140 | | const int64_t max_compaction_score, const int64_t min_compaction_score, |
141 | | std::vector<RowsetSharedPtr>* input_rowsets, |
142 | | Version* last_delete_version, size_t* compaction_score, |
143 | | bool allow_delete = false) override; |
144 | | |
145 | | /// SizeBased cumulative compaction policy implements update cumulative point function. |
146 | | /// Its main policy is judging the output rowset size whether satisfied the promotion size. |
147 | | /// If it satisfied, this policy will update the cumulative point. |
148 | | void update_cumulative_point(Tablet* tablet, const std::vector<RowsetSharedPtr>& input_rowsets, |
149 | | RowsetSharedPtr _output_rowset, |
150 | | Version& last_delete_version) override; |
151 | | |
152 | | /// Num based cumulative compaction policy implements calc cumulative compaction score function. |
153 | | /// Its main policy is calculating the accumulative compaction score after current cumulative_point in tablet. |
154 | | uint32_t calc_cumulative_compaction_score(Tablet* tablet) override; |
155 | | |
156 | | int64_t get_compaction_level(Tablet* tablet, const std::vector<RowsetSharedPtr>& input_rowsets, |
157 | 0 | RowsetSharedPtr output_rowset) override { |
158 | 0 | return 0; |
159 | 0 | } |
160 | | |
161 | 0 | std::string_view name() override { return CUMULATIVE_SIZE_BASED_POLICY; } |
162 | | |
163 | | private: |
164 | | /// calculate promotion size using current base rowset meta size and promotion configs |
165 | | void _calc_promotion_size(Tablet* tablet, RowsetMetaSharedPtr base_rowset_meta, |
166 | | int64_t* promotion_size); |
167 | | |
168 | | /// calculate the disk size belong to which level, the level is divide by power of 2 |
169 | | /// between compaction_promotion_size_mbytes and 1KB |
170 | | int64_t _level_size(const int64_t size); |
171 | | |
172 | | /// when policy calculate cumulative_compaction_score, update promotion size at the same time |
173 | | void _refresh_tablet_promotion_size(Tablet* tablet, int64_t promotion_size); |
174 | | |
175 | | private: |
176 | | /// cumulative compaction promotion size, unit is byte. |
177 | | int64_t _promotion_size; |
178 | | /// cumulative compaction promotion ratio of base rowset total disk size. |
179 | | double _promotion_ratio; |
180 | | /// cumulative compaction promotion min size, unit is byte. |
181 | | int64_t _promotion_min_size; |
182 | | // cululative compaction promotion version count, only works for unique key MoW table |
183 | | int64_t _promotion_version_count; |
184 | | /// lower bound size to do compaction compaction. |
185 | | int64_t _compaction_min_size; |
186 | | }; |
187 | | |
188 | | /// The factory of CumulativeCompactionPolicy, it can product different policy according to the `policy` parameter. |
189 | | class CumulativeCompactionPolicyFactory { |
190 | | public: |
191 | | /// Static factory function. It can product different policy according to the `policy` parameter and use tablet ptr |
192 | | /// to construct the policy. Now it can product size based and num based policies. |
193 | | static std::shared_ptr<CumulativeCompactionPolicy> create_cumulative_compaction_policy( |
194 | | const std::string_view& compaction_policy); |
195 | | }; |
196 | | |
197 | | } // namespace doris |