be/src/storage/tablet/tablet_meta.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/tablet/tablet_meta.h" |
19 | | |
20 | | #include <bvar/bvar.h> |
21 | | #include <gen_cpp/Descriptors_types.h> |
22 | | #include <gen_cpp/FrontendService_types.h> |
23 | | #include <gen_cpp/Types_types.h> |
24 | | #include <gen_cpp/olap_common.pb.h> |
25 | | #include <gen_cpp/olap_file.pb.h> |
26 | | #include <gen_cpp/segment_v2.pb.h> |
27 | | #include <gen_cpp/types.pb.h> |
28 | | #include <json2pb/pb_to_json.h> |
29 | | #include <time.h> |
30 | | |
31 | | #include <cstdint> |
32 | | #include <memory> |
33 | | #include <random> |
34 | | #include <set> |
35 | | #include <utility> |
36 | | |
37 | | #include "cloud/cloud_meta_mgr.h" |
38 | | #include "cloud/cloud_storage_engine.h" |
39 | | #include "cloud/config.h" |
40 | | #include "common/config.h" |
41 | | #include "io/fs/file_writer.h" |
42 | | #include "io/fs/local_file_system.h" |
43 | | #include "storage/data_dir.h" |
44 | | #include "storage/file_header.h" |
45 | | #include "storage/olap_common.h" |
46 | | #include "storage/olap_define.h" |
47 | | #include "storage/rowset/rowset.h" |
48 | | #include "storage/rowset/rowset_meta_manager.h" |
49 | | #include "storage/tablet/tablet_fwd.h" |
50 | | #include "storage/tablet/tablet_meta_manager.h" |
51 | | #include "storage/tablet/tablet_schema_cache.h" |
52 | | #include "storage/utils.h" |
53 | | #include "util/debug_points.h" |
54 | | #include "util/lru_cache.h" |
55 | | #include "util/mem_info.h" |
56 | | #include "util/parse_util.h" |
57 | | #include "util/string_util.h" |
58 | | #include "util/time.h" |
59 | | #include "util/uid_util.h" |
60 | | |
61 | | using std::string; |
62 | | using std::unordered_map; |
63 | | using std::vector; |
64 | | |
65 | | namespace doris { |
66 | | using namespace ErrorCode; |
67 | | |
68 | | bvar::Adder<uint64_t> g_contains_agg_with_cache_if_eligible_total( |
69 | | "g_contains_agg_with_cache_if_eligible_total"); |
70 | | bvar::Adder<uint64_t> g_contains_agg_with_cache_if_eligible_partial_hit( |
71 | | "g_contains_agg_with_cache_if_eligible_partial_hit"); |
72 | | bvar::Adder<uint64_t> g_contains_agg_with_cache_if_eligible_full_hit( |
73 | | "g_contains_agg_with_cache_if_eligible_full_hit"); |
74 | | bvar::Window<bvar::Adder<uint64_t>> g_contains_agg_with_cache_if_eligible_total_minute( |
75 | | "g_contains_agg_with_cache_if_eligible_total_1m", |
76 | | &g_contains_agg_with_cache_if_eligible_total, 60); |
77 | | bvar::Window<bvar::Adder<uint64_t>> g_contains_agg_with_cache_if_eligible_partial_hit_minute( |
78 | | "g_contains_agg_with_cache_if_eligible_partial_hit_1m", |
79 | | &g_contains_agg_with_cache_if_eligible_partial_hit, 60); |
80 | | bvar::Window<bvar::Adder<uint64_t>> g_contains_agg_with_cache_if_eligible_full_hit_minute( |
81 | | "g_contains_agg_with_cache_if_eligible_full_hit_1m", |
82 | | &g_contains_agg_with_cache_if_eligible_full_hit, 60); |
83 | | |
84 | | TabletMetaSharedPtr TabletMeta::create( |
85 | | const TCreateTabletReq& request, const TabletUid& tablet_uid, uint64_t shard_id, |
86 | | uint32_t next_unique_id, |
87 | 196 | const unordered_map<uint32_t, uint32_t>& col_ordinal_to_unique_id) { |
88 | 196 | std::optional<TBinlogConfig> binlog_config; |
89 | 196 | if (request.__isset.binlog_config) { |
90 | 3 | binlog_config = request.binlog_config; |
91 | 3 | } |
92 | 196 | TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format = |
93 | 196 | request.inverted_index_file_storage_format; |
94 | | |
95 | | // We will discard this format. Don't make any further changes here. |
96 | 196 | if (request.__isset.inverted_index_storage_format) { |
97 | 196 | switch (request.inverted_index_storage_format) { |
98 | 0 | case TInvertedIndexStorageFormat::V1: |
99 | 0 | inverted_index_file_storage_format = TInvertedIndexFileStorageFormat::V1; |
100 | 0 | break; |
101 | 0 | case TInvertedIndexStorageFormat::V2: |
102 | 0 | inverted_index_file_storage_format = TInvertedIndexFileStorageFormat::V2; |
103 | 0 | break; |
104 | 196 | default: |
105 | 196 | break; |
106 | 196 | } |
107 | 196 | } |
108 | | // Decide storage format for this tablet. DEFAULT / not-set fall back to V2 on BE side. |
109 | 196 | TStorageFormat::type storage_format = |
110 | 196 | request.__isset.storage_format ? request.storage_format : TStorageFormat::V2; |
111 | 196 | return std::make_shared<TabletMeta>( |
112 | 196 | request.table_id, request.partition_id, request.tablet_id, request.replica_id, |
113 | 196 | request.tablet_schema.schema_hash, shard_id, request.tablet_schema, next_unique_id, |
114 | 196 | col_ordinal_to_unique_id, tablet_uid, |
115 | 196 | request.__isset.tablet_type ? request.tablet_type : TTabletType::TABLET_TYPE_DISK, |
116 | 196 | request.__isset.compression_type ? request.compression_type : TCompressionType::LZ4F, |
117 | 196 | request.__isset.storage_policy_id ? request.storage_policy_id : -1, |
118 | 196 | request.__isset.enable_unique_key_merge_on_write |
119 | 196 | ? request.enable_unique_key_merge_on_write |
120 | 196 | : false, |
121 | 196 | std::move(binlog_config), request.compaction_policy, |
122 | 196 | request.time_series_compaction_goal_size_mbytes, |
123 | 196 | request.time_series_compaction_file_count_threshold, |
124 | 196 | request.time_series_compaction_time_threshold_seconds, |
125 | 196 | request.time_series_compaction_empty_rowsets_threshold, |
126 | 196 | request.time_series_compaction_level_threshold, inverted_index_file_storage_format, |
127 | 196 | request.tde_algorithm, storage_format, |
128 | 196 | request.__isset.vertical_compaction_num_columns_per_group |
129 | 196 | ? request.vertical_compaction_num_columns_per_group |
130 | 196 | : 5, |
131 | 196 | request.__isset.row_binlog_schema ? &request.row_binlog_schema : nullptr); |
132 | 196 | } |
133 | | |
134 | 240k | TabletMeta::~TabletMeta() { |
135 | 240k | if (_handle) { |
136 | 239k | TabletSchemaCache::instance()->release(_handle); |
137 | 239k | } |
138 | 240k | } |
139 | | |
140 | | TabletMeta::TabletMeta() |
141 | 478k | : _tablet_uid(0, 0), |
142 | 478k | _schema(new TabletSchema), |
143 | 478k | _delete_bitmap(new DeleteBitmap(_tablet_id)), |
144 | 478k | _binlog_delvec(new DeleteBitmap(_tablet_id)) {} |
145 | | |
146 | | TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, |
147 | | int64_t replica_id, int32_t schema_hash, int32_t shard_id, |
148 | | const TTabletSchema& tablet_schema, uint32_t next_unique_id, |
149 | | const std::unordered_map<uint32_t, uint32_t>& col_ordinal_to_unique_id, |
150 | | TabletUid tablet_uid, TTabletType::type tabletType, |
151 | | TCompressionType::type compression_type, int64_t storage_policy_id, |
152 | | bool enable_unique_key_merge_on_write, |
153 | | std::optional<TBinlogConfig> binlog_config, std::string compaction_policy, |
154 | | int64_t time_series_compaction_goal_size_mbytes, |
155 | | int64_t time_series_compaction_file_count_threshold, |
156 | | int64_t time_series_compaction_time_threshold_seconds, |
157 | | int64_t time_series_compaction_empty_rowsets_threshold, |
158 | | int64_t time_series_compaction_level_threshold, |
159 | | TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format, |
160 | | TEncryptionAlgorithm::type tde_algorithm, |
161 | | TStorageFormat::type storage_format, |
162 | | int32_t vertical_compaction_num_columns_per_group, |
163 | | const TTabletSchema* row_binlog_schema) |
164 | 575 | : _tablet_uid(0, 0), |
165 | 575 | _schema(new TabletSchema), |
166 | 575 | _delete_bitmap(new DeleteBitmap(tablet_id)), |
167 | 575 | _binlog_delvec(new DeleteBitmap(tablet_id)), |
168 | 575 | _storage_format(storage_format) { |
169 | 575 | TabletMetaPB tablet_meta_pb; |
170 | 575 | tablet_meta_pb.set_table_id(table_id); |
171 | 575 | tablet_meta_pb.set_partition_id(partition_id); |
172 | 575 | tablet_meta_pb.set_tablet_id(tablet_id); |
173 | 575 | tablet_meta_pb.set_replica_id(replica_id); |
174 | 575 | tablet_meta_pb.set_schema_hash(schema_hash); |
175 | 575 | tablet_meta_pb.set_shard_id(shard_id); |
176 | | // Persist the creation time, but it is not used |
177 | 575 | tablet_meta_pb.set_creation_time(time(nullptr)); |
178 | 575 | tablet_meta_pb.set_cumulative_layer_point(-1); |
179 | 575 | tablet_meta_pb.set_tablet_state(PB_RUNNING); |
180 | 575 | *(tablet_meta_pb.mutable_tablet_uid()) = tablet_uid.to_proto(); |
181 | 575 | tablet_meta_pb.set_tablet_type(tabletType == TTabletType::TABLET_TYPE_DISK |
182 | 575 | ? TabletTypePB::TABLET_TYPE_DISK |
183 | 575 | : TabletTypePB::TABLET_TYPE_MEMORY); |
184 | 575 | tablet_meta_pb.set_enable_unique_key_merge_on_write(enable_unique_key_merge_on_write); |
185 | 575 | tablet_meta_pb.set_storage_policy_id(storage_policy_id); |
186 | 575 | tablet_meta_pb.set_compaction_policy(compaction_policy); |
187 | 575 | tablet_meta_pb.set_time_series_compaction_goal_size_mbytes( |
188 | 575 | time_series_compaction_goal_size_mbytes); |
189 | 575 | tablet_meta_pb.set_time_series_compaction_file_count_threshold( |
190 | 575 | time_series_compaction_file_count_threshold); |
191 | 575 | tablet_meta_pb.set_time_series_compaction_time_threshold_seconds( |
192 | 575 | time_series_compaction_time_threshold_seconds); |
193 | 575 | tablet_meta_pb.set_time_series_compaction_empty_rowsets_threshold( |
194 | 575 | time_series_compaction_empty_rowsets_threshold); |
195 | 575 | tablet_meta_pb.set_time_series_compaction_level_threshold( |
196 | 575 | time_series_compaction_level_threshold); |
197 | 575 | tablet_meta_pb.set_vertical_compaction_num_columns_per_group( |
198 | 575 | vertical_compaction_num_columns_per_group); |
199 | 575 | SchemaCreateOptions schema_create_options_for_data = { |
200 | 575 | .col_ordinal_to_unique_id = col_ordinal_to_unique_id, |
201 | 575 | .compression_type = compression_type, |
202 | 575 | .inverted_index_file_storage_format = inverted_index_file_storage_format, |
203 | 575 | .next_unique_id = next_unique_id}; |
204 | 575 | TabletSchemaPB* schema_pb_for_data = tablet_meta_pb.mutable_schema(); |
205 | 575 | init_schema_from_thrift(tablet_schema, schema_create_options_for_data, schema_pb_for_data); |
206 | | |
207 | 575 | tablet_meta_pb.set_in_restore_mode(false); |
208 | | |
209 | 575 | TabletSchemaPB* schema_pb_for_row_binlog = nullptr; |
210 | 575 | if (row_binlog_schema != nullptr) { |
211 | 3 | tablet_meta_pb.set_row_binlog_schema_hash(row_binlog_schema->schema_hash); |
212 | 3 | DCHECK(binlog_config.has_value()); |
213 | 3 | DCHECK(binlog_config->enable && binlog_config->binlog_format == TBinlogFormat::ROW); |
214 | | |
215 | 3 | std::unordered_map<uint32_t, uint32_t> row_binlog_col_ordinal_to_unique_id; |
216 | 3 | uint32_t row_binlog_next_unique_id = 0; |
217 | 21 | for (uint32_t col_ordinal = 0; col_ordinal < row_binlog_schema->columns.size(); |
218 | 18 | ++col_ordinal) { |
219 | 18 | const auto& tcolumn = row_binlog_schema->columns[col_ordinal]; |
220 | 18 | uint32_t unique_id = 0; |
221 | 18 | if (tcolumn.col_unique_id >= 0) { |
222 | 0 | unique_id = tcolumn.col_unique_id; |
223 | 18 | } else { |
224 | 18 | unique_id = col_ordinal; |
225 | 18 | } |
226 | 18 | row_binlog_col_ordinal_to_unique_id[col_ordinal] = unique_id; |
227 | 18 | if (row_binlog_next_unique_id <= unique_id) { |
228 | 18 | row_binlog_next_unique_id = unique_id + 1; |
229 | 18 | } |
230 | 18 | } |
231 | | |
232 | 3 | SchemaCreateOptions schema_create_options_for_row_binlog = { |
233 | 3 | .col_ordinal_to_unique_id = row_binlog_col_ordinal_to_unique_id, |
234 | 3 | .compression_type = compression_type, |
235 | 3 | .inverted_index_file_storage_format = inverted_index_file_storage_format, |
236 | 3 | .next_unique_id = row_binlog_next_unique_id}; |
237 | 3 | schema_pb_for_row_binlog = tablet_meta_pb.mutable_row_binlog_schema(); |
238 | 3 | init_schema_from_thrift(*row_binlog_schema, schema_create_options_for_row_binlog, |
239 | 3 | schema_pb_for_row_binlog); |
240 | 3 | } |
241 | 575 | if (binlog_config.has_value()) { |
242 | 3 | BinlogConfig tmp_binlog_config; |
243 | 3 | tmp_binlog_config = binlog_config.value(); |
244 | 3 | tmp_binlog_config.to_pb(tablet_meta_pb.mutable_binlog_config()); |
245 | 3 | } |
246 | | |
247 | 575 | switch (tde_algorithm) { |
248 | 0 | case doris::TEncryptionAlgorithm::AES256: |
249 | 0 | tablet_meta_pb.set_encryption_algorithm(EncryptionAlgorithmPB::AES_256_CTR); |
250 | 0 | break; |
251 | 0 | case doris::TEncryptionAlgorithm::SM4: |
252 | 0 | tablet_meta_pb.set_encryption_algorithm(EncryptionAlgorithmPB::SM4_128_CTR); |
253 | 0 | break; |
254 | 575 | default: |
255 | 575 | tablet_meta_pb.set_encryption_algorithm(EncryptionAlgorithmPB::PLAINTEXT); |
256 | 575 | } |
257 | | |
258 | | // Initialize default external ColumnMeta usage according to storage format. |
259 | | // V2: legacy behavior, inline ColumnMetaPB only. |
260 | | // V3: V2 + external ColumnMetaPB (CMO) enabled by default. |
261 | 575 | switch (_storage_format) { |
262 | 575 | case TStorageFormat::V2: |
263 | 575 | case TStorageFormat::DEFAULT: |
264 | 575 | case TStorageFormat::V1: |
265 | 575 | break; |
266 | 0 | case TStorageFormat::V3: |
267 | 0 | schema_pb_for_data->set_storage_format(TabletStorageFormatPB::TABLET_STORAGE_FORMAT_V3); |
268 | 0 | _schema->set_storage_format(TabletStorageFormatPB::TABLET_STORAGE_FORMAT_V3); |
269 | 0 | if (schema_pb_for_row_binlog != nullptr) { |
270 | 0 | schema_pb_for_row_binlog->set_storage_format( |
271 | 0 | TabletStorageFormatPB::TABLET_STORAGE_FORMAT_V3); |
272 | 0 | } |
273 | 0 | break; |
274 | 0 | default: |
275 | 0 | break; |
276 | 575 | } |
277 | | |
278 | 575 | init_from_pb(tablet_meta_pb); |
279 | 575 | } |
280 | | |
281 | | TabletMeta::TabletMeta(const TabletMeta& b) |
282 | 1.32k | : MetadataAdder(b), |
283 | 1.32k | _table_id(b._table_id), |
284 | 1.32k | _index_id(b._index_id), |
285 | 1.32k | _partition_id(b._partition_id), |
286 | 1.32k | _tablet_id(b._tablet_id), |
287 | 1.32k | _replica_id(b._replica_id), |
288 | 1.32k | _schema_hash(b._schema_hash), |
289 | 1.32k | _shard_id(b._shard_id), |
290 | 1.32k | _creation_time(b._creation_time), |
291 | 1.32k | _cumulative_layer_point(b._cumulative_layer_point), |
292 | 1.32k | _tablet_uid(b._tablet_uid), |
293 | 1.32k | _tablet_type(b._tablet_type), |
294 | 1.32k | _tablet_state(b._tablet_state), |
295 | 1.32k | _schema(b._schema), |
296 | 1.32k | _rs_metas(b._rs_metas), |
297 | 1.32k | _stale_rs_metas(b._stale_rs_metas), |
298 | 1.32k | _in_restore_mode(b._in_restore_mode), |
299 | 1.32k | _preferred_rowset_type(b._preferred_rowset_type), |
300 | 1.32k | _storage_policy_id(b._storage_policy_id), |
301 | 1.32k | _cooldown_meta_id(b._cooldown_meta_id), |
302 | 1.32k | _enable_unique_key_merge_on_write(b._enable_unique_key_merge_on_write), |
303 | 1.32k | _delete_bitmap(b._delete_bitmap), |
304 | 1.32k | _binlog_delvec(b._binlog_delvec), |
305 | 1.32k | _row_binlog_schema_hash(b._row_binlog_schema_hash), |
306 | 1.32k | _row_binlog_schema(b._row_binlog_schema), |
307 | 1.32k | _row_binlog_rs_metas(b._row_binlog_rs_metas), |
308 | 1.32k | _binlog_config(b._binlog_config), |
309 | 1.32k | _compaction_policy(b._compaction_policy), |
310 | 1.32k | _time_series_compaction_goal_size_mbytes(b._time_series_compaction_goal_size_mbytes), |
311 | | _time_series_compaction_file_count_threshold( |
312 | 1.32k | b._time_series_compaction_file_count_threshold), |
313 | | _time_series_compaction_time_threshold_seconds( |
314 | 1.32k | b._time_series_compaction_time_threshold_seconds), |
315 | | _time_series_compaction_empty_rowsets_threshold( |
316 | 1.32k | b._time_series_compaction_empty_rowsets_threshold), |
317 | 1.32k | _time_series_compaction_level_threshold(b._time_series_compaction_level_threshold), |
318 | | _vertical_compaction_num_columns_per_group( |
319 | 1.32k | b._vertical_compaction_num_columns_per_group) {}; |
320 | | |
321 | | void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn, |
322 | 18.7M | ColumnPB* column) { |
323 | 18.7M | column->set_unique_id(unique_id); |
324 | 18.7M | column->set_name(tcolumn.column_name); |
325 | 18.7M | column->set_is_auto_increment(tcolumn.is_auto_increment); |
326 | 18.7M | if (tcolumn.__isset.is_on_update_current_timestamp) { |
327 | 18.7M | column->set_is_on_update_current_timestamp(tcolumn.is_on_update_current_timestamp); |
328 | 18.7M | } |
329 | 18.7M | string data_type; |
330 | 18.7M | EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type); |
331 | 18.7M | column->set_type(data_type); |
332 | | |
333 | 18.7M | uint32_t length = TabletColumn::get_field_length_by_type(tcolumn.column_type.type, |
334 | 18.7M | tcolumn.column_type.len); |
335 | 18.7M | column->set_length(length); |
336 | 18.7M | column->set_index_length(length); |
337 | 18.7M | column->set_precision(tcolumn.column_type.precision); |
338 | 18.7M | column->set_frac(tcolumn.column_type.scale); |
339 | | |
340 | 18.7M | if (tcolumn.__isset.result_is_nullable) { |
341 | 2.92k | column->set_result_is_nullable(tcolumn.result_is_nullable); |
342 | 2.92k | } |
343 | | |
344 | 18.7M | if (tcolumn.__isset.be_exec_version) { |
345 | 18.7M | column->set_be_exec_version(tcolumn.be_exec_version); |
346 | 18.7M | } |
347 | | |
348 | 18.7M | if (tcolumn.column_type.type == TPrimitiveType::VARCHAR || |
349 | 18.7M | tcolumn.column_type.type == TPrimitiveType::STRING) { |
350 | 6.66M | if (!tcolumn.column_type.__isset.index_len) { |
351 | 109 | column->set_index_length(10); |
352 | 6.66M | } else { |
353 | 6.66M | column->set_index_length(tcolumn.column_type.index_len); |
354 | 6.66M | } |
355 | 6.66M | } |
356 | 18.7M | if (!tcolumn.is_key) { |
357 | 13.7M | column->set_is_key(false); |
358 | 13.7M | if (tcolumn.__isset.aggregation) { |
359 | 2.93k | column->set_aggregation(tcolumn.aggregation); |
360 | 13.7M | } else { |
361 | 13.7M | string aggregation_type; |
362 | 13.7M | EnumToString(TAggregationType, tcolumn.aggregation_type, aggregation_type); |
363 | 13.7M | column->set_aggregation(aggregation_type); |
364 | 13.7M | } |
365 | 13.7M | } else { |
366 | 4.91M | column->set_is_key(true); |
367 | 4.91M | column->set_aggregation("NONE"); |
368 | 4.91M | } |
369 | 18.7M | column->set_is_nullable(tcolumn.is_allow_null); |
370 | 18.7M | if (tcolumn.__isset.default_value) { |
371 | 1.61M | column->set_default_value(tcolumn.default_value); |
372 | 1.61M | } |
373 | 18.7M | if (tcolumn.__isset.is_bloom_filter_column) { |
374 | 11.1k | column->set_is_bf_column(tcolumn.is_bloom_filter_column); |
375 | 11.1k | } |
376 | 18.7M | if (tcolumn.__isset.visible) { |
377 | 18.7M | column->set_visible(tcolumn.visible); |
378 | 18.7M | } |
379 | 21.0M | for (size_t i = 0; i < tcolumn.children_column.size(); i++) { |
380 | 2.37M | ColumnPB* children_column = column->add_children_columns(); |
381 | 2.37M | init_column_from_tcolumn(tcolumn.children_column[i].col_unique_id, |
382 | 2.37M | tcolumn.children_column[i], children_column); |
383 | 2.37M | } |
384 | 18.7M | if (tcolumn.column_type.__isset.variant_max_subcolumns_count) { |
385 | 18.7M | column->set_variant_max_subcolumns_count(tcolumn.column_type.variant_max_subcolumns_count); |
386 | 18.7M | } |
387 | 18.7M | if (tcolumn.__isset.pattern_type) { |
388 | 39.5k | switch (tcolumn.pattern_type) { |
389 | 1.62k | case TPatternType::MATCH_NAME: |
390 | 1.62k | column->set_pattern_type(PatternTypePB::MATCH_NAME); |
391 | 1.62k | break; |
392 | 37.8k | case TPatternType::MATCH_NAME_GLOB: |
393 | 37.8k | column->set_pattern_type(PatternTypePB::MATCH_NAME_GLOB); |
394 | 39.5k | } |
395 | 39.5k | } |
396 | 18.7M | if (tcolumn.__isset.variant_enable_typed_paths_to_sparse) { |
397 | 18.7M | column->set_variant_enable_typed_paths_to_sparse( |
398 | 18.7M | tcolumn.variant_enable_typed_paths_to_sparse); |
399 | 18.7M | } |
400 | 18.7M | if (tcolumn.__isset.variant_max_sparse_column_statistics_size) { |
401 | 18.7M | column->set_variant_max_sparse_column_statistics_size( |
402 | 18.7M | tcolumn.variant_max_sparse_column_statistics_size); |
403 | 18.7M | } |
404 | 18.7M | if (tcolumn.__isset.variant_sparse_hash_shard_count) { |
405 | 16.3M | column->set_variant_sparse_hash_shard_count(tcolumn.variant_sparse_hash_shard_count); |
406 | 16.3M | } |
407 | 18.7M | if (tcolumn.column_type.__isset.variant_enable_doc_mode) { |
408 | 18.7M | column->set_variant_enable_doc_mode(tcolumn.column_type.variant_enable_doc_mode); |
409 | 18.7M | } |
410 | 18.7M | if (tcolumn.__isset.variant_doc_materialization_min_rows) { |
411 | 16.4M | column->set_variant_doc_materialization_min_rows( |
412 | 16.4M | tcolumn.variant_doc_materialization_min_rows); |
413 | 16.4M | } |
414 | 18.7M | if (tcolumn.__isset.variant_doc_hash_shard_count) { |
415 | 16.4M | column->set_variant_doc_hash_shard_count(tcolumn.variant_doc_hash_shard_count); |
416 | 16.4M | } |
417 | 18.7M | if (tcolumn.__isset.variant_enable_nested_group) { |
418 | 16.4M | column->set_variant_enable_nested_group(tcolumn.variant_enable_nested_group); |
419 | 16.4M | } |
420 | 18.7M | } |
421 | | |
422 | | void TabletMeta::init_schema_from_thrift(const TTabletSchema& tablet_schema, |
423 | | const SchemaCreateOptions& schema_create_options, |
424 | 578 | TabletSchemaPB* tablet_schema_pb) { |
425 | 578 | const std::unordered_map<uint32_t, uint32_t>& col_ordinal_to_unique_id = |
426 | 578 | schema_create_options.col_ordinal_to_unique_id; |
427 | 578 | TCompressionType::type compression_type = schema_create_options.compression_type; |
428 | 578 | TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format = |
429 | 578 | schema_create_options.inverted_index_file_storage_format; |
430 | 578 | uint32_t next_unique_id = schema_create_options.next_unique_id; |
431 | | |
432 | 578 | tablet_schema_pb->set_num_short_key_columns(tablet_schema.short_key_column_count); |
433 | 578 | tablet_schema_pb->set_num_rows_per_row_block(config::default_num_rows_per_column_file_block); |
434 | 578 | tablet_schema_pb->set_sequence_col_idx(tablet_schema.sequence_col_idx); |
435 | 578 | auto p_seq_map = tablet_schema_pb->mutable_seq_map(); // ColumnGroupsPB |
436 | 578 | for (auto& it : tablet_schema.seq_map) { // std::vector< ::doris::TColumnGroup> |
437 | 0 | uint32_t key = it.sequence_column; |
438 | 0 | ColumnGroupPB* cg_pb = p_seq_map->add_cg(); // ColumnGroupPB {key: {v1, v2, v3}} |
439 | 0 | cg_pb->set_sequence_column(key); |
440 | 0 | for (auto v : it.columns_in_group) { |
441 | 0 | cg_pb->add_columns_in_group(v); |
442 | 0 | } |
443 | 0 | } |
444 | | |
445 | 578 | switch (tablet_schema.keys_type) { |
446 | 54 | case TKeysType::DUP_KEYS: |
447 | 54 | tablet_schema_pb->set_keys_type(KeysType::DUP_KEYS); |
448 | 54 | break; |
449 | 194 | case TKeysType::UNIQUE_KEYS: |
450 | 194 | tablet_schema_pb->set_keys_type(KeysType::UNIQUE_KEYS); |
451 | 194 | break; |
452 | 69 | case TKeysType::AGG_KEYS: |
453 | 69 | tablet_schema_pb->set_keys_type(KeysType::AGG_KEYS); |
454 | 69 | break; |
455 | 261 | default: |
456 | 261 | LOG(WARNING) << "unknown tablet keys type"; |
457 | 261 | break; |
458 | 578 | } |
459 | | |
460 | | // compress_kind used to compress segment files |
461 | 578 | tablet_schema_pb->set_compress_kind(COMPRESS_LZ4); |
462 | | |
463 | | // compression_type used to compress segment page |
464 | 578 | switch (compression_type) { |
465 | 0 | case TCompressionType::NO_COMPRESSION: |
466 | 0 | tablet_schema_pb->set_compression_type(segment_v2::NO_COMPRESSION); |
467 | 0 | break; |
468 | 0 | case TCompressionType::SNAPPY: |
469 | 0 | tablet_schema_pb->set_compression_type(segment_v2::SNAPPY); |
470 | 0 | break; |
471 | 0 | case TCompressionType::LZ4: |
472 | 0 | tablet_schema_pb->set_compression_type(segment_v2::LZ4); |
473 | 0 | break; |
474 | 578 | case TCompressionType::LZ4F: |
475 | 578 | tablet_schema_pb->set_compression_type(segment_v2::LZ4F); |
476 | 578 | break; |
477 | 0 | case TCompressionType::ZLIB: |
478 | 0 | tablet_schema_pb->set_compression_type(segment_v2::ZLIB); |
479 | 0 | break; |
480 | 0 | case TCompressionType::ZSTD: |
481 | 0 | tablet_schema_pb->set_compression_type(segment_v2::ZSTD); |
482 | 0 | break; |
483 | 0 | default: |
484 | 0 | tablet_schema_pb->set_compression_type(segment_v2::LZ4F); |
485 | 0 | break; |
486 | 578 | } |
487 | | |
488 | 578 | switch (inverted_index_file_storage_format) { |
489 | 0 | case TInvertedIndexFileStorageFormat::V1: |
490 | 0 | tablet_schema_pb->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1); |
491 | 0 | break; |
492 | 578 | case TInvertedIndexFileStorageFormat::V2: |
493 | 578 | tablet_schema_pb->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); |
494 | 578 | break; |
495 | 0 | case TInvertedIndexFileStorageFormat::V3: |
496 | 0 | tablet_schema_pb->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V3); |
497 | 0 | break; |
498 | 0 | default: |
499 | 0 | tablet_schema_pb->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V3); |
500 | 0 | break; |
501 | 578 | } |
502 | | |
503 | 578 | switch (tablet_schema.sort_type) { |
504 | 0 | case TSortType::type::ZORDER: |
505 | 0 | tablet_schema_pb->set_sort_type(SortType::ZORDER); |
506 | 0 | break; |
507 | 578 | default: |
508 | 578 | tablet_schema_pb->set_sort_type(SortType::LEXICAL); |
509 | 578 | } |
510 | 578 | tablet_schema_pb->set_sort_col_num(tablet_schema.sort_col_num); |
511 | 578 | for (const auto& i : tablet_schema.cluster_key_uids) { |
512 | 2 | tablet_schema_pb->add_cluster_key_uids(i); |
513 | 2 | } |
514 | | |
515 | | // set column information |
516 | 578 | uint32_t col_ordinal = 0; |
517 | 578 | bool has_bf_columns = false; |
518 | 1.92k | for (TColumn tcolumn : tablet_schema.columns) { |
519 | 1.92k | ColumnPB* column = tablet_schema_pb->add_column(); |
520 | 1.92k | uint32_t unique_id = -1; |
521 | 1.92k | if (tcolumn.col_unique_id >= 0) { |
522 | 11 | unique_id = tcolumn.col_unique_id; |
523 | 1.91k | } else { |
524 | 1.91k | unique_id = col_ordinal_to_unique_id.at(col_ordinal); |
525 | 1.91k | } |
526 | 1.92k | col_ordinal++; |
527 | 1.92k | init_column_from_tcolumn(unique_id, tcolumn, column); |
528 | | |
529 | 1.92k | if (column->is_bf_column()) { |
530 | 0 | has_bf_columns = true; |
531 | 0 | } |
532 | | |
533 | 1.92k | if (tablet_schema.__isset.indexes) { |
534 | 2 | for (auto& index : tablet_schema.indexes) { |
535 | 2 | if (index.index_type == TIndexType::type::BLOOMFILTER || |
536 | 2 | index.index_type == TIndexType::type::NGRAM_BF) { |
537 | 0 | DCHECK_EQ(index.columns.size(), 1); |
538 | 0 | if (iequal(tcolumn.column_name, index.columns[0])) { |
539 | 0 | column->set_is_bf_column(true); |
540 | 0 | break; |
541 | 0 | } |
542 | 0 | } |
543 | 2 | } |
544 | 2 | } |
545 | 1.92k | } |
546 | | |
547 | | // copy index meta |
548 | 578 | if (tablet_schema.__isset.indexes) { |
549 | 1 | for (auto& index : tablet_schema.indexes) { |
550 | 1 | TabletIndexPB* index_pb = tablet_schema_pb->add_index(); |
551 | 1 | index_pb->set_index_id(index.index_id); |
552 | 1 | index_pb->set_index_name(index.index_name); |
553 | | // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side |
554 | | // get column unique id by name |
555 | 1 | for (auto column_name : index.columns) { |
556 | 2 | for (auto column : tablet_schema_pb->column()) { |
557 | 2 | if (iequal(column.name(), column_name)) { |
558 | 1 | index_pb->add_col_unique_id(column.unique_id()); |
559 | 1 | } |
560 | 2 | } |
561 | 1 | } |
562 | 1 | switch (index.index_type) { |
563 | 1 | case TIndexType::BITMAP: |
564 | 1 | index_pb->set_index_type(IndexType::BITMAP); |
565 | 1 | break; |
566 | 0 | case TIndexType::INVERTED: |
567 | 0 | index_pb->set_index_type(IndexType::INVERTED); |
568 | 0 | break; |
569 | 0 | case TIndexType::ANN: |
570 | 0 | index_pb->set_index_type(IndexType::ANN); |
571 | 0 | break; |
572 | 0 | case TIndexType::BLOOMFILTER: |
573 | 0 | index_pb->set_index_type(IndexType::BLOOMFILTER); |
574 | 0 | break; |
575 | 0 | case TIndexType::NGRAM_BF: |
576 | 0 | index_pb->set_index_type(IndexType::NGRAM_BF); |
577 | 0 | break; |
578 | 1 | } |
579 | | |
580 | 1 | if (index.__isset.properties) { |
581 | 0 | auto properties = index_pb->mutable_properties(); |
582 | 0 | for (auto kv : index.properties) { |
583 | 0 | (*properties)[kv.first] = kv.second; |
584 | 0 | } |
585 | 0 | } |
586 | 1 | } |
587 | 1 | } |
588 | | |
589 | 578 | tablet_schema_pb->set_next_column_unique_id(next_unique_id); |
590 | 578 | if (has_bf_columns && tablet_schema.__isset.bloom_filter_fpp) { |
591 | 0 | tablet_schema_pb->set_bf_fpp(tablet_schema.bloom_filter_fpp); |
592 | 0 | } |
593 | | |
594 | 578 | if (tablet_schema.__isset.is_in_memory) { |
595 | 0 | tablet_schema_pb->set_is_in_memory(tablet_schema.is_in_memory); |
596 | 0 | } |
597 | | |
598 | 578 | if (tablet_schema.__isset.disable_auto_compaction) { |
599 | 10 | tablet_schema_pb->set_disable_auto_compaction(tablet_schema.disable_auto_compaction); |
600 | 10 | } |
601 | | |
602 | | // Deprecated legacy flatten-nested switch. Distinct from variant_enable_nested_group. |
603 | 578 | if (tablet_schema.__isset.variant_enable_flatten_nested) { |
604 | 578 | tablet_schema_pb->set_enable_variant_flatten_nested( |
605 | 578 | tablet_schema.variant_enable_flatten_nested); |
606 | 578 | } |
607 | | |
608 | 578 | if (tablet_schema.__isset.delete_sign_idx) { |
609 | 578 | tablet_schema_pb->set_delete_sign_idx(tablet_schema.delete_sign_idx); |
610 | 578 | } |
611 | 578 | if (tablet_schema.__isset.store_row_column) { |
612 | 578 | tablet_schema_pb->set_store_row_column(tablet_schema.store_row_column); |
613 | 578 | } |
614 | 578 | if (tablet_schema.__isset.row_store_page_size) { |
615 | 578 | tablet_schema_pb->set_row_store_page_size(tablet_schema.row_store_page_size); |
616 | 578 | } |
617 | 578 | if (tablet_schema.__isset.storage_page_size) { |
618 | 578 | tablet_schema_pb->set_storage_page_size(tablet_schema.storage_page_size); |
619 | 578 | } |
620 | 578 | if (tablet_schema.__isset.storage_dict_page_size) { |
621 | 578 | tablet_schema_pb->set_storage_dict_page_size(tablet_schema.storage_dict_page_size); |
622 | 578 | } |
623 | 578 | if (tablet_schema.__isset.skip_write_index_on_load) { |
624 | 578 | tablet_schema_pb->set_skip_write_index_on_load(tablet_schema.skip_write_index_on_load); |
625 | 578 | } |
626 | 578 | if (tablet_schema.__isset.row_store_col_cids) { |
627 | 0 | tablet_schema_pb->mutable_row_store_column_unique_ids()->Add( |
628 | 0 | tablet_schema.row_store_col_cids.begin(), tablet_schema.row_store_col_cids.end()); |
629 | 0 | } |
630 | 578 | } |
631 | | |
632 | 53.0k | void TabletMeta::remove_rowset_delete_bitmap(const RowsetId& rowset_id, const Version& version) { |
633 | 53.0k | if (_enable_unique_key_merge_on_write) { |
634 | 29.9k | delete_bitmap().remove({rowset_id, 0, 0}, {rowset_id, UINT32_MAX, 0}); |
635 | 29.9k | if (config::enable_mow_verbose_log) { |
636 | 0 | LOG_INFO("delete rowset delete bitmap. tablet={}, rowset={}, version={}", tablet_id(), |
637 | 0 | rowset_id.to_string(), version.to_string()); |
638 | 0 | } |
639 | 29.9k | size_t rowset_cache_version_size = delete_bitmap().remove_rowset_cache_version(rowset_id); |
640 | 29.9k | _check_mow_rowset_cache_version_size(rowset_cache_version_size); |
641 | 29.9k | } |
642 | 53.0k | } |
643 | | |
644 | 4 | Status TabletMeta::create_from_file(const string& file_path) { |
645 | 4 | TabletMetaPB tablet_meta_pb; |
646 | 4 | RETURN_IF_ERROR(load_from_file(file_path, &tablet_meta_pb)); |
647 | 4 | init_from_pb(tablet_meta_pb); |
648 | 4 | return Status::OK(); |
649 | 4 | } |
650 | | |
651 | 14 | Status TabletMeta::load_from_file(const string& file_path, TabletMetaPB* tablet_meta_pb) { |
652 | 14 | FileHeader<TabletMetaPB> file_header(file_path); |
653 | | // In file_header.deserialize(), it validates file length, signature, checksum of protobuf. |
654 | 14 | RETURN_IF_ERROR(file_header.deserialize()); |
655 | 14 | try { |
656 | 14 | tablet_meta_pb->CopyFrom(file_header.message()); |
657 | 14 | } catch (const std::exception& e) { |
658 | 0 | LOG(WARNING) << "Failed to copy protocol buffer object: " << e.what() |
659 | 0 | << ", file=" << file_path; |
660 | 0 | return Status::Error<PARSE_PROTOBUF_ERROR>( |
661 | 0 | "fail to copy protocol buffer object. file={}, error={}", file_path, e.what()); |
662 | 0 | } |
663 | 14 | return Status::OK(); |
664 | 14 | } |
665 | | |
666 | 6 | Status TabletMeta::create_from_buffer(const uint8_t* buffer, size_t buffer_size) { |
667 | 6 | FileHeader<TabletMetaPB> file_header(""); // empty file path |
668 | 6 | RETURN_IF_ERROR(file_header.deserialize_from_memory(buffer, buffer_size)); |
669 | | |
670 | 2 | TabletMetaPB tablet_meta_pb; |
671 | 2 | try { |
672 | 2 | tablet_meta_pb.CopyFrom(file_header.message()); |
673 | 2 | } catch (const std::exception& e) { |
674 | 0 | LOG(WARNING) << "Failed to copy protocol buffer object from buffer: " << e.what(); |
675 | 0 | return Status::Error<ErrorCode::PARSE_PROTOBUF_ERROR>( |
676 | 0 | "fail to copy protocol buffer object from buffer. error={}", e.what()); |
677 | 0 | } |
678 | | |
679 | 2 | init_from_pb(tablet_meta_pb); |
680 | 2 | return Status::OK(); |
681 | 2 | } |
682 | | |
683 | | std::string TabletMeta::construct_header_file_path(const string& schema_hash_path, |
684 | 3 | int64_t tablet_id) { |
685 | 3 | std::stringstream header_name_stream; |
686 | 3 | header_name_stream << schema_hash_path << "/" << tablet_id << ".hdr"; |
687 | 3 | return header_name_stream.str(); |
688 | 3 | } |
689 | | |
690 | 0 | Status TabletMeta::save_as_json(const string& file_path) { |
691 | 0 | std::string json_meta; |
692 | 0 | json2pb::Pb2JsonOptions json_options; |
693 | 0 | json_options.pretty_json = true; |
694 | 0 | json_options.bytes_to_base64 = true; |
695 | 0 | to_json(&json_meta, json_options); |
696 | | // save to file |
697 | 0 | io::FileWriterPtr file_writer; |
698 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_file(file_path, &file_writer)); |
699 | 0 | RETURN_IF_ERROR(file_writer->append(json_meta)); |
700 | 0 | RETURN_IF_ERROR(file_writer->close()); |
701 | 0 | return Status::OK(); |
702 | 0 | } |
703 | | |
704 | 1.07k | Status TabletMeta::save(const string& file_path) { |
705 | 1.07k | TabletMetaPB tablet_meta_pb; |
706 | 1.07k | to_meta_pb(&tablet_meta_pb, false); |
707 | 1.07k | return TabletMeta::save(file_path, tablet_meta_pb); |
708 | 1.07k | } |
709 | | |
710 | 1.08k | Status TabletMeta::save(const string& file_path, const TabletMetaPB& tablet_meta_pb) { |
711 | 1.08k | DCHECK(!file_path.empty()); |
712 | 1.08k | FileHeader<TabletMetaPB> file_header(file_path); |
713 | 1.08k | try { |
714 | 1.08k | file_header.mutable_message()->CopyFrom(tablet_meta_pb); |
715 | 1.08k | } catch (...) { |
716 | 0 | LOG(WARNING) << "fail to copy protocol buffer object. file='" << file_path; |
717 | 0 | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
718 | 0 | "fail to copy protocol buffer object. file={}", file_path); |
719 | 0 | } |
720 | 1.08k | RETURN_IF_ERROR(file_header.prepare()); |
721 | 1.08k | RETURN_IF_ERROR(file_header.serialize()); |
722 | 1.08k | return Status::OK(); |
723 | 1.08k | } |
724 | | |
725 | 1.99k | Status TabletMeta::save_meta(DataDir* data_dir) { |
726 | 1.99k | std::lock_guard<std::shared_mutex> wrlock(_meta_lock); |
727 | 1.99k | return _save_meta(data_dir); |
728 | 1.99k | } |
729 | | |
730 | 1.99k | Status TabletMeta::_save_meta(DataDir* data_dir) { |
731 | | // check if tablet uid is valid |
732 | 1.99k | if (_tablet_uid.hi == 0 && _tablet_uid.lo == 0) { |
733 | 0 | LOG(FATAL) << "tablet_uid is invalid" |
734 | 0 | << " tablet=" << tablet_id() << " _tablet_uid=" << _tablet_uid.to_string(); |
735 | 0 | } |
736 | 1.99k | string meta_binary; |
737 | | |
738 | 1.99k | auto t1 = MonotonicMicros(); |
739 | 1.99k | serialize(&meta_binary); |
740 | 1.99k | auto t2 = MonotonicMicros(); |
741 | 1.99k | Status status = TabletMetaManager::save(data_dir, tablet_id(), schema_hash(), meta_binary); |
742 | 1.99k | if (!status.ok()) { |
743 | 0 | LOG(FATAL) << "fail to save tablet_meta. status=" << status << ", tablet_id=" << tablet_id() |
744 | 0 | << ", schema_hash=" << schema_hash(); |
745 | 0 | } |
746 | 1.99k | auto t3 = MonotonicMicros(); |
747 | 1.99k | auto cost = t3 - t1; |
748 | 1.99k | if (cost > 1 * 1000 * 1000) { |
749 | 0 | LOG(INFO) << "save tablet(" << tablet_id() << ") meta too slow. serialize cost " << t2 - t1 |
750 | 0 | << "(us), serialized binary size: " << meta_binary.length() |
751 | 0 | << "(bytes), write rocksdb cost " << t3 - t2 << "(us)"; |
752 | 0 | } |
753 | 1.99k | return status; |
754 | 1.99k | } |
755 | | |
756 | 1.99k | void TabletMeta::serialize(string* meta_binary) { |
757 | 1.99k | TabletMetaPB tablet_meta_pb; |
758 | 1.99k | to_meta_pb(&tablet_meta_pb, false); |
759 | 1.99k | if (tablet_meta_pb.partition_id() <= 0) { |
760 | 244 | LOG(WARNING) << "invalid partition id " << tablet_meta_pb.partition_id() << " tablet " |
761 | 244 | << tablet_meta_pb.tablet_id(); |
762 | 244 | } |
763 | 1.99k | DBUG_EXECUTE_IF("TabletMeta::serialize::zero_partition_id", { |
764 | 1.99k | long partition_id = tablet_meta_pb.partition_id(); |
765 | 1.99k | tablet_meta_pb.set_partition_id(0); |
766 | 1.99k | LOG(WARNING) << "set debug point TabletMeta::serialize::zero_partition_id old=" |
767 | 1.99k | << partition_id << " new=" << tablet_meta_pb.DebugString(); |
768 | 1.99k | }); |
769 | 1.99k | bool serialize_success = tablet_meta_pb.SerializeToString(meta_binary); |
770 | 1.99k | if (!_rs_metas.empty() || !_stale_rs_metas.empty()) { |
771 | 1.99k | _avg_rs_meta_serialize_size = |
772 | 1.99k | meta_binary->length() / (_rs_metas.size() + _stale_rs_metas.size()); |
773 | 1.99k | if (meta_binary->length() > config::tablet_meta_serialize_size_limit || |
774 | 1.99k | !serialize_success) { |
775 | 0 | int64_t origin_meta_size = meta_binary->length(); |
776 | 0 | int64_t stale_rowsets_num = tablet_meta_pb.stale_rs_metas().size(); |
777 | 0 | tablet_meta_pb.clear_stale_rs_metas(); |
778 | 0 | meta_binary->clear(); |
779 | 0 | serialize_success = tablet_meta_pb.SerializeToString(meta_binary); |
780 | 0 | LOG(WARNING) << "tablet meta serialization size exceeds limit: " |
781 | 0 | << config::tablet_meta_serialize_size_limit |
782 | 0 | << " clean up stale rowsets, tablet id: " << tablet_id() |
783 | 0 | << " stale rowset num: " << stale_rowsets_num |
784 | 0 | << " serialization size before clean " << origin_meta_size |
785 | 0 | << " serialization size after clean " << meta_binary->length(); |
786 | 0 | } |
787 | 1.99k | } |
788 | | |
789 | 1.99k | if (!serialize_success) { |
790 | 0 | LOG(FATAL) << "failed to serialize meta " << tablet_id(); |
791 | 0 | } |
792 | 1.99k | } |
793 | | |
794 | 142k | Status TabletMeta::deserialize(std::string_view meta_binary) { |
795 | 142k | TabletMetaPB tablet_meta_pb; |
796 | 142k | bool parsed = tablet_meta_pb.ParseFromArray(meta_binary.data(), |
797 | 142k | static_cast<int32_t>(meta_binary.size())); |
798 | 142k | if (!parsed) { |
799 | 0 | return Status::Error<INIT_FAILED>("parse tablet meta failed"); |
800 | 0 | } |
801 | 142k | init_from_pb(tablet_meta_pb); |
802 | 142k | return Status::OK(); |
803 | 142k | } |
804 | | |
805 | 478k | void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) { |
806 | 478k | _table_id = tablet_meta_pb.table_id(); |
807 | 478k | _index_id = tablet_meta_pb.index_id(); |
808 | 478k | _partition_id = tablet_meta_pb.partition_id(); |
809 | 478k | _tablet_id = tablet_meta_pb.tablet_id(); |
810 | 478k | _replica_id = tablet_meta_pb.replica_id(); |
811 | 478k | _schema_hash = tablet_meta_pb.schema_hash(); |
812 | 478k | _shard_id = tablet_meta_pb.shard_id(); |
813 | 478k | _creation_time = tablet_meta_pb.creation_time(); |
814 | 478k | _cumulative_layer_point = tablet_meta_pb.cumulative_layer_point(); |
815 | 478k | _tablet_uid = TabletUid(tablet_meta_pb.tablet_uid()); |
816 | 478k | _ttl_seconds = tablet_meta_pb.ttl_seconds(); |
817 | 478k | if (tablet_meta_pb.has_tablet_type()) { |
818 | 478k | _tablet_type = tablet_meta_pb.tablet_type(); |
819 | 478k | } else { |
820 | 212 | _tablet_type = TabletTypePB::TABLET_TYPE_DISK; |
821 | 212 | } |
822 | | |
823 | | // init _tablet_state |
824 | 478k | switch (tablet_meta_pb.tablet_state()) { |
825 | 20.3k | case PB_NOTREADY: |
826 | 20.3k | _tablet_state = TabletState::TABLET_NOTREADY; |
827 | 20.3k | break; |
828 | 456k | case PB_RUNNING: |
829 | 456k | _tablet_state = TabletState::TABLET_RUNNING; |
830 | 456k | break; |
831 | 0 | case PB_TOMBSTONED: |
832 | 0 | _tablet_state = TabletState::TABLET_TOMBSTONED; |
833 | 0 | break; |
834 | 0 | case PB_STOPPED: |
835 | 0 | _tablet_state = TabletState::TABLET_STOPPED; |
836 | 0 | break; |
837 | 1.37k | case PB_SHUTDOWN: |
838 | 1.37k | _tablet_state = TabletState::TABLET_SHUTDOWN; |
839 | 1.37k | break; |
840 | 0 | default: |
841 | 0 | LOG(WARNING) << "tablet has no state. tablet=" << tablet_id() |
842 | 0 | << ", schema_hash=" << schema_hash(); |
843 | 478k | } |
844 | | |
845 | | // init _schema |
846 | 478k | TabletSchemaSPtr schema = std::make_shared<TabletSchema>(); |
847 | 478k | schema->init_from_pb(tablet_meta_pb.schema()); |
848 | 478k | if (_handle) { |
849 | 4 | TabletSchemaCache::instance()->release(_handle); |
850 | 4 | } |
851 | 478k | auto pair = TabletSchemaCache::instance()->insert(schema->to_key()); |
852 | 478k | _handle = pair.first; |
853 | 478k | _schema = pair.second; |
854 | | |
855 | 478k | if (tablet_meta_pb.has_row_binlog_schema()) { |
856 | 25 | TabletSchemaSPtr row_binlog_schema = std::make_shared<TabletSchema>(); |
857 | 25 | row_binlog_schema->init_from_pb(tablet_meta_pb.row_binlog_schema()); |
858 | 25 | _row_binlog_schema = std::move(row_binlog_schema); |
859 | 25 | _row_binlog_schema_hash = tablet_meta_pb.row_binlog_schema_hash(); |
860 | 25 | } |
861 | | |
862 | 479k | if (tablet_meta_pb.has_enable_unique_key_merge_on_write()) { |
863 | 479k | _enable_unique_key_merge_on_write = tablet_meta_pb.enable_unique_key_merge_on_write(); |
864 | 479k | _delete_bitmap->set_tablet_id(_tablet_id); |
865 | 479k | _binlog_delvec->set_tablet_id(_tablet_id); |
866 | 479k | } |
867 | | |
868 | | // init _rs_metas |
869 | 478k | for (auto& it : tablet_meta_pb.rs_metas()) { |
870 | 248k | RowsetMetaSharedPtr rs_meta(new RowsetMeta()); |
871 | 248k | rs_meta->init_from_pb(it); |
872 | 248k | _rs_metas.emplace(rs_meta->version(), rs_meta); |
873 | 248k | } |
874 | | |
875 | | // For mow table, delete bitmap of stale rowsets has not been persisted. |
876 | | // When be restart, query should not read the stale rowset, otherwise duplicate keys |
877 | | // will be read out. Therefore, we don't add them to _stale_rs_meta for mow table. |
878 | 479k | if (!config::skip_loading_stale_rowset_meta && !_enable_unique_key_merge_on_write) { |
879 | 363k | for (auto& it : tablet_meta_pb.stale_rs_metas()) { |
880 | 2.81k | RowsetMetaSharedPtr rs_meta(new RowsetMeta()); |
881 | 2.81k | rs_meta->init_from_pb(it); |
882 | 2.81k | _stale_rs_metas.emplace(rs_meta->version(), rs_meta); |
883 | 2.81k | } |
884 | 363k | } |
885 | | |
886 | 478k | for (auto& it : tablet_meta_pb.row_binlog_rs_metas()) { |
887 | 35 | RowsetMetaSharedPtr rs_meta(new RowsetMeta()); |
888 | 35 | rs_meta->init_from_pb(it); |
889 | 35 | _row_binlog_rs_metas.emplace(rs_meta->version(), rs_meta); |
890 | 35 | } |
891 | | |
892 | 479k | if (tablet_meta_pb.has_in_restore_mode()) { |
893 | 479k | _in_restore_mode = tablet_meta_pb.in_restore_mode(); |
894 | 479k | } |
895 | | |
896 | 478k | if (tablet_meta_pb.has_preferred_rowset_type()) { |
897 | 478k | _preferred_rowset_type = tablet_meta_pb.preferred_rowset_type(); |
898 | 478k | } |
899 | | |
900 | 478k | _storage_policy_id = tablet_meta_pb.storage_policy_id(); |
901 | 478k | if (tablet_meta_pb.has_cooldown_meta_id()) { |
902 | 335k | _cooldown_meta_id = tablet_meta_pb.cooldown_meta_id(); |
903 | 335k | } |
904 | | |
905 | 478k | if (tablet_meta_pb.has_delete_bitmap()) { |
906 | 26.0k | int rst_ids_size = tablet_meta_pb.delete_bitmap().rowset_ids_size(); |
907 | 26.0k | int seg_ids_size = tablet_meta_pb.delete_bitmap().segment_ids_size(); |
908 | 26.0k | int versions_size = tablet_meta_pb.delete_bitmap().versions_size(); |
909 | 26.0k | int seg_maps_size = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps_size(); |
910 | 26.0k | int binlog_mark_size = tablet_meta_pb.delete_bitmap().is_binlog_delvec_size(); |
911 | 26.0k | CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size && |
912 | 26.0k | seg_maps_size == versions_size); |
913 | 26.0k | CHECK(binlog_mark_size == 0 || binlog_mark_size == rst_ids_size); |
914 | 28.1k | for (int i = 0; i < rst_ids_size; ++i) { |
915 | 2.06k | RowsetId rst_id; |
916 | 2.06k | rst_id.init(tablet_meta_pb.delete_bitmap().rowset_ids(i)); |
917 | 2.06k | auto seg_id = tablet_meta_pb.delete_bitmap().segment_ids(i); |
918 | 2.06k | auto ver = tablet_meta_pb.delete_bitmap().versions(i); |
919 | 2.06k | auto bitmap = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps(i).data(); |
920 | 2.06k | bool from_binlog = tablet_meta_pb.delete_bitmap().is_binlog_delvec_size() > 0 |
921 | 2.06k | ? tablet_meta_pb.delete_bitmap().is_binlog_delvec(i) |
922 | 2.06k | : false; |
923 | 2.06k | if (!from_binlog) { |
924 | 2.05k | delete_bitmap().delete_bitmap[{rst_id, seg_id, ver}] = |
925 | 2.05k | roaring::Roaring::read(bitmap); |
926 | 2.05k | } else { |
927 | 6 | binlog_delvec().delete_bitmap[{rst_id, seg_id, ver}] = |
928 | 6 | roaring::Roaring::read(bitmap); |
929 | 6 | } |
930 | 2.06k | } |
931 | 26.0k | } |
932 | | |
933 | 478k | if (tablet_meta_pb.has_binlog_config()) { |
934 | 142k | _binlog_config = tablet_meta_pb.binlog_config(); |
935 | 142k | } |
936 | 478k | _compaction_policy = tablet_meta_pb.compaction_policy(); |
937 | 478k | _time_series_compaction_goal_size_mbytes = |
938 | 478k | tablet_meta_pb.time_series_compaction_goal_size_mbytes(); |
939 | 478k | _time_series_compaction_file_count_threshold = |
940 | 478k | tablet_meta_pb.time_series_compaction_file_count_threshold(); |
941 | 478k | _time_series_compaction_time_threshold_seconds = |
942 | 478k | tablet_meta_pb.time_series_compaction_time_threshold_seconds(); |
943 | 478k | _time_series_compaction_empty_rowsets_threshold = |
944 | 478k | tablet_meta_pb.time_series_compaction_empty_rowsets_threshold(); |
945 | 478k | _time_series_compaction_level_threshold = |
946 | 478k | tablet_meta_pb.time_series_compaction_level_threshold(); |
947 | 478k | _vertical_compaction_num_columns_per_group = |
948 | 478k | tablet_meta_pb.vertical_compaction_num_columns_per_group(); |
949 | | |
950 | 478k | if (tablet_meta_pb.has_encryption_algorithm()) { |
951 | 478k | _encryption_algorithm = tablet_meta_pb.encryption_algorithm(); |
952 | 478k | } |
953 | | |
954 | 478k | if (tablet_meta_pb.has_row_binlog_schema_hash()) { |
955 | 25 | _row_binlog_schema_hash = tablet_meta_pb.row_binlog_schema_hash(); |
956 | 25 | } |
957 | 478k | } |
958 | | |
959 | 3.08k | void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb, bool cloud_get_rowset_meta) { |
960 | 3.08k | tablet_meta_pb->set_table_id(table_id()); |
961 | 3.08k | tablet_meta_pb->set_index_id(index_id()); |
962 | 3.08k | tablet_meta_pb->set_partition_id(partition_id()); |
963 | 3.08k | tablet_meta_pb->set_tablet_id(tablet_id()); |
964 | 3.08k | tablet_meta_pb->set_replica_id(replica_id()); |
965 | 3.08k | tablet_meta_pb->set_schema_hash(schema_hash()); |
966 | 3.08k | tablet_meta_pb->set_shard_id(shard_id()); |
967 | 3.08k | tablet_meta_pb->set_creation_time(creation_time()); |
968 | 3.08k | tablet_meta_pb->set_cumulative_layer_point(cumulative_layer_point()); |
969 | 3.08k | *(tablet_meta_pb->mutable_tablet_uid()) = tablet_uid().to_proto(); |
970 | 3.08k | tablet_meta_pb->set_tablet_type(_tablet_type); |
971 | 3.08k | tablet_meta_pb->set_ttl_seconds(_ttl_seconds); |
972 | 3.08k | switch (tablet_state()) { |
973 | 9 | case TABLET_NOTREADY: |
974 | 9 | tablet_meta_pb->set_tablet_state(PB_NOTREADY); |
975 | 9 | break; |
976 | 648 | case TABLET_RUNNING: |
977 | 648 | tablet_meta_pb->set_tablet_state(PB_RUNNING); |
978 | 648 | break; |
979 | 0 | case TABLET_TOMBSTONED: |
980 | 0 | tablet_meta_pb->set_tablet_state(PB_TOMBSTONED); |
981 | 0 | break; |
982 | 0 | case TABLET_STOPPED: |
983 | 0 | tablet_meta_pb->set_tablet_state(PB_STOPPED); |
984 | 0 | break; |
985 | 2.43k | case TABLET_SHUTDOWN: |
986 | 2.43k | tablet_meta_pb->set_tablet_state(PB_SHUTDOWN); |
987 | 2.43k | break; |
988 | 3.08k | } |
989 | | |
990 | | // RowsetMetaPB is separated from TabletMetaPB |
991 | 3.08k | if (!config::is_cloud_mode() || cloud_get_rowset_meta) { |
992 | 16.0k | for (const auto& [_, rs] : _rs_metas) { |
993 | 16.0k | rs->to_rowset_pb(tablet_meta_pb->add_rs_metas()); |
994 | 16.0k | } |
995 | 3.08k | for (const auto& [_, rs] : _stale_rs_metas) { |
996 | 244 | rs->to_rowset_pb(tablet_meta_pb->add_stale_rs_metas()); |
997 | 244 | } |
998 | 3.08k | for (const auto& [_, rs] : _row_binlog_rs_metas) { |
999 | 6 | rs->to_rowset_pb(tablet_meta_pb->add_row_binlog_rs_metas()); |
1000 | 6 | } |
1001 | 3.08k | } |
1002 | | |
1003 | 3.08k | _schema->to_schema_pb(tablet_meta_pb->mutable_schema()); |
1004 | | |
1005 | 3.08k | if (_row_binlog_schema != nullptr) { |
1006 | 6 | _row_binlog_schema->to_schema_pb(tablet_meta_pb->mutable_row_binlog_schema()); |
1007 | 6 | tablet_meta_pb->set_row_binlog_schema_hash(_row_binlog_schema_hash); |
1008 | 6 | } |
1009 | | |
1010 | 3.08k | tablet_meta_pb->set_in_restore_mode(in_restore_mode()); |
1011 | | |
1012 | | // to avoid modify tablet meta to the greatest extend |
1013 | 3.08k | if (_preferred_rowset_type == BETA_ROWSET) { |
1014 | 3.08k | tablet_meta_pb->set_preferred_rowset_type(_preferred_rowset_type); |
1015 | 3.08k | } |
1016 | 3.08k | if (_storage_policy_id > 0) { |
1017 | 5 | tablet_meta_pb->set_storage_policy_id(_storage_policy_id); |
1018 | 5 | } |
1019 | 3.08k | if (_cooldown_meta_id.initialized()) { |
1020 | 5 | tablet_meta_pb->mutable_cooldown_meta_id()->CopyFrom(_cooldown_meta_id.to_proto()); |
1021 | 5 | } |
1022 | | |
1023 | 3.08k | tablet_meta_pb->set_enable_unique_key_merge_on_write(_enable_unique_key_merge_on_write); |
1024 | | |
1025 | 3.08k | if (_enable_unique_key_merge_on_write) { |
1026 | 220 | std::set<RowsetId> stale_rs_ids; |
1027 | 220 | for (const auto& [_, rowset] : _stale_rs_metas) { |
1028 | 194 | stale_rs_ids.insert(rowset->rowset_id()); |
1029 | 194 | } |
1030 | 220 | DeleteBitmapPB* delete_bitmap_pb = tablet_meta_pb->mutable_delete_bitmap(); |
1031 | 220 | for (auto& [id, bitmap] : delete_bitmap().snapshot().delete_bitmap) { |
1032 | 76 | auto& [rowset_id, segment_id, ver] = id; |
1033 | 76 | if (stale_rs_ids.count(rowset_id) != 0) { |
1034 | 26 | continue; |
1035 | 26 | } |
1036 | 50 | delete_bitmap_pb->add_rowset_ids(rowset_id.to_string()); |
1037 | 50 | delete_bitmap_pb->add_segment_ids(segment_id); |
1038 | 50 | delete_bitmap_pb->add_versions(ver); |
1039 | 50 | delete_bitmap_pb->add_is_binlog_delvec(false); |
1040 | 50 | std::string bitmap_data(bitmap.getSizeInBytes(), '\0'); |
1041 | 50 | bitmap.write(bitmap_data.data()); |
1042 | 50 | *(delete_bitmap_pb->add_segment_delete_bitmaps()) = std::move(bitmap_data); |
1043 | 50 | } |
1044 | | |
1045 | 220 | for (auto& [id, bitmap] : binlog_delvec().snapshot().delete_bitmap) { |
1046 | 0 | auto& [rowset_id, segment_id, ver] = id; |
1047 | 0 | delete_bitmap_pb->add_rowset_ids(rowset_id.to_string()); |
1048 | 0 | delete_bitmap_pb->add_segment_ids(segment_id); |
1049 | 0 | delete_bitmap_pb->add_versions(ver); |
1050 | 0 | delete_bitmap_pb->add_is_binlog_delvec(true); |
1051 | 0 | std::string bitmap_data(bitmap.getSizeInBytes(), '\0'); |
1052 | 0 | bitmap.write(bitmap_data.data()); |
1053 | 0 | *(delete_bitmap_pb->add_segment_delete_bitmaps()) = std::move(bitmap_data); |
1054 | 0 | } |
1055 | 220 | } |
1056 | 3.08k | _binlog_config.to_pb(tablet_meta_pb->mutable_binlog_config()); |
1057 | 3.08k | tablet_meta_pb->set_compaction_policy(compaction_policy()); |
1058 | 3.08k | tablet_meta_pb->set_time_series_compaction_goal_size_mbytes( |
1059 | 3.08k | time_series_compaction_goal_size_mbytes()); |
1060 | 3.08k | tablet_meta_pb->set_time_series_compaction_file_count_threshold( |
1061 | 3.08k | time_series_compaction_file_count_threshold()); |
1062 | 3.08k | tablet_meta_pb->set_time_series_compaction_time_threshold_seconds( |
1063 | 3.08k | time_series_compaction_time_threshold_seconds()); |
1064 | 3.08k | tablet_meta_pb->set_time_series_compaction_empty_rowsets_threshold( |
1065 | 3.08k | time_series_compaction_empty_rowsets_threshold()); |
1066 | 3.08k | tablet_meta_pb->set_time_series_compaction_level_threshold( |
1067 | 3.08k | time_series_compaction_level_threshold()); |
1068 | 3.08k | tablet_meta_pb->set_vertical_compaction_num_columns_per_group( |
1069 | 3.08k | vertical_compaction_num_columns_per_group()); |
1070 | | |
1071 | 3.08k | tablet_meta_pb->set_encryption_algorithm(_encryption_algorithm); |
1072 | 3.08k | } |
1073 | | |
1074 | 4 | void TabletMeta::to_json(string* json_string, json2pb::Pb2JsonOptions& options) { |
1075 | 4 | TabletMetaPB tablet_meta_pb; |
1076 | 4 | to_meta_pb(&tablet_meta_pb, false); |
1077 | 4 | json2pb::ProtoMessageToJson(tablet_meta_pb, json_string, options); |
1078 | 4 | } |
1079 | | |
1080 | 1.20M | Version TabletMeta::max_version() const { |
1081 | 1.20M | Version max_version = {-1, 0}; |
1082 | 3.93M | for (const auto& [_, rs_meta] : _rs_metas) { |
1083 | 3.93M | if (rs_meta->end_version() > max_version.second) { |
1084 | 1.40M | max_version = rs_meta->version(); |
1085 | 1.40M | } |
1086 | 3.93M | } |
1087 | 1.20M | return max_version; |
1088 | 1.20M | } |
1089 | | |
1090 | 420k | size_t TabletMeta::version_count_cross_with_range(const Version& range) const { |
1091 | 420k | size_t count = 0; |
1092 | 718k | for (const auto& [_, rs_meta] : _rs_metas) { |
1093 | 718k | if (!(range.first > rs_meta->version().second || range.second < rs_meta->version().first)) { |
1094 | 718k | count++; |
1095 | 718k | } |
1096 | 718k | } |
1097 | 420k | return count; |
1098 | 420k | } |
1099 | | |
1100 | 9.25k | Status TabletMeta::add_rs_meta(const RowsetMetaSharedPtr& rs_meta) { |
1101 | | // check RowsetMeta is valid |
1102 | 504k | for (const auto& [_, rs] : _rs_metas) { |
1103 | 504k | if (rs->version() == rs_meta->version()) { |
1104 | 0 | if (rs->rowset_id() != rs_meta->rowset_id()) { |
1105 | 0 | return Status::Error<PUSH_VERSION_ALREADY_EXIST>( |
1106 | 0 | "version already exist. rowset_id={}, version={}, tablet={}", |
1107 | 0 | rs->rowset_id().to_string(), rs->version().to_string(), tablet_id()); |
1108 | 0 | } else { |
1109 | | // rowsetid,version is equal, it is a duplicate req, skip it |
1110 | 0 | return Status::OK(); |
1111 | 0 | } |
1112 | 0 | } |
1113 | 504k | } |
1114 | 9.25k | _rs_metas.emplace(rs_meta->version(), rs_meta); |
1115 | 9.25k | return Status::OK(); |
1116 | 9.25k | } |
1117 | | |
1118 | 3 | Status TabletMeta::add_row_binlog_rs_meta(const RowsetMetaSharedPtr& row_binlog_meta) { |
1119 | | // check RowsetMeta is valid |
1120 | 3 | for (auto& [_, rs] : _row_binlog_rs_metas) { |
1121 | 0 | if (rs->version() == row_binlog_meta->version()) { |
1122 | 0 | if (rs->rowset_id() != row_binlog_meta->rowset_id()) { |
1123 | 0 | return Status::Error<PUSH_VERSION_ALREADY_EXIST>( |
1124 | 0 | "binlog version already exist. binlog_rowset_id={}, version={}, tablet={}", |
1125 | 0 | rs->rowset_id().to_string(), rs->version().to_string(), tablet_id()); |
1126 | 0 | } else { |
1127 | | // rowsetid,version is equal, it is a duplicate req, skip it |
1128 | 0 | return Status::OK(); |
1129 | 0 | } |
1130 | 0 | } |
1131 | 0 | } |
1132 | 3 | _row_binlog_rs_metas.emplace(row_binlog_meta->version(), row_binlog_meta); |
1133 | 3 | return Status::OK(); |
1134 | 3 | } |
1135 | | |
1136 | 305k | void TabletMeta::add_rowsets_unchecked(const std::vector<RowsetSharedPtr>& to_add) { |
1137 | 318k | for (const auto& rs : to_add) { |
1138 | 318k | _rs_metas.emplace(rs->rowset_meta()->version(), rs->rowset_meta()); |
1139 | 318k | } |
1140 | 305k | } |
1141 | | |
1142 | | void TabletMeta::delete_rs_meta_by_version(const Version& version, |
1143 | 870 | std::vector<RowsetMetaSharedPtr>* deleted_rs_metas) { |
1144 | 870 | size_t rowset_cache_version_size = 0; |
1145 | 870 | if (auto it = _rs_metas.find(version); it != _rs_metas.end()) { |
1146 | 870 | if (deleted_rs_metas != nullptr) { |
1147 | 0 | deleted_rs_metas->push_back(it->second); |
1148 | 0 | } |
1149 | 870 | auto rowset_id = it->second->rowset_id(); |
1150 | 870 | _rs_metas.erase(it); |
1151 | 870 | if (_enable_unique_key_merge_on_write) { |
1152 | 47 | rowset_cache_version_size = _delete_bitmap->remove_rowset_cache_version(rowset_id); |
1153 | 47 | } |
1154 | 870 | return; |
1155 | 870 | } |
1156 | 0 | _check_mow_rowset_cache_version_size(rowset_cache_version_size); |
1157 | 0 | } |
1158 | | |
1159 | | void TabletMeta::modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add, |
1160 | | const std::vector<RowsetMetaSharedPtr>& to_delete, |
1161 | 6.54k | bool same_version) { |
1162 | 6.54k | size_t rowset_cache_version_size = 0; |
1163 | | // Remove to_delete rowsets from _rs_metas |
1164 | 55.4k | for (auto rs_to_del : to_delete) { |
1165 | 55.4k | if (auto it = _rs_metas.find(rs_to_del->version()); it != _rs_metas.end()) { |
1166 | 55.4k | auto rowset_id = it->second->rowset_id(); |
1167 | 55.4k | _rs_metas.erase(it); |
1168 | 55.4k | if (_enable_unique_key_merge_on_write) { |
1169 | 31.0k | rowset_cache_version_size = _delete_bitmap->remove_rowset_cache_version(rowset_id); |
1170 | 31.0k | } |
1171 | 55.4k | } |
1172 | 55.4k | } |
1173 | 6.54k | if (!same_version) { |
1174 | | // put to_delete rowsets in _stale_rs_metas. |
1175 | 55.4k | for (auto rs_to_del : to_delete) { |
1176 | 55.4k | _stale_rs_metas.emplace(rs_to_del->version(), rs_to_del); |
1177 | 55.4k | } |
1178 | 6.51k | } |
1179 | | |
1180 | | // put to_add rowsets in _rs_metas. |
1181 | 6.54k | for (auto rs_to_add : to_add) { |
1182 | 36 | _rs_metas.emplace(rs_to_add->version(), rs_to_add); |
1183 | 36 | } |
1184 | 6.54k | _check_mow_rowset_cache_version_size(rowset_cache_version_size); |
1185 | 6.54k | } |
1186 | | |
1187 | | void TabletMeta::modify_row_binlog_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add, |
1188 | 0 | const std::vector<RowsetMetaSharedPtr>& to_delete) { |
1189 | 0 | for (const auto& rs_to_del : to_delete) { |
1190 | 0 | _row_binlog_rs_metas.erase(rs_to_del->version()); |
1191 | 0 | } |
1192 | |
|
1193 | 0 | for (const auto& rs_to_add : to_add) { |
1194 | 0 | _row_binlog_rs_metas.emplace(rs_to_add->version(), rs_to_add); |
1195 | 0 | } |
1196 | 0 | } |
1197 | | |
1198 | | // Use the passing "rs_metas" to replace the rs meta in this tablet meta |
1199 | | // Also clear the _stale_rs_metas because this tablet meta maybe copyied from |
1200 | | // an existing tablet before. Add after revise, only the passing "rs_metas" |
1201 | | // is needed. |
1202 | 5 | void TabletMeta::revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas) { |
1203 | 5 | { |
1204 | 5 | std::lock_guard<std::shared_mutex> wrlock(_meta_lock); |
1205 | 5 | _rs_metas.clear(); |
1206 | 10 | for (auto& rs_meta : rs_metas) { |
1207 | 10 | _rs_metas.emplace(rs_meta->version(), rs_meta); |
1208 | 10 | } |
1209 | 5 | _stale_rs_metas.clear(); |
1210 | 5 | } |
1211 | 5 | if (_enable_unique_key_merge_on_write) { |
1212 | 0 | _delete_bitmap->clear_rowset_cache_version(); |
1213 | 0 | } |
1214 | 5 | } |
1215 | | |
1216 | 0 | void TabletMeta::revise_row_binlog_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas) { |
1217 | 0 | std::lock_guard<std::shared_mutex> wrlock(_meta_lock); |
1218 | 0 | _row_binlog_rs_metas.clear(); |
1219 | 0 | for (auto& rs_meta : rs_metas) { |
1220 | 0 | _row_binlog_rs_metas.emplace(rs_meta->version(), rs_meta); |
1221 | 0 | } |
1222 | 0 | } |
1223 | | |
1224 | | // This method should call after revise_rs_metas, since new rs_metas might be a subset |
1225 | | // of original tablet, we should revise the delete_bitmap according to current rowset. |
1226 | | // |
1227 | | // Delete bitmap is protected by Tablet::_meta_lock, we don't need to acquire the |
1228 | | // TabletMeta's _meta_lock |
1229 | 1 | void TabletMeta::revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap) { |
1230 | 1 | _delete_bitmap = std::make_unique<DeleteBitmap>(tablet_id()); |
1231 | 2 | for (const auto& [_, rs] : _rs_metas) { |
1232 | 2 | DeleteBitmap rs_bm(tablet_id()); |
1233 | 2 | delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX}, |
1234 | 2 | &rs_bm); |
1235 | 2 | _delete_bitmap->merge(rs_bm); |
1236 | 2 | } |
1237 | 1 | for (const auto& [_, rs] : _stale_rs_metas) { |
1238 | 0 | DeleteBitmap rs_bm(tablet_id()); |
1239 | 0 | delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX}, |
1240 | 0 | &rs_bm); |
1241 | 0 | _delete_bitmap->merge(rs_bm); |
1242 | 0 | } |
1243 | 1 | } |
1244 | | |
1245 | 0 | void TabletMeta::revise_binlog_delvec_unlocked(const DeleteBitmap& binlog_delvec) { |
1246 | 0 | _binlog_delvec = std::make_unique<DeleteBitmap>(tablet_id()); |
1247 | 0 | for (const auto& [_, rs] : _row_binlog_rs_metas) { |
1248 | 0 | DeleteBitmap rs_bm(tablet_id()); |
1249 | 0 | binlog_delvec.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX}, |
1250 | 0 | &rs_bm); |
1251 | 0 | _binlog_delvec->merge(rs_bm); |
1252 | 0 | } |
1253 | 0 | } |
1254 | | |
1255 | 54.6k | void TabletMeta::delete_stale_rs_meta_by_version(const Version& version) { |
1256 | 54.6k | _stale_rs_metas.erase(version); |
1257 | 54.6k | } |
1258 | | |
1259 | 0 | RowsetMetaSharedPtr TabletMeta::acquire_rs_meta_by_version(const Version& version) const { |
1260 | 0 | if (auto it = _rs_metas.find(version); it != _rs_metas.end()) { |
1261 | 0 | return it->second; |
1262 | 0 | } |
1263 | 0 | return nullptr; |
1264 | 0 | } |
1265 | | |
1266 | 2.68k | RowsetMetaSharedPtr TabletMeta::acquire_stale_rs_meta_by_version(const Version& version) const { |
1267 | 2.68k | if (auto it = _stale_rs_metas.find(version); it != _stale_rs_metas.end()) { |
1268 | 2.67k | return it->second; |
1269 | 2.67k | } |
1270 | 8 | return nullptr; |
1271 | 2.68k | } |
1272 | | |
1273 | | RowsetMetaSharedPtr TabletMeta::acquire_row_binlog_rs_meta_by_version( |
1274 | 0 | const Version& version) const { |
1275 | 0 | if (auto it = _row_binlog_rs_metas.find(version); it != _row_binlog_rs_metas.end()) { |
1276 | 0 | return it->second; |
1277 | 0 | } |
1278 | 0 | return nullptr; |
1279 | 0 | } |
1280 | | |
1281 | 21 | Status TabletMeta::set_partition_id(int64_t partition_id) { |
1282 | 21 | if ((_partition_id > 0 && _partition_id != partition_id) || partition_id < 1) { |
1283 | 0 | LOG(WARNING) << "cur partition id=" << _partition_id << " new partition id=" << partition_id |
1284 | 0 | << " not equal"; |
1285 | 0 | } |
1286 | 21 | _partition_id = partition_id; |
1287 | 21 | return Status::OK(); |
1288 | 21 | } |
1289 | | |
1290 | 0 | void TabletMeta::clear_stale_rowset() { |
1291 | 0 | _stale_rs_metas.clear(); |
1292 | 0 | if (_enable_unique_key_merge_on_write) { |
1293 | 0 | _delete_bitmap->clear_rowset_cache_version(); |
1294 | 0 | } |
1295 | 0 | } |
1296 | | |
1297 | 0 | void TabletMeta::clear_rowsets() { |
1298 | 0 | _rs_metas.clear(); |
1299 | 0 | if (_enable_unique_key_merge_on_write) { |
1300 | 0 | _delete_bitmap->clear_rowset_cache_version(); |
1301 | 0 | } |
1302 | 0 | } |
1303 | | |
1304 | 36.4k | void TabletMeta::_check_mow_rowset_cache_version_size(size_t rowset_cache_version_size) { |
1305 | 36.4k | if (_enable_unique_key_merge_on_write && config::enable_mow_verbose_log && |
1306 | 36.4k | rowset_cache_version_size > _rs_metas.size() + _stale_rs_metas.size()) { |
1307 | 0 | std::stringstream ss; |
1308 | 0 | auto rowset_ids = _delete_bitmap->get_rowset_cache_version(); |
1309 | 0 | std::set<std::string> tablet_rowset_ids; |
1310 | 0 | { |
1311 | 0 | std::shared_lock rlock(_meta_lock); |
1312 | 0 | for (const auto& [_, rs_meta] : _rs_metas) { |
1313 | 0 | tablet_rowset_ids.emplace(rs_meta->rowset_id().to_string()); |
1314 | 0 | } |
1315 | 0 | for (const auto& [_, rs_meta] : _stale_rs_metas) { |
1316 | 0 | tablet_rowset_ids.emplace(rs_meta->rowset_id().to_string()); |
1317 | 0 | } |
1318 | 0 | } |
1319 | 0 | for (const auto& rowset_id : rowset_ids) { |
1320 | 0 | if (tablet_rowset_ids.find(rowset_id) == tablet_rowset_ids.end()) { |
1321 | 0 | ss << rowset_id << ", "; |
1322 | 0 | } |
1323 | 0 | } |
1324 | | // size(rowset_cache_version) <= size(_rs_metas) + size(_stale_rs_metas) + size(_unused_rs) |
1325 | 0 | std::string msg = fmt::format( |
1326 | 0 | "tablet: {}, rowset_cache_version size: {}, " |
1327 | 0 | "_rs_metas size: {}, _stale_rs_metas size: {}, delta: {}. rowset only in cache: {}", |
1328 | 0 | _tablet_id, rowset_cache_version_size, _rs_metas.size(), _stale_rs_metas.size(), |
1329 | 0 | rowset_cache_version_size - _rs_metas.size() - _stale_rs_metas.size(), ss.str()); |
1330 | 0 | LOG(INFO) << msg; |
1331 | 0 | } |
1332 | 36.4k | } |
1333 | | |
1334 | 3 | bool operator==(const TabletMeta& a, const TabletMeta& b) { |
1335 | 3 | if (a._table_id != b._table_id) return false; |
1336 | 3 | if (a._index_id != b._index_id) return false; |
1337 | 3 | if (a._partition_id != b._partition_id) return false; |
1338 | 3 | if (a._tablet_id != b._tablet_id) return false; |
1339 | 3 | if (a._replica_id != b._replica_id) return false; |
1340 | 3 | if (a._schema_hash != b._schema_hash) return false; |
1341 | 3 | if (a._shard_id != b._shard_id) return false; |
1342 | 3 | if (a._creation_time != b._creation_time) return false; |
1343 | 3 | if (a._cumulative_layer_point != b._cumulative_layer_point) return false; |
1344 | 3 | if (a._tablet_uid != b._tablet_uid) return false; |
1345 | 3 | if (a._tablet_type != b._tablet_type) return false; |
1346 | 3 | if (a._tablet_state != b._tablet_state) return false; |
1347 | 3 | if (*a._schema != *b._schema) return false; |
1348 | 3 | if (a._rs_metas != b._rs_metas) return false; |
1349 | 3 | if (a._in_restore_mode != b._in_restore_mode) return false; |
1350 | 3 | if (a._preferred_rowset_type != b._preferred_rowset_type) return false; |
1351 | 3 | if (a._storage_policy_id != b._storage_policy_id) return false; |
1352 | 3 | if (a._compaction_policy != b._compaction_policy) return false; |
1353 | 3 | if (a._time_series_compaction_goal_size_mbytes != b._time_series_compaction_goal_size_mbytes) |
1354 | 0 | return false; |
1355 | 3 | if (a._time_series_compaction_file_count_threshold != |
1356 | 3 | b._time_series_compaction_file_count_threshold) |
1357 | 0 | return false; |
1358 | 3 | if (a._time_series_compaction_time_threshold_seconds != |
1359 | 3 | b._time_series_compaction_time_threshold_seconds) |
1360 | 0 | return false; |
1361 | 3 | if (a._time_series_compaction_empty_rowsets_threshold != |
1362 | 3 | b._time_series_compaction_empty_rowsets_threshold) |
1363 | 0 | return false; |
1364 | 3 | if (a._time_series_compaction_level_threshold != b._time_series_compaction_level_threshold) |
1365 | 0 | return false; |
1366 | 3 | return true; |
1367 | 3 | } |
1368 | | |
1369 | 0 | bool operator!=(const TabletMeta& a, const TabletMeta& b) { |
1370 | 0 | return !(a == b); |
1371 | 0 | } |
1372 | | |
1373 | | // We cannot just copy the underlying memory to construct a string |
1374 | | // due to equivalent objects may have different padding bytes. |
1375 | | // Reading padding bytes is undefined behavior, neither copy nor |
1376 | | // placement new will help simplify the code. |
1377 | | // Refer to C11 standards §6.2.6.1/6 and §6.7.9/21 for more info. |
1378 | 5.42M | static std::string agg_cache_key(int64_t tablet_id, const DeleteBitmap::BitmapKey& bmk) { |
1379 | 5.42M | std::string ret(sizeof(tablet_id) + sizeof(bmk), '\0'); |
1380 | 5.42M | *reinterpret_cast<int64_t*>(ret.data()) = tablet_id; |
1381 | 5.42M | auto t = reinterpret_cast<DeleteBitmap::BitmapKey*>(ret.data() + sizeof(tablet_id)); |
1382 | 5.42M | std::get<RowsetId>(*t).version = std::get<RowsetId>(bmk).version; |
1383 | 5.42M | std::get<RowsetId>(*t).hi = std::get<RowsetId>(bmk).hi; |
1384 | 5.42M | std::get<RowsetId>(*t).mi = std::get<RowsetId>(bmk).mi; |
1385 | 5.42M | std::get<RowsetId>(*t).lo = std::get<RowsetId>(bmk).lo; |
1386 | 5.42M | std::get<1>(*t) = std::get<1>(bmk); |
1387 | 5.42M | std::get<2>(*t) = std::get<2>(bmk); |
1388 | 5.42M | return ret; |
1389 | 5.42M | } |
1390 | | |
1391 | | // decode cache key info from a agg_cache_key |
1392 | | static void decode_agg_cache_key(const std::string& key_str, int64_t& tablet_id, |
1393 | 45.2k | DeleteBitmap::BitmapKey& bmk) { |
1394 | 45.2k | const char* ptr = key_str.data(); |
1395 | 45.2k | tablet_id = *reinterpret_cast<const int64_t*>(ptr); |
1396 | 45.2k | ptr += sizeof(tablet_id); |
1397 | 45.2k | const auto* t = reinterpret_cast<const DeleteBitmap::BitmapKey*>(ptr); |
1398 | 45.2k | std::get<RowsetId>(bmk).version = std::get<RowsetId>(*t).version; |
1399 | 45.2k | std::get<RowsetId>(bmk).hi = std::get<RowsetId>(*t).hi; |
1400 | 45.2k | std::get<RowsetId>(bmk).mi = std::get<RowsetId>(*t).mi; |
1401 | 45.2k | std::get<RowsetId>(bmk).lo = std::get<RowsetId>(*t).lo; |
1402 | 45.2k | std::get<1>(bmk) = std::get<1>(*t); |
1403 | 45.2k | std::get<2>(bmk) = std::get<2>(*t); |
1404 | 45.2k | } |
1405 | | |
1406 | | DeleteBitmapAggCache::DeleteBitmapAggCache(size_t capacity) |
1407 | 5 | : LRUCachePolicy(CachePolicy::CacheType::DELETE_BITMAP_AGG_CACHE, capacity, |
1408 | 5 | LRUCacheType::SIZE, config::delete_bitmap_agg_cache_stale_sweep_time_sec, |
1409 | 5 | /*num_shards*/ 256, |
1410 | 5 | /*element_count_capacity*/ 0, /*enable_prune*/ true, |
1411 | 5 | /*is_lru_k*/ false) {} |
1412 | | |
1413 | 10.3M | DeleteBitmapAggCache* DeleteBitmapAggCache::instance() { |
1414 | 10.3M | return ExecEnv::GetInstance()->delete_bitmap_agg_cache(); |
1415 | 10.3M | } |
1416 | | |
1417 | 5 | DeleteBitmapAggCache* DeleteBitmapAggCache::create_instance(size_t capacity) { |
1418 | 5 | return new DeleteBitmapAggCache(capacity); |
1419 | 5 | } |
1420 | | |
1421 | 2 | DeleteBitmap DeleteBitmapAggCache::snapshot(int64_t tablet_id) { |
1422 | 2 | DeleteBitmap ret(tablet_id); |
1423 | 45.2k | auto collector = [&](const LRUHandle* handle) { |
1424 | 45.2k | auto key = handle->key().to_string(); |
1425 | 45.2k | int64_t key_tablet_id; |
1426 | 45.2k | DeleteBitmap::BitmapKey bmk; |
1427 | 45.2k | decode_agg_cache_key(key, key_tablet_id, bmk); |
1428 | 45.2k | if (key_tablet_id == tablet_id) { |
1429 | 16 | const auto& dbm = reinterpret_cast<DeleteBitmapAggCache::Value*>(handle->value)->bitmap; |
1430 | 16 | ret.set(bmk, dbm); |
1431 | 16 | } |
1432 | 45.2k | }; |
1433 | 2 | DeleteBitmapAggCache::instance()->for_each_entry(collector); |
1434 | 2 | return ret; |
1435 | 2 | } |
1436 | | |
1437 | 1.16M | DeleteBitmap::DeleteBitmap(int64_t tablet_id) : _tablet_id(tablet_id) {} |
1438 | | |
1439 | 3.92k | DeleteBitmap::DeleteBitmap(const DeleteBitmap& o) { |
1440 | 3.92k | std::shared_lock l1(o.lock); |
1441 | 3.92k | delete_bitmap = o.delete_bitmap; |
1442 | 3.92k | _tablet_id = o._tablet_id; |
1443 | 3.92k | } |
1444 | | |
1445 | 26.4k | DeleteBitmap& DeleteBitmap::operator=(const DeleteBitmap& o) { |
1446 | 26.4k | if (this == &o) return *this; |
1447 | 25.2k | if (this < &o) { |
1448 | 25.2k | std::unique_lock l1(lock); |
1449 | 25.2k | std::shared_lock l2(o.lock); |
1450 | 25.2k | delete_bitmap = o.delete_bitmap; |
1451 | 25.2k | _tablet_id = o._tablet_id; |
1452 | 25.2k | } else { |
1453 | 16 | std::shared_lock l2(o.lock); |
1454 | 16 | std::unique_lock l1(lock); |
1455 | 16 | delete_bitmap = o.delete_bitmap; |
1456 | 16 | _tablet_id = o._tablet_id; |
1457 | 16 | } |
1458 | 25.2k | return *this; |
1459 | 26.4k | } |
1460 | | |
1461 | 0 | DeleteBitmap::DeleteBitmap(DeleteBitmap&& o) noexcept { |
1462 | 0 | std::scoped_lock l(o.lock, o._rowset_cache_version_lock); |
1463 | 0 | delete_bitmap = std::move(o.delete_bitmap); |
1464 | 0 | _tablet_id = std::move(o._tablet_id); |
1465 | 0 | o._rowset_cache_version.clear(); |
1466 | 0 | } |
1467 | | |
1468 | 0 | DeleteBitmap& DeleteBitmap::operator=(DeleteBitmap&& o) noexcept { |
1469 | 0 | if (this == &o) return *this; |
1470 | 0 | std::scoped_lock l(lock, o.lock, o._rowset_cache_version_lock); |
1471 | 0 | delete_bitmap = std::move(o.delete_bitmap); |
1472 | 0 | _tablet_id = std::move(o._tablet_id); |
1473 | 0 | o._rowset_cache_version.clear(); |
1474 | 0 | return *this; |
1475 | 0 | } |
1476 | | |
1477 | 0 | DeleteBitmap DeleteBitmap::from_pb(const DeleteBitmapPB& pb, int64_t tablet_id) { |
1478 | 0 | size_t len = pb.rowset_ids().size(); |
1479 | 0 | DCHECK_EQ(len, pb.segment_ids().size()); |
1480 | 0 | DCHECK_EQ(len, pb.versions().size()); |
1481 | 0 | DeleteBitmap delete_bitmap(tablet_id); |
1482 | 0 | for (int32_t i = 0; i < len; ++i) { |
1483 | 0 | RowsetId rs_id; |
1484 | 0 | rs_id.init(pb.rowset_ids(i)); |
1485 | 0 | BitmapKey key = {rs_id, pb.segment_ids(i), pb.versions(i)}; |
1486 | 0 | delete_bitmap.delete_bitmap[key] = |
1487 | 0 | roaring::Roaring::read(pb.segment_delete_bitmaps(i).data()); |
1488 | 0 | } |
1489 | 0 | return delete_bitmap; |
1490 | 0 | } |
1491 | | |
1492 | 0 | DeleteBitmapPB DeleteBitmap::to_pb() { |
1493 | 0 | std::shared_lock l(lock); |
1494 | 0 | DeleteBitmapPB ret; |
1495 | 0 | for (const auto& [k, v] : delete_bitmap) { |
1496 | 0 | ret.mutable_rowset_ids()->Add(std::get<0>(k).to_string()); |
1497 | 0 | ret.mutable_segment_ids()->Add(std::get<1>(k)); |
1498 | 0 | ret.mutable_versions()->Add(std::get<2>(k)); |
1499 | 0 | std::string bitmap_data(v.getSizeInBytes(), '\0'); |
1500 | 0 | v.write(bitmap_data.data()); |
1501 | 0 | ret.mutable_segment_delete_bitmaps()->Add(std::move(bitmap_data)); |
1502 | 0 | } |
1503 | 0 | return ret; |
1504 | 0 | } |
1505 | | |
1506 | 447 | DeleteBitmap DeleteBitmap::snapshot() const { |
1507 | 447 | std::shared_lock l(lock); |
1508 | 447 | return DeleteBitmap(*this); |
1509 | 447 | } |
1510 | | |
1511 | 3 | DeleteBitmap DeleteBitmap::snapshot(Version version) const { |
1512 | | // Take snapshot first, then remove keys greater than given version. |
1513 | 3 | DeleteBitmap snapshot = this->snapshot(); |
1514 | 3 | auto it = snapshot.delete_bitmap.begin(); |
1515 | 412 | while (it != snapshot.delete_bitmap.end()) { |
1516 | 409 | if (std::get<2>(it->first) > version) { |
1517 | 4 | it = snapshot.delete_bitmap.erase(it); |
1518 | 405 | } else { |
1519 | 405 | it++; |
1520 | 405 | } |
1521 | 409 | } |
1522 | 3 | return snapshot; |
1523 | 3 | } |
1524 | | |
1525 | 5.05M | void DeleteBitmap::add(const BitmapKey& bmk, uint32_t row_id) { |
1526 | 5.05M | std::lock_guard l(lock); |
1527 | 5.05M | delete_bitmap[bmk].add(row_id); |
1528 | 5.05M | } |
1529 | | |
1530 | 0 | int DeleteBitmap::remove(const BitmapKey& bmk, uint32_t row_id) { |
1531 | 0 | std::lock_guard l(lock); |
1532 | 0 | auto it = delete_bitmap.find(bmk); |
1533 | 0 | if (it == delete_bitmap.end()) return -1; |
1534 | 0 | it->second.remove(row_id); |
1535 | 0 | return 0; |
1536 | 0 | } |
1537 | | |
1538 | 66.4k | void DeleteBitmap::remove(const BitmapKey& start, const BitmapKey& end) { |
1539 | 66.4k | std::lock_guard l(lock); |
1540 | 84.1k | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end();) { |
1541 | 53.0k | auto& [k, _] = *it; |
1542 | 53.0k | if (k >= end) { |
1543 | 35.4k | break; |
1544 | 35.4k | } |
1545 | 17.6k | it = delete_bitmap.erase(it); |
1546 | 17.6k | } |
1547 | 66.4k | } |
1548 | | |
1549 | 884 | void DeleteBitmap::remove(const std::vector<std::tuple<BitmapKey, BitmapKey>>& key_ranges) { |
1550 | 884 | std::lock_guard l(lock); |
1551 | 884 | for (auto& [start, end] : key_ranges) { |
1552 | 1.80k | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end();) { |
1553 | 1.80k | auto& [k, _] = *it; |
1554 | 1.80k | if (k >= end) { |
1555 | 884 | break; |
1556 | 884 | } |
1557 | 924 | it = delete_bitmap.erase(it); |
1558 | 924 | } |
1559 | 884 | } |
1560 | 884 | } |
1561 | | |
1562 | 4.18M | bool DeleteBitmap::contains(const BitmapKey& bmk, uint32_t row_id) const { |
1563 | 4.18M | std::shared_lock l(lock); |
1564 | 4.18M | auto it = delete_bitmap.find(bmk); |
1565 | 4.18M | return it != delete_bitmap.end() && it->second.contains(row_id); |
1566 | 4.18M | } |
1567 | | |
1568 | 0 | bool DeleteBitmap::contain_rowsets(const RowsetIdUnorderedSet& rowset_ids) const { |
1569 | 0 | std::shared_lock l(lock); |
1570 | 0 | return std::any_of(delete_bitmap.begin(), delete_bitmap.end(), [&](const auto& entry) { |
1571 | 0 | return rowset_ids.contains(std::get<0>(entry.first)); |
1572 | 0 | }); |
1573 | 0 | } |
1574 | | |
1575 | 2 | bool DeleteBitmap::contains_agg(const BitmapKey& bmk, uint32_t row_id) const { |
1576 | 2 | return get_agg(bmk)->contains(row_id); |
1577 | 2 | } |
1578 | | |
1579 | 0 | bool DeleteBitmap::empty() const { |
1580 | 0 | std::shared_lock l(lock); |
1581 | 0 | return delete_bitmap.empty(); |
1582 | 0 | } |
1583 | | |
1584 | 284k | uint64_t DeleteBitmap::cardinality() const { |
1585 | 284k | std::shared_lock l(lock); |
1586 | 284k | uint64_t res = 0; |
1587 | 1.40M | for (auto entry : delete_bitmap) { |
1588 | 1.40M | if (std::get<1>(entry.first) != DeleteBitmap::INVALID_SEGMENT_ID) { |
1589 | 52.7k | res += entry.second.cardinality(); |
1590 | 52.7k | } |
1591 | 1.40M | } |
1592 | 284k | return res; |
1593 | 284k | } |
1594 | | |
1595 | 6 | uint64_t DeleteBitmap::get_size() const { |
1596 | 6 | std::shared_lock l(lock); |
1597 | 6 | uint64_t charge = 0; |
1598 | 44 | for (auto& [k, v] : delete_bitmap) { |
1599 | 44 | if (std::get<1>(k) != DeleteBitmap::INVALID_SEGMENT_ID) { |
1600 | 44 | charge += v.getSizeInBytes(); |
1601 | 44 | } |
1602 | 44 | } |
1603 | 6 | return charge; |
1604 | 6 | } |
1605 | | |
1606 | | bool DeleteBitmap::contains_agg_with_cache_if_eligible(const BitmapKey& bmk, |
1607 | 4.18M | uint32_t row_id) const { |
1608 | 4.18M | g_contains_agg_with_cache_if_eligible_total << 1; |
1609 | 4.18M | int64_t start_version {0}; |
1610 | 4.20M | if (config::enable_mow_get_agg_by_cache) { |
1611 | 4.20M | auto deleter = [&](Cache::Handle* handle) { |
1612 | 1.25M | DeleteBitmapAggCache::instance()->release(handle); |
1613 | 1.25M | }; |
1614 | 4.20M | std::unique_ptr<Cache::Handle, decltype(deleter)> dbm_handle(nullptr, deleter); |
1615 | 4.20M | int64_t cached_version = 0; |
1616 | | // 1. try to lookup the desired key directly |
1617 | 4.20M | dbm_handle.reset(DeleteBitmapAggCache::instance()->lookup(agg_cache_key(_tablet_id, bmk))); |
1618 | 4.20M | if (dbm_handle != nullptr) { |
1619 | 1.25M | cached_version = std::get<2>(bmk); |
1620 | 2.95M | } else { |
1621 | | // 2. if not found, try to lookup with cached version |
1622 | 2.95M | cached_version = _get_rowset_cache_version(bmk); |
1623 | 2.95M | if (cached_version > 0) { |
1624 | 27.5k | if (cached_version > std::get<2>(bmk)) { |
1625 | 27.2k | cached_version = 0; |
1626 | 27.2k | } else { |
1627 | 254 | dbm_handle.reset(DeleteBitmapAggCache::instance()->lookup(agg_cache_key( |
1628 | 254 | _tablet_id, {std::get<0>(bmk), std::get<1>(bmk), cached_version}))); |
1629 | 254 | } |
1630 | 27.5k | } |
1631 | 2.95M | } |
1632 | 4.20M | if (dbm_handle != nullptr) { |
1633 | 1.25M | const auto& cached_dbm = |
1634 | 1.25M | reinterpret_cast<DeleteBitmapAggCache::Value*>( |
1635 | 1.25M | DeleteBitmapAggCache::instance()->value(dbm_handle.get())) |
1636 | 1.25M | ->bitmap; |
1637 | 1.25M | if (cached_version == std::get<2>(bmk)) { |
1638 | 1.24M | g_contains_agg_with_cache_if_eligible_full_hit << 1; |
1639 | 1.24M | } else { |
1640 | 2.92k | g_contains_agg_with_cache_if_eligible_partial_hit << 1; |
1641 | 2.92k | } |
1642 | 1.25M | if (cached_dbm.contains(row_id)) { |
1643 | 354 | return true; |
1644 | 354 | } |
1645 | 1.25M | if (cached_version == std::get<2>(bmk)) { |
1646 | 1.25M | return false; |
1647 | 1.25M | } |
1648 | 1.32k | start_version = cached_version + 1; |
1649 | 1.32k | } |
1650 | 4.20M | } |
1651 | 2.93M | DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), start_version}; |
1652 | 2.93M | std::shared_lock l(lock); |
1653 | 2.93M | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1654 | 1.29M | auto& [k, bm] = *it; |
1655 | 1.29M | if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) || |
1656 | 1.29M | std::get<2>(k) > std::get<2>(bmk)) { |
1657 | 1.28M | break; |
1658 | 1.28M | } |
1659 | 1.57k | if (bm.contains(row_id)) { |
1660 | 21 | return true; |
1661 | 21 | } |
1662 | 1.57k | } |
1663 | 2.93M | return false; |
1664 | 2.93M | } |
1665 | | |
1666 | 13 | void DeleteBitmap::remove_sentinel_marks() { |
1667 | 13 | std::lock_guard l(lock); |
1668 | 142 | for (auto it = delete_bitmap.begin(), end = delete_bitmap.end(); it != end;) { |
1669 | 129 | if (std::get<1>(it->first) == DeleteBitmap::INVALID_SEGMENT_ID) { |
1670 | 125 | it = delete_bitmap.erase(it); |
1671 | 125 | } else { |
1672 | 4 | ++it; |
1673 | 4 | } |
1674 | 129 | } |
1675 | 13 | } |
1676 | | |
1677 | 8.30k | int DeleteBitmap::set(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) { |
1678 | 8.30k | std::lock_guard l(lock); |
1679 | 8.30k | auto [_, inserted] = delete_bitmap.insert_or_assign(bmk, segment_delete_bitmap); |
1680 | 8.30k | return inserted; |
1681 | 8.30k | } |
1682 | | |
1683 | 7 | int DeleteBitmap::get(const BitmapKey& bmk, roaring::Roaring* segment_delete_bitmap) const { |
1684 | 7 | std::shared_lock l(lock); |
1685 | 7 | auto it = delete_bitmap.find(bmk); |
1686 | 7 | if (it == delete_bitmap.end()) return -1; |
1687 | 7 | *segment_delete_bitmap = it->second; // copy |
1688 | 7 | return 0; |
1689 | 7 | } |
1690 | | |
1691 | 54 | const roaring::Roaring* DeleteBitmap::get(const BitmapKey& bmk) const { |
1692 | 54 | std::shared_lock l(lock); |
1693 | 54 | auto it = delete_bitmap.find(bmk); |
1694 | 54 | if (it == delete_bitmap.end()) return nullptr; |
1695 | 41 | return &(it->second); // get address |
1696 | 54 | } |
1697 | | |
1698 | | void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end, |
1699 | 27.6k | DeleteBitmap* subset_rowset_map) const { |
1700 | 27.6k | DCHECK(start < end); |
1701 | 27.6k | std::shared_lock l(lock); |
1702 | 34.0k | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1703 | 15.7k | auto& [k, bm] = *it; |
1704 | 15.7k | if (k >= end) { |
1705 | 9.34k | break; |
1706 | 9.34k | } |
1707 | 6.38k | subset_rowset_map->set(k, bm); |
1708 | 6.38k | } |
1709 | 27.6k | } |
1710 | | |
1711 | | void DeleteBitmap::subset(std::vector<std::pair<RowsetId, int64_t>>& rowset_ids, |
1712 | | int64_t start_version, int64_t end_version, |
1713 | 0 | DeleteBitmap* subset_delete_map) const { |
1714 | 0 | DCHECK(start_version <= end_version); |
1715 | 0 | for (auto& [rowset_id, _] : rowset_ids) { |
1716 | 0 | BitmapKey start {rowset_id, 0, 0}; |
1717 | 0 | BitmapKey end {rowset_id, UINT32_MAX, end_version + 1}; |
1718 | 0 | std::shared_lock l(lock); |
1719 | 0 | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1720 | 0 | auto& [k, bm] = *it; |
1721 | 0 | if (k >= end) { |
1722 | 0 | break; |
1723 | 0 | } |
1724 | 0 | auto version = std::get<2>(k); |
1725 | 0 | if (version >= start_version && version <= end_version) { |
1726 | 0 | subset_delete_map->merge(k, bm); |
1727 | 0 | VLOG_DEBUG << "subset delete bitmap, tablet=" << _tablet_id << ", version=[" |
1728 | 0 | << start_version << ", " << end_version |
1729 | 0 | << "]. rowset=" << std::get<0>(k).to_string() |
1730 | 0 | << ", segment=" << std::get<1>(k) << ", version=" << version |
1731 | 0 | << ", cardinality=" << bm.cardinality(); |
1732 | 0 | } |
1733 | 0 | } |
1734 | 0 | } |
1735 | 0 | } |
1736 | | |
1737 | | void DeleteBitmap::subset_and_agg(std::vector<std::pair<RowsetId, int64_t>>& rowset_ids, |
1738 | | int64_t start_version, int64_t end_version, |
1739 | 1 | DeleteBitmap* subset_delete_map) const { |
1740 | 1 | DCHECK(start_version <= end_version); |
1741 | 2 | for (auto& [rowset_id, segment_num] : rowset_ids) { |
1742 | 6 | for (int64_t seg_id = 0; seg_id < segment_num; ++seg_id) { |
1743 | 4 | BitmapKey end {rowset_id, seg_id, end_version}; |
1744 | 4 | auto bm = get_agg_without_cache(end, start_version); |
1745 | 4 | VLOG_DEBUG << "subset delete bitmap, tablet=" << _tablet_id << ", rowset=" << rowset_id |
1746 | 0 | << ", segment=" << seg_id << ", version=[" << start_version << "-" |
1747 | 0 | << end_version << "], cardinality=" << bm->cardinality(); |
1748 | 4 | if (bm->isEmpty()) { |
1749 | 0 | continue; |
1750 | 0 | } |
1751 | 4 | subset_delete_map->merge(end, *bm); |
1752 | 4 | } |
1753 | 2 | } |
1754 | 1 | } |
1755 | | |
1756 | 260 | size_t DeleteBitmap::get_count_with_range(const BitmapKey& start, const BitmapKey& end) const { |
1757 | 260 | DCHECK(start < end); |
1758 | 260 | size_t count = 0; |
1759 | 260 | std::shared_lock l(lock); |
1760 | 391 | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1761 | 323 | auto& [k, bm] = *it; |
1762 | 323 | if (k >= end) { |
1763 | 192 | break; |
1764 | 192 | } |
1765 | 131 | count++; |
1766 | 131 | } |
1767 | 260 | return count; |
1768 | 260 | } |
1769 | | |
1770 | 17.0k | void DeleteBitmap::merge(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) { |
1771 | 17.0k | std::lock_guard l(lock); |
1772 | 17.0k | auto [iter, succ] = delete_bitmap.emplace(bmk, segment_delete_bitmap); |
1773 | 17.0k | if (!succ) { |
1774 | 0 | iter->second |= segment_delete_bitmap; |
1775 | 0 | } |
1776 | 17.0k | } |
1777 | | |
1778 | 47.3k | void DeleteBitmap::merge(const DeleteBitmap& other) { |
1779 | 47.3k | std::lock_guard l(lock); |
1780 | 47.3k | for (auto& i : other.delete_bitmap) { |
1781 | 2.20k | auto [j, succ] = this->delete_bitmap.insert(i); |
1782 | 2.20k | if (!succ) j->second |= i.second; |
1783 | 2.20k | } |
1784 | 47.3k | } |
1785 | | |
1786 | 683k | uint64_t DeleteBitmap::get_delete_bitmap_count() { |
1787 | 683k | std::shared_lock l(lock); |
1788 | 683k | uint64_t count = 0; |
1789 | 1.28M | for (auto it = delete_bitmap.begin(); it != delete_bitmap.end(); it++) { |
1790 | 604k | if (std::get<1>(it->first) != DeleteBitmap::INVALID_SEGMENT_ID) { |
1791 | 49.7k | count++; |
1792 | 49.7k | } |
1793 | 604k | } |
1794 | 683k | return count; |
1795 | 683k | } |
1796 | | |
1797 | | void DeleteBitmap::traverse_rowset_and_version( |
1798 | 0 | const std::function<int(const RowsetId& rowsetId, int64_t version)>& func) const { |
1799 | 0 | std::shared_lock l(lock); |
1800 | 0 | auto it = delete_bitmap.cbegin(); |
1801 | 0 | while (it != delete_bitmap.cend()) { |
1802 | 0 | RowsetId rowset_id = std::get<0>(it->first); |
1803 | 0 | int64_t version = std::get<2>(it->first); |
1804 | 0 | int result = func(rowset_id, version); |
1805 | 0 | if (result == -2) { |
1806 | | // find next <rowset, version> |
1807 | 0 | it++; |
1808 | 0 | } else { |
1809 | | // find next <rowset> |
1810 | 0 | it = delete_bitmap.upper_bound({rowset_id, std::numeric_limits<SegmentId>::max(), |
1811 | 0 | std::numeric_limits<Version>::max()}); |
1812 | 0 | } |
1813 | 0 | } |
1814 | 0 | } |
1815 | | |
1816 | 0 | bool DeleteBitmap::has_calculated_for_multi_segments(const RowsetId& rowset_id) const { |
1817 | 0 | return contains({rowset_id, INVALID_SEGMENT_ID, TEMP_VERSION_COMMON}, ROWSET_SENTINEL_MARK); |
1818 | 0 | } |
1819 | | |
1820 | 60.9k | size_t DeleteBitmap::remove_rowset_cache_version(const RowsetId& rowset_id) { |
1821 | 60.9k | std::lock_guard l(_rowset_cache_version_lock); |
1822 | 60.9k | _rowset_cache_version.erase(rowset_id); |
1823 | 60.9k | VLOG_DEBUG << "remove agg cache version for tablet=" << _tablet_id |
1824 | 0 | << ", rowset=" << rowset_id.to_string(); |
1825 | 60.9k | return _rowset_cache_version.size(); |
1826 | 60.9k | } |
1827 | | |
1828 | 0 | void DeleteBitmap::clear_rowset_cache_version() { |
1829 | 0 | std::lock_guard l(_rowset_cache_version_lock); |
1830 | 0 | _rowset_cache_version.clear(); |
1831 | 0 | VLOG_DEBUG << "clear agg cache version for tablet=" << _tablet_id; |
1832 | 0 | } |
1833 | | |
1834 | 0 | std::set<std::string> DeleteBitmap::get_rowset_cache_version() { |
1835 | 0 | std::set<std::string> set; |
1836 | 0 | std::shared_lock l(_rowset_cache_version_lock); |
1837 | 0 | for (auto& [k, _] : _rowset_cache_version) { |
1838 | 0 | set.insert(k.to_string()); |
1839 | 0 | } |
1840 | 0 | return set; |
1841 | 0 | } |
1842 | | |
1843 | 3.00M | DeleteBitmap::Version DeleteBitmap::_get_rowset_cache_version(const BitmapKey& bmk) const { |
1844 | 3.00M | std::shared_lock l(_rowset_cache_version_lock); |
1845 | 3.00M | if (auto it = _rowset_cache_version.find(std::get<0>(bmk)); it != _rowset_cache_version.end()) { |
1846 | 39.1k | auto& segment_cache_version = it->second; |
1847 | 39.1k | if (auto it1 = segment_cache_version.find(std::get<1>(bmk)); |
1848 | 39.1k | it1 != segment_cache_version.end()) { |
1849 | 39.1k | return it1->second; |
1850 | 39.1k | } |
1851 | 39.1k | } |
1852 | 2.96M | return 0; |
1853 | 3.00M | } |
1854 | | |
1855 | 2 | DeleteBitmap DeleteBitmap::agg_cache_snapshot() { |
1856 | 2 | return DeleteBitmapAggCache::instance()->snapshot(_tablet_id); |
1857 | 2 | } |
1858 | | |
1859 | 958k | void DeleteBitmap::set_tablet_id(int64_t tablet_id) { |
1860 | 958k | _tablet_id = tablet_id; |
1861 | 958k | } |
1862 | | |
1863 | 1.22M | std::shared_ptr<roaring::Roaring> DeleteBitmap::get_agg(const BitmapKey& bmk) const { |
1864 | 1.22M | std::string key_str = agg_cache_key(_tablet_id, bmk); // Cache key container |
1865 | 1.22M | CacheKey key(key_str); |
1866 | 1.22M | Cache::Handle* handle = DeleteBitmapAggCache::instance()->lookup(key); |
1867 | | |
1868 | 1.22M | DeleteBitmapAggCache::Value* val = |
1869 | 1.22M | handle == nullptr ? nullptr |
1870 | 1.22M | : reinterpret_cast<DeleteBitmapAggCache::Value*>( |
1871 | 1.16M | DeleteBitmapAggCache::instance()->value(handle)); |
1872 | | // FIXME: do we need a mutex here to get rid of duplicated initializations |
1873 | | // of cache entries in some cases? |
1874 | 1.22M | if (val == nullptr) { // Renew if needed, put a new Value to cache |
1875 | 56.8k | val = new DeleteBitmapAggCache::Value(); |
1876 | 56.8k | Version start_version = |
1877 | 56.8k | config::enable_mow_get_agg_by_cache ? _get_rowset_cache_version(bmk) : 0; |
1878 | 56.8k | if (start_version > 0) { |
1879 | 11.5k | Cache::Handle* handle2 = DeleteBitmapAggCache::instance()->lookup( |
1880 | 11.5k | agg_cache_key(_tablet_id, {std::get<0>(bmk), std::get<1>(bmk), start_version})); |
1881 | | |
1882 | 11.5k | DBUG_EXECUTE_IF("DeleteBitmap::get_agg.cache_miss", { |
1883 | 11.5k | if (handle2 != nullptr) { |
1884 | 11.5k | auto p = dp->param("percent", 0.3); |
1885 | 11.5k | std::mt19937 gen {std::random_device {}()}; |
1886 | 11.5k | std::bernoulli_distribution inject_fault {p}; |
1887 | 11.5k | if (inject_fault(gen)) { |
1888 | 11.5k | LOG_INFO("injection DeleteBitmap::get_agg.cache_miss, tablet_id={}", |
1889 | 11.5k | _tablet_id); |
1890 | 11.5k | handle2 = nullptr; |
1891 | 11.5k | } |
1892 | 11.5k | } |
1893 | 11.5k | }); |
1894 | 11.5k | if (handle2 == nullptr || start_version > std::get<2>(bmk)) { |
1895 | 92 | start_version = 0; |
1896 | 11.4k | } else { |
1897 | 11.4k | val->bitmap |= reinterpret_cast<DeleteBitmapAggCache::Value*>( |
1898 | 11.4k | DeleteBitmapAggCache::instance()->value(handle2)) |
1899 | 11.4k | ->bitmap; |
1900 | 11.4k | VLOG_DEBUG << "get agg cache version=" << start_version |
1901 | 14 | << " for tablet=" << _tablet_id |
1902 | 14 | << ", rowset=" << std::get<0>(bmk).to_string() |
1903 | 14 | << ", segment=" << std::get<1>(bmk); |
1904 | 11.4k | start_version += 1; |
1905 | 11.4k | } |
1906 | 11.5k | if (handle2 != nullptr) { |
1907 | 11.5k | DeleteBitmapAggCache::instance()->release(handle2); |
1908 | 11.5k | } |
1909 | 11.5k | } |
1910 | 56.8k | { |
1911 | 56.8k | std::shared_lock l(lock); |
1912 | 56.8k | DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), start_version}; |
1913 | 64.3k | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1914 | 24.1k | auto& [k, bm] = *it; |
1915 | 24.1k | if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) || |
1916 | 24.1k | std::get<2>(k) > std::get<2>(bmk)) { |
1917 | 16.6k | break; |
1918 | 16.6k | } |
1919 | 7.42k | val->bitmap |= bm; |
1920 | 7.42k | } |
1921 | 56.8k | } |
1922 | 56.8k | size_t charge = val->bitmap.getSizeInBytes() + sizeof(DeleteBitmapAggCache::Value); |
1923 | 56.8k | handle = DeleteBitmapAggCache::instance()->insert(key, val, charge, charge, |
1924 | 56.8k | CachePriority::NORMAL); |
1925 | 56.8k | if (config::enable_mow_get_agg_by_cache && !val->bitmap.isEmpty()) { |
1926 | 17.1k | std::lock_guard l(_rowset_cache_version_lock); |
1927 | | // this version is already agg |
1928 | 17.1k | _rowset_cache_version[std::get<0>(bmk)][std::get<1>(bmk)] = std::get<2>(bmk); |
1929 | 17.1k | VLOG_DEBUG << "set agg cache version=" << std::get<2>(bmk) |
1930 | 11 | << " for tablet=" << _tablet_id |
1931 | 11 | << ", rowset=" << std::get<0>(bmk).to_string() |
1932 | 11 | << ", segment=" << std::get<1>(bmk); |
1933 | 17.1k | } |
1934 | 56.8k | if (start_version > 0 && config::enable_mow_get_agg_correctness_check_core) { |
1935 | 0 | std::shared_ptr<roaring::Roaring> bitmap = get_agg_without_cache(bmk); |
1936 | 0 | if (val->bitmap != *bitmap) { |
1937 | 0 | CHECK(false) << ". get agg correctness check failed for tablet=" << _tablet_id |
1938 | 0 | << ", rowset=" << std::get<0>(bmk).to_string() |
1939 | 0 | << ", segment=" << std::get<1>(bmk) << ", version=" << std::get<2>(bmk) |
1940 | 0 | << ". start_version from cache=" << start_version |
1941 | 0 | << ", delete_bitmap cardinality with cache=" |
1942 | 0 | << val->bitmap.cardinality() |
1943 | 0 | << ", delete_bitmap cardinality without cache=" |
1944 | 0 | << bitmap->cardinality(); |
1945 | 0 | } |
1946 | 0 | } |
1947 | 56.8k | } |
1948 | | |
1949 | | // It is natural for the cache to reclaim the underlying memory |
1950 | 1.22M | return std::shared_ptr<roaring::Roaring>( |
1951 | 1.22M | &val->bitmap, [handle](...) { DeleteBitmapAggCache::instance()->release(handle); }); |
1952 | 1.22M | } |
1953 | | |
1954 | | std::shared_ptr<roaring::Roaring> DeleteBitmap::get_agg_without_cache( |
1955 | 6.48k | const BitmapKey& bmk, const int64_t start_version) const { |
1956 | 6.48k | std::shared_ptr<roaring::Roaring> bitmap = std::make_shared<roaring::Roaring>(); |
1957 | 6.48k | std::shared_lock l(lock); |
1958 | 6.48k | DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), start_version}; |
1959 | 35.0k | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1960 | 33.4k | auto& [k, bm] = *it; |
1961 | 33.4k | if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) || |
1962 | 33.4k | std::get<2>(k) > std::get<2>(bmk)) { |
1963 | 4.82k | break; |
1964 | 4.82k | } |
1965 | 28.6k | *bitmap |= bm; |
1966 | 28.6k | } |
1967 | 6.48k | return bitmap; |
1968 | 6.48k | } |
1969 | | |
1970 | 0 | DeleteBitmap DeleteBitmap::diffset(const std::set<BitmapKey>& key_set) const { |
1971 | 0 | std::shared_lock l(lock); |
1972 | 0 | auto diff_key_set_view = |
1973 | 0 | delete_bitmap | std::ranges::views::transform([](const auto& kv) { return kv.first; }) | |
1974 | 0 | std::ranges::views::filter( |
1975 | 0 | [&key_set](const auto& key) { return !key_set.contains(key); }); |
1976 | |
|
1977 | 0 | DeleteBitmap dbm(_tablet_id); |
1978 | 0 | for (const auto& key : diff_key_set_view) { |
1979 | 0 | const auto* bitmap = get(key); |
1980 | 0 | DCHECK_NE(bitmap, nullptr); |
1981 | 0 | dbm.delete_bitmap[key] = *bitmap; |
1982 | 0 | } |
1983 | 0 | return dbm; |
1984 | 0 | } |
1985 | | |
1986 | 0 | std::string tablet_state_name(TabletState state) { |
1987 | 0 | switch (state) { |
1988 | 0 | case TABLET_NOTREADY: |
1989 | 0 | return "TABLET_NOTREADY"; |
1990 | | |
1991 | 0 | case TABLET_RUNNING: |
1992 | 0 | return "TABLET_RUNNING"; |
1993 | | |
1994 | 0 | case TABLET_TOMBSTONED: |
1995 | 0 | return "TABLET_TOMBSTONED"; |
1996 | | |
1997 | 0 | case TABLET_STOPPED: |
1998 | 0 | return "TABLET_STOPPED"; |
1999 | | |
2000 | 0 | case TABLET_SHUTDOWN: |
2001 | 0 | return "TABLET_SHUTDOWN"; |
2002 | | |
2003 | 0 | default: |
2004 | 0 | return "TabletState(" + std::to_string(state) + ")"; |
2005 | 0 | } |
2006 | 0 | } |
2007 | | |
2008 | | } // namespace doris |