/root/doris/be/src/olap/tablet_meta.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/tablet_meta.h" |
19 | | |
20 | | #include <gen_cpp/Descriptors_types.h> |
21 | | #include <gen_cpp/Types_types.h> |
22 | | #include <gen_cpp/olap_common.pb.h> |
23 | | #include <gen_cpp/olap_file.pb.h> |
24 | | #include <gen_cpp/segment_v2.pb.h> |
25 | | #include <gen_cpp/types.pb.h> |
26 | | #include <json2pb/pb_to_json.h> |
27 | | #include <time.h> |
28 | | |
29 | | #include <cstdint> |
30 | | #include <memory> |
31 | | #include <set> |
32 | | #include <utility> |
33 | | |
34 | | #include "cloud/cloud_meta_mgr.h" |
35 | | #include "cloud/cloud_storage_engine.h" |
36 | | #include "cloud/config.h" |
37 | | #include "common/config.h" |
38 | | #include "gutil/integral_types.h" |
39 | | #include "io/fs/file_writer.h" |
40 | | #include "io/fs/local_file_system.h" |
41 | | #include "olap/data_dir.h" |
42 | | #include "olap/file_header.h" |
43 | | #include "olap/olap_common.h" |
44 | | #include "olap/olap_define.h" |
45 | | #include "olap/rowset/rowset.h" |
46 | | #include "olap/rowset/rowset_meta_manager.h" |
47 | | #include "olap/tablet_fwd.h" |
48 | | #include "olap/tablet_meta_manager.h" |
49 | | #include "olap/tablet_schema_cache.h" |
50 | | #include "olap/utils.h" |
51 | | #include "util/debug_points.h" |
52 | | #include "util/mem_info.h" |
53 | | #include "util/parse_util.h" |
54 | | #include "util/string_util.h" |
55 | | #include "util/time.h" |
56 | | #include "util/uid_util.h" |
57 | | |
58 | | using std::string; |
59 | | using std::unordered_map; |
60 | | using std::vector; |
61 | | |
62 | | namespace doris { |
63 | | #include "common/compile_check_begin.h" |
64 | | using namespace ErrorCode; |
65 | | |
66 | | TabletMetaSharedPtr TabletMeta::create( |
67 | | const TCreateTabletReq& request, const TabletUid& tablet_uid, uint64_t shard_id, |
68 | | uint32_t next_unique_id, |
69 | 299 | const unordered_map<uint32_t, uint32_t>& col_ordinal_to_unique_id) { |
70 | 299 | std::optional<TBinlogConfig> binlog_config; |
71 | 299 | if (request.__isset.binlog_config) { |
72 | 0 | binlog_config = request.binlog_config; |
73 | 0 | } |
74 | 299 | TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format = |
75 | 299 | request.inverted_index_file_storage_format; |
76 | | |
77 | | // We will discard this format. Don't make any further changes here. |
78 | 299 | if (request.__isset.inverted_index_storage_format) { |
79 | 299 | switch (request.inverted_index_storage_format) { |
80 | 0 | case TInvertedIndexStorageFormat::V1: |
81 | 0 | inverted_index_file_storage_format = TInvertedIndexFileStorageFormat::V1; |
82 | 0 | break; |
83 | 0 | case TInvertedIndexStorageFormat::V2: |
84 | 0 | inverted_index_file_storage_format = TInvertedIndexFileStorageFormat::V2; |
85 | 0 | break; |
86 | 299 | default: |
87 | 299 | break; |
88 | 299 | } |
89 | 299 | } |
90 | 299 | return std::make_shared<TabletMeta>( |
91 | 299 | request.table_id, request.partition_id, request.tablet_id, request.replica_id, |
92 | 299 | request.tablet_schema.schema_hash, shard_id, request.tablet_schema, next_unique_id, |
93 | 299 | col_ordinal_to_unique_id, tablet_uid, |
94 | 299 | request.__isset.tablet_type ? request.tablet_type : TTabletType::TABLET_TYPE_DISK, |
95 | 299 | request.compression_type, request.storage_policy_id, |
96 | 299 | request.__isset.enable_unique_key_merge_on_write |
97 | 299 | ? request.enable_unique_key_merge_on_write |
98 | 299 | : false, |
99 | 299 | std::move(binlog_config), request.compaction_policy, |
100 | 299 | request.time_series_compaction_goal_size_mbytes, |
101 | 299 | request.time_series_compaction_file_count_threshold, |
102 | 299 | request.time_series_compaction_time_threshold_seconds, |
103 | 299 | request.time_series_compaction_empty_rowsets_threshold, |
104 | 299 | request.time_series_compaction_level_threshold, inverted_index_file_storage_format); |
105 | 299 | } |
106 | | |
107 | 1.00k | TabletMeta::~TabletMeta() { |
108 | 1.00k | if (_handle) { |
109 | 931 | TabletSchemaCache::instance()->release(_handle); |
110 | 931 | } |
111 | 1.00k | } |
112 | | |
113 | | TabletMeta::TabletMeta() |
114 | | : _tablet_uid(0, 0), |
115 | | _schema(new TabletSchema), |
116 | 538 | _delete_bitmap(new DeleteBitmap(_tablet_id)) {} |
117 | | |
118 | | TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id, |
119 | | int64_t replica_id, int32_t schema_hash, int32_t shard_id, |
120 | | const TTabletSchema& tablet_schema, uint32_t next_unique_id, |
121 | | const std::unordered_map<uint32_t, uint32_t>& col_ordinal_to_unique_id, |
122 | | TabletUid tablet_uid, TTabletType::type tabletType, |
123 | | TCompressionType::type compression_type, int64_t storage_policy_id, |
124 | | bool enable_unique_key_merge_on_write, |
125 | | std::optional<TBinlogConfig> binlog_config, std::string compaction_policy, |
126 | | int64_t time_series_compaction_goal_size_mbytes, |
127 | | int64_t time_series_compaction_file_count_threshold, |
128 | | int64_t time_series_compaction_time_threshold_seconds, |
129 | | int64_t time_series_compaction_empty_rowsets_threshold, |
130 | | int64_t time_series_compaction_level_threshold, |
131 | | TInvertedIndexFileStorageFormat::type inverted_index_file_storage_format) |
132 | | : _tablet_uid(0, 0), |
133 | | _schema(new TabletSchema), |
134 | 443 | _delete_bitmap(new DeleteBitmap(tablet_id)) { |
135 | 443 | TabletMetaPB tablet_meta_pb; |
136 | 443 | tablet_meta_pb.set_table_id(table_id); |
137 | 443 | tablet_meta_pb.set_partition_id(partition_id); |
138 | 443 | tablet_meta_pb.set_tablet_id(tablet_id); |
139 | 443 | tablet_meta_pb.set_replica_id(replica_id); |
140 | 443 | tablet_meta_pb.set_schema_hash(schema_hash); |
141 | 443 | tablet_meta_pb.set_shard_id(shard_id); |
142 | | // Persist the creation time, but it is not used |
143 | 443 | tablet_meta_pb.set_creation_time(time(nullptr)); |
144 | 443 | tablet_meta_pb.set_cumulative_layer_point(-1); |
145 | 443 | tablet_meta_pb.set_tablet_state(PB_RUNNING); |
146 | 443 | *(tablet_meta_pb.mutable_tablet_uid()) = tablet_uid.to_proto(); |
147 | 443 | tablet_meta_pb.set_tablet_type(tabletType == TTabletType::TABLET_TYPE_DISK |
148 | 443 | ? TabletTypePB::TABLET_TYPE_DISK |
149 | 443 | : TabletTypePB::TABLET_TYPE_MEMORY); |
150 | 443 | tablet_meta_pb.set_enable_unique_key_merge_on_write(enable_unique_key_merge_on_write); |
151 | 443 | tablet_meta_pb.set_storage_policy_id(storage_policy_id); |
152 | 443 | tablet_meta_pb.set_compaction_policy(compaction_policy); |
153 | 443 | tablet_meta_pb.set_time_series_compaction_goal_size_mbytes( |
154 | 443 | time_series_compaction_goal_size_mbytes); |
155 | 443 | tablet_meta_pb.set_time_series_compaction_file_count_threshold( |
156 | 443 | time_series_compaction_file_count_threshold); |
157 | 443 | tablet_meta_pb.set_time_series_compaction_time_threshold_seconds( |
158 | 443 | time_series_compaction_time_threshold_seconds); |
159 | 443 | tablet_meta_pb.set_time_series_compaction_empty_rowsets_threshold( |
160 | 443 | time_series_compaction_empty_rowsets_threshold); |
161 | 443 | tablet_meta_pb.set_time_series_compaction_level_threshold( |
162 | 443 | time_series_compaction_level_threshold); |
163 | 443 | TabletSchemaPB* schema = tablet_meta_pb.mutable_schema(); |
164 | 443 | schema->set_num_short_key_columns(tablet_schema.short_key_column_count); |
165 | 443 | schema->set_num_rows_per_row_block(config::default_num_rows_per_column_file_block); |
166 | 443 | schema->set_sequence_col_idx(tablet_schema.sequence_col_idx); |
167 | 443 | switch (tablet_schema.keys_type) { |
168 | 31 | case TKeysType::DUP_KEYS: |
169 | 31 | schema->set_keys_type(KeysType::DUP_KEYS); |
170 | 31 | break; |
171 | 301 | case TKeysType::UNIQUE_KEYS: |
172 | 301 | schema->set_keys_type(KeysType::UNIQUE_KEYS); |
173 | 301 | break; |
174 | 63 | case TKeysType::AGG_KEYS: |
175 | 63 | schema->set_keys_type(KeysType::AGG_KEYS); |
176 | 63 | break; |
177 | 48 | default: |
178 | 48 | LOG(WARNING) << "unknown tablet keys type"; |
179 | 48 | break; |
180 | 443 | } |
181 | | // compress_kind used to compress segment files |
182 | 443 | schema->set_compress_kind(COMPRESS_LZ4); |
183 | | |
184 | | // compression_type used to compress segment page |
185 | 443 | switch (compression_type) { |
186 | 0 | case TCompressionType::NO_COMPRESSION: |
187 | 0 | schema->set_compression_type(segment_v2::NO_COMPRESSION); |
188 | 0 | break; |
189 | 0 | case TCompressionType::SNAPPY: |
190 | 0 | schema->set_compression_type(segment_v2::SNAPPY); |
191 | 0 | break; |
192 | 0 | case TCompressionType::LZ4: |
193 | 0 | schema->set_compression_type(segment_v2::LZ4); |
194 | 0 | break; |
195 | 443 | case TCompressionType::LZ4F: |
196 | 443 | schema->set_compression_type(segment_v2::LZ4F); |
197 | 443 | break; |
198 | 0 | case TCompressionType::ZLIB: |
199 | 0 | schema->set_compression_type(segment_v2::ZLIB); |
200 | 0 | break; |
201 | 0 | case TCompressionType::ZSTD: |
202 | 0 | schema->set_compression_type(segment_v2::ZSTD); |
203 | 0 | break; |
204 | 0 | default: |
205 | 0 | schema->set_compression_type(segment_v2::LZ4F); |
206 | 0 | break; |
207 | 443 | } |
208 | | |
209 | 443 | switch (inverted_index_file_storage_format) { |
210 | 0 | case TInvertedIndexFileStorageFormat::V1: |
211 | 0 | schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1); |
212 | 0 | break; |
213 | 443 | case TInvertedIndexFileStorageFormat::V2: |
214 | 443 | schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); |
215 | 443 | break; |
216 | 0 | case TInvertedIndexFileStorageFormat::V3: |
217 | 0 | schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V3); |
218 | 0 | break; |
219 | 0 | default: |
220 | 0 | schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2); |
221 | 0 | break; |
222 | 443 | } |
223 | | |
224 | 443 | switch (tablet_schema.sort_type) { |
225 | 0 | case TSortType::type::ZORDER: |
226 | 0 | schema->set_sort_type(SortType::ZORDER); |
227 | 0 | break; |
228 | 443 | default: |
229 | 443 | schema->set_sort_type(SortType::LEXICAL); |
230 | 443 | } |
231 | 443 | schema->set_sort_col_num(tablet_schema.sort_col_num); |
232 | 443 | for (const auto& i : tablet_schema.cluster_key_uids) { |
233 | 2 | schema->add_cluster_key_uids(i); |
234 | 2 | } |
235 | 443 | tablet_meta_pb.set_in_restore_mode(false); |
236 | | |
237 | | // set column information |
238 | 443 | uint32_t col_ordinal = 0; |
239 | 443 | bool has_bf_columns = false; |
240 | 2.10k | for (TColumn tcolumn : tablet_schema.columns) { |
241 | 2.10k | ColumnPB* column = schema->add_column(); |
242 | 2.10k | uint32_t unique_id = -1; |
243 | 2.10k | if (tcolumn.col_unique_id >= 0) { |
244 | 1 | unique_id = tcolumn.col_unique_id; |
245 | 2.10k | } else { |
246 | 2.10k | unique_id = col_ordinal_to_unique_id.at(col_ordinal); |
247 | 2.10k | } |
248 | 2.10k | col_ordinal++; |
249 | 2.10k | init_column_from_tcolumn(unique_id, tcolumn, column); |
250 | | |
251 | 2.10k | if (column->is_bf_column()) { |
252 | 0 | has_bf_columns = true; |
253 | 0 | } |
254 | | |
255 | 2.10k | if (tablet_schema.__isset.indexes) { |
256 | 0 | for (auto& index : tablet_schema.indexes) { |
257 | 0 | if (index.index_type == TIndexType::type::BITMAP) { |
258 | 0 | DCHECK_EQ(index.columns.size(), 1); |
259 | 0 | if (iequal(tcolumn.column_name, index.columns[0])) { |
260 | 0 | column->set_has_bitmap_index(true); |
261 | 0 | break; |
262 | 0 | } |
263 | 0 | } else if (index.index_type == TIndexType::type::BLOOMFILTER || |
264 | 0 | index.index_type == TIndexType::type::NGRAM_BF) { |
265 | 0 | DCHECK_EQ(index.columns.size(), 1); |
266 | 0 | if (iequal(tcolumn.column_name, index.columns[0])) { |
267 | 0 | column->set_is_bf_column(true); |
268 | 0 | break; |
269 | 0 | } |
270 | 0 | } |
271 | 0 | } |
272 | 0 | } |
273 | 2.10k | } |
274 | | |
275 | | // copy index meta |
276 | 443 | if (tablet_schema.__isset.indexes) { |
277 | 0 | for (auto& index : tablet_schema.indexes) { |
278 | 0 | TabletIndexPB* index_pb = schema->add_index(); |
279 | 0 | index_pb->set_index_id(index.index_id); |
280 | 0 | index_pb->set_index_name(index.index_name); |
281 | | // init col_unique_id in index at be side, since col_unique_id may be -1 at fe side |
282 | | // get column unique id by name |
283 | 0 | for (auto column_name : index.columns) { |
284 | 0 | for (auto column : schema->column()) { |
285 | 0 | if (iequal(column.name(), column_name)) { |
286 | 0 | index_pb->add_col_unique_id(column.unique_id()); |
287 | 0 | } |
288 | 0 | } |
289 | 0 | } |
290 | 0 | switch (index.index_type) { |
291 | 0 | case TIndexType::BITMAP: |
292 | 0 | index_pb->set_index_type(IndexType::BITMAP); |
293 | 0 | break; |
294 | 0 | case TIndexType::INVERTED: |
295 | 0 | index_pb->set_index_type(IndexType::INVERTED); |
296 | 0 | break; |
297 | 0 | case TIndexType::BLOOMFILTER: |
298 | 0 | index_pb->set_index_type(IndexType::BLOOMFILTER); |
299 | 0 | break; |
300 | 0 | case TIndexType::NGRAM_BF: |
301 | 0 | index_pb->set_index_type(IndexType::NGRAM_BF); |
302 | 0 | break; |
303 | 0 | } |
304 | | |
305 | 0 | if (index.__isset.properties) { |
306 | 0 | auto properties = index_pb->mutable_properties(); |
307 | 0 | for (auto kv : index.properties) { |
308 | 0 | (*properties)[kv.first] = kv.second; |
309 | 0 | } |
310 | 0 | } |
311 | 0 | } |
312 | 0 | } |
313 | | |
314 | 443 | schema->set_next_column_unique_id(next_unique_id); |
315 | 443 | if (has_bf_columns && tablet_schema.__isset.bloom_filter_fpp) { |
316 | 0 | schema->set_bf_fpp(tablet_schema.bloom_filter_fpp); |
317 | 0 | } |
318 | | |
319 | 443 | if (tablet_schema.__isset.is_in_memory) { |
320 | 0 | schema->set_is_in_memory(tablet_schema.is_in_memory); |
321 | 0 | } |
322 | | |
323 | 443 | if (tablet_schema.__isset.disable_auto_compaction) { |
324 | 0 | schema->set_disable_auto_compaction(tablet_schema.disable_auto_compaction); |
325 | 0 | } |
326 | | |
327 | 443 | if (tablet_schema.__isset.variant_enable_flatten_nested) { |
328 | 443 | schema->set_variant_enable_flatten_nested(tablet_schema.variant_enable_flatten_nested); |
329 | 443 | } |
330 | | |
331 | 443 | if (tablet_schema.__isset.enable_single_replica_compaction) { |
332 | 443 | schema->set_enable_single_replica_compaction( |
333 | 443 | tablet_schema.enable_single_replica_compaction); |
334 | 443 | } |
335 | | |
336 | 443 | if (tablet_schema.__isset.delete_sign_idx) { |
337 | 443 | schema->set_delete_sign_idx(tablet_schema.delete_sign_idx); |
338 | 443 | } |
339 | 443 | if (tablet_schema.__isset.store_row_column) { |
340 | 443 | schema->set_store_row_column(tablet_schema.store_row_column); |
341 | 443 | } |
342 | 443 | if (tablet_schema.__isset.row_store_page_size) { |
343 | 443 | schema->set_row_store_page_size(tablet_schema.row_store_page_size); |
344 | 443 | } |
345 | 443 | if (tablet_schema.__isset.storage_page_size) { |
346 | 443 | schema->set_storage_page_size(tablet_schema.storage_page_size); |
347 | 443 | } |
348 | 443 | if (tablet_schema.__isset.skip_write_index_on_load) { |
349 | 443 | schema->set_skip_write_index_on_load(tablet_schema.skip_write_index_on_load); |
350 | 443 | } |
351 | 443 | if (tablet_schema.__isset.row_store_col_cids) { |
352 | 0 | schema->mutable_row_store_column_unique_ids()->Add(tablet_schema.row_store_col_cids.begin(), |
353 | 0 | tablet_schema.row_store_col_cids.end()); |
354 | 0 | } |
355 | 443 | if (binlog_config.has_value()) { |
356 | 0 | BinlogConfig tmp_binlog_config; |
357 | 0 | tmp_binlog_config = binlog_config.value(); |
358 | 0 | tmp_binlog_config.to_pb(tablet_meta_pb.mutable_binlog_config()); |
359 | 0 | } |
360 | | |
361 | 443 | init_from_pb(tablet_meta_pb); |
362 | 443 | } |
363 | | |
364 | | TabletMeta::TabletMeta(const TabletMeta& b) |
365 | | : MetadataAdder(b), |
366 | | _table_id(b._table_id), |
367 | | _index_id(b._index_id), |
368 | | _partition_id(b._partition_id), |
369 | | _tablet_id(b._tablet_id), |
370 | | _replica_id(b._replica_id), |
371 | | _schema_hash(b._schema_hash), |
372 | | _shard_id(b._shard_id), |
373 | | _creation_time(b._creation_time), |
374 | | _cumulative_layer_point(b._cumulative_layer_point), |
375 | | _tablet_uid(b._tablet_uid), |
376 | | _tablet_type(b._tablet_type), |
377 | | _tablet_state(b._tablet_state), |
378 | | _schema(b._schema), |
379 | | _rs_metas(b._rs_metas), |
380 | | _stale_rs_metas(b._stale_rs_metas), |
381 | | _in_restore_mode(b._in_restore_mode), |
382 | | _preferred_rowset_type(b._preferred_rowset_type), |
383 | | _storage_policy_id(b._storage_policy_id), |
384 | | _cooldown_meta_id(b._cooldown_meta_id), |
385 | | _enable_unique_key_merge_on_write(b._enable_unique_key_merge_on_write), |
386 | | _delete_bitmap(b._delete_bitmap), |
387 | | _binlog_config(b._binlog_config), |
388 | | _compaction_policy(b._compaction_policy), |
389 | | _time_series_compaction_goal_size_mbytes(b._time_series_compaction_goal_size_mbytes), |
390 | | _time_series_compaction_file_count_threshold( |
391 | | b._time_series_compaction_file_count_threshold), |
392 | | _time_series_compaction_time_threshold_seconds( |
393 | | b._time_series_compaction_time_threshold_seconds), |
394 | | _time_series_compaction_empty_rowsets_threshold( |
395 | | b._time_series_compaction_empty_rowsets_threshold), |
396 | 0 | _time_series_compaction_level_threshold(b._time_series_compaction_level_threshold) {}; |
397 | | |
398 | | void TabletMeta::init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn, |
399 | 2.10k | ColumnPB* column) { |
400 | 2.10k | column->set_unique_id(unique_id); |
401 | 2.10k | column->set_name(tcolumn.column_name); |
402 | 2.10k | column->set_has_bitmap_index(tcolumn.has_bitmap_index); |
403 | 2.10k | column->set_is_auto_increment(tcolumn.is_auto_increment); |
404 | 2.10k | string data_type; |
405 | 2.10k | EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type); |
406 | 2.10k | column->set_type(data_type); |
407 | | |
408 | 2.10k | uint32_t length = TabletColumn::get_field_length_by_type(tcolumn.column_type.type, |
409 | 2.10k | tcolumn.column_type.len); |
410 | 2.10k | column->set_length(length); |
411 | 2.10k | column->set_index_length(length); |
412 | 2.10k | column->set_precision(tcolumn.column_type.precision); |
413 | 2.10k | column->set_frac(tcolumn.column_type.scale); |
414 | | |
415 | 2.10k | if (tcolumn.__isset.result_is_nullable) { |
416 | 0 | column->set_result_is_nullable(tcolumn.result_is_nullable); |
417 | 0 | } |
418 | | |
419 | 2.10k | if (tcolumn.__isset.be_exec_version) { |
420 | 2.10k | column->set_be_exec_version(tcolumn.be_exec_version); |
421 | 2.10k | } |
422 | | |
423 | 2.10k | if (tcolumn.column_type.type == TPrimitiveType::VARCHAR || |
424 | 2.10k | tcolumn.column_type.type == TPrimitiveType::STRING) { |
425 | 101 | if (!tcolumn.column_type.__isset.index_len) { |
426 | 101 | column->set_index_length(10); |
427 | 101 | } else { |
428 | 0 | column->set_index_length(tcolumn.column_type.index_len); |
429 | 0 | } |
430 | 101 | } |
431 | 2.10k | if (!tcolumn.is_key) { |
432 | 1.13k | column->set_is_key(false); |
433 | 1.13k | if (tcolumn.__isset.aggregation) { |
434 | 0 | column->set_aggregation(tcolumn.aggregation); |
435 | 1.13k | } else { |
436 | 1.13k | string aggregation_type; |
437 | 1.13k | EnumToString(TAggregationType, tcolumn.aggregation_type, aggregation_type); |
438 | 1.13k | column->set_aggregation(aggregation_type); |
439 | 1.13k | } |
440 | 1.13k | } else { |
441 | 973 | column->set_is_key(true); |
442 | 973 | column->set_aggregation("NONE"); |
443 | 973 | } |
444 | 2.10k | column->set_is_nullable(tcolumn.is_allow_null); |
445 | 2.10k | if (tcolumn.__isset.default_value) { |
446 | 0 | column->set_default_value(tcolumn.default_value); |
447 | 0 | } |
448 | 2.10k | if (tcolumn.__isset.is_bloom_filter_column) { |
449 | 0 | column->set_is_bf_column(tcolumn.is_bloom_filter_column); |
450 | 0 | } |
451 | 2.10k | for (size_t i = 0; i < tcolumn.children_column.size(); i++) { |
452 | 0 | ColumnPB* children_column = column->add_children_columns(); |
453 | 0 | init_column_from_tcolumn(tcolumn.children_column[i].col_unique_id, |
454 | 0 | tcolumn.children_column[i], children_column); |
455 | 0 | } |
456 | 2.10k | } |
457 | | |
458 | 4 | Status TabletMeta::create_from_file(const string& file_path) { |
459 | 4 | TabletMetaPB tablet_meta_pb; |
460 | 4 | RETURN_IF_ERROR(load_from_file(file_path, &tablet_meta_pb)); |
461 | 4 | init_from_pb(tablet_meta_pb); |
462 | 4 | return Status::OK(); |
463 | 4 | } |
464 | | |
465 | 7 | Status TabletMeta::load_from_file(const string& file_path, TabletMetaPB* tablet_meta_pb) { |
466 | 7 | FileHeader<TabletMetaPB> file_header(file_path); |
467 | | // In file_header.deserialize(), it validates file length, signature, checksum of protobuf. |
468 | 7 | RETURN_IF_ERROR(file_header.deserialize()); |
469 | 7 | try { |
470 | 7 | tablet_meta_pb->CopyFrom(file_header.message()); |
471 | 7 | } catch (...) { |
472 | 0 | return Status::Error<PARSE_PROTOBUF_ERROR>("fail to copy protocol buffer object. file={}", |
473 | 0 | file_path); |
474 | 0 | } |
475 | 7 | return Status::OK(); |
476 | 7 | } |
477 | | |
478 | | std::string TabletMeta::construct_header_file_path(const string& schema_hash_path, |
479 | 3 | int64_t tablet_id) { |
480 | 3 | std::stringstream header_name_stream; |
481 | 3 | header_name_stream << schema_hash_path << "/" << tablet_id << ".hdr"; |
482 | 3 | return header_name_stream.str(); |
483 | 3 | } |
484 | | |
485 | 0 | Status TabletMeta::save_as_json(const string& file_path) { |
486 | 0 | std::string json_meta; |
487 | 0 | json2pb::Pb2JsonOptions json_options; |
488 | 0 | json_options.pretty_json = true; |
489 | 0 | json_options.bytes_to_base64 = true; |
490 | 0 | to_json(&json_meta, json_options); |
491 | | // save to file |
492 | 0 | io::FileWriterPtr file_writer; |
493 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_file(file_path, &file_writer)); |
494 | 0 | RETURN_IF_ERROR(file_writer->append(json_meta)); |
495 | 0 | RETURN_IF_ERROR(file_writer->close()); |
496 | 0 | return Status::OK(); |
497 | 0 | } |
498 | | |
499 | 229 | Status TabletMeta::save(const string& file_path) { |
500 | 229 | TabletMetaPB tablet_meta_pb; |
501 | 229 | to_meta_pb(&tablet_meta_pb); |
502 | 229 | return TabletMeta::save(file_path, tablet_meta_pb); |
503 | 229 | } |
504 | | |
505 | 232 | Status TabletMeta::save(const string& file_path, const TabletMetaPB& tablet_meta_pb) { |
506 | 232 | DCHECK(!file_path.empty()); |
507 | 232 | FileHeader<TabletMetaPB> file_header(file_path); |
508 | 232 | try { |
509 | 232 | file_header.mutable_message()->CopyFrom(tablet_meta_pb); |
510 | 232 | } catch (...) { |
511 | 0 | LOG(WARNING) << "fail to copy protocol buffer object. file='" << file_path; |
512 | 0 | return Status::Error<ErrorCode::INTERNAL_ERROR>( |
513 | 0 | "fail to copy protocol buffer object. file={}", file_path); |
514 | 0 | } |
515 | 232 | RETURN_IF_ERROR(file_header.prepare()); |
516 | 232 | RETURN_IF_ERROR(file_header.serialize()); |
517 | 232 | return Status::OK(); |
518 | 232 | } |
519 | | |
520 | 564 | Status TabletMeta::save_meta(DataDir* data_dir) { |
521 | 564 | std::lock_guard<std::shared_mutex> wrlock(_meta_lock); |
522 | 564 | return _save_meta(data_dir); |
523 | 564 | } |
524 | | |
525 | 564 | Status TabletMeta::_save_meta(DataDir* data_dir) { |
526 | | // check if tablet uid is valid |
527 | 564 | if (_tablet_uid.hi == 0 && _tablet_uid.lo == 0) { |
528 | 0 | LOG(FATAL) << "tablet_uid is invalid" |
529 | 0 | << " tablet=" << tablet_id() << " _tablet_uid=" << _tablet_uid.to_string(); |
530 | 0 | } |
531 | 564 | string meta_binary; |
532 | | |
533 | 564 | auto t1 = MonotonicMicros(); |
534 | 564 | serialize(&meta_binary); |
535 | 564 | auto t2 = MonotonicMicros(); |
536 | 564 | Status status = TabletMetaManager::save(data_dir, tablet_id(), schema_hash(), meta_binary); |
537 | 564 | if (!status.ok()) { |
538 | 0 | LOG(FATAL) << "fail to save tablet_meta. status=" << status << ", tablet_id=" << tablet_id() |
539 | 0 | << ", schema_hash=" << schema_hash(); |
540 | 0 | } |
541 | 564 | auto t3 = MonotonicMicros(); |
542 | 564 | auto cost = t3 - t1; |
543 | 564 | if (cost > 1 * 1000 * 1000) { |
544 | 0 | LOG(INFO) << "save tablet(" << tablet_id() << ") meta too slow. serialize cost " << t2 - t1 |
545 | 0 | << "(us), serialized binary size: " << meta_binary.length() |
546 | 0 | << "(bytes), write rocksdb cost " << t3 - t2 << "(us)"; |
547 | 0 | } |
548 | 564 | return status; |
549 | 564 | } |
550 | | |
551 | 569 | void TabletMeta::serialize(string* meta_binary) { |
552 | 569 | TabletMetaPB tablet_meta_pb; |
553 | 569 | to_meta_pb(&tablet_meta_pb); |
554 | 569 | if (tablet_meta_pb.partition_id() <= 0) { |
555 | 468 | LOG(WARNING) << "invalid partition id " << tablet_meta_pb.partition_id() << " tablet " |
556 | 468 | << tablet_meta_pb.tablet_id(); |
557 | 468 | } |
558 | 569 | DBUG_EXECUTE_IF("TabletMeta::serialize::zero_partition_id", { |
559 | 569 | long partition_id = tablet_meta_pb.partition_id(); |
560 | 569 | tablet_meta_pb.set_partition_id(0); |
561 | 569 | LOG(WARNING) << "set debug point TabletMeta::serialize::zero_partition_id old=" |
562 | 569 | << partition_id << " new=" << tablet_meta_pb.DebugString(); |
563 | 569 | }); |
564 | 569 | bool serialize_success = tablet_meta_pb.SerializeToString(meta_binary); |
565 | 569 | if (!_rs_metas.empty() || !_stale_rs_metas.empty()) { |
566 | 569 | _avg_rs_meta_serialize_size = |
567 | 569 | meta_binary->length() / (_rs_metas.size() + _stale_rs_metas.size()); |
568 | 569 | if (meta_binary->length() > config::tablet_meta_serialize_size_limit || |
569 | 569 | !serialize_success) { |
570 | 0 | int64_t origin_meta_size = meta_binary->length(); |
571 | 0 | int64_t stale_rowsets_num = tablet_meta_pb.stale_rs_metas().size(); |
572 | 0 | tablet_meta_pb.clear_stale_rs_metas(); |
573 | 0 | meta_binary->clear(); |
574 | 0 | serialize_success = tablet_meta_pb.SerializeToString(meta_binary); |
575 | 0 | LOG(WARNING) << "tablet meta serialization size exceeds limit: " |
576 | 0 | << config::tablet_meta_serialize_size_limit |
577 | 0 | << " clean up stale rowsets, tablet id: " << tablet_id() |
578 | 0 | << " stale rowset num: " << stale_rowsets_num |
579 | 0 | << " serialization size before clean " << origin_meta_size |
580 | 0 | << " serialization size after clean " << meta_binary->length(); |
581 | 0 | } |
582 | 569 | } |
583 | | |
584 | 569 | if (!serialize_success) { |
585 | 0 | LOG(FATAL) << "failed to serialize meta " << tablet_id(); |
586 | 0 | } |
587 | 569 | } |
588 | | |
589 | 461 | Status TabletMeta::deserialize(std::string_view meta_binary) { |
590 | 461 | TabletMetaPB tablet_meta_pb; |
591 | 461 | bool parsed = tablet_meta_pb.ParseFromArray(meta_binary.data(), |
592 | 461 | static_cast<int32_t>(meta_binary.size())); |
593 | 461 | if (!parsed) { |
594 | 0 | return Status::Error<INIT_FAILED>("parse tablet meta failed"); |
595 | 0 | } |
596 | 461 | init_from_pb(tablet_meta_pb); |
597 | 461 | return Status::OK(); |
598 | 461 | } |
599 | | |
600 | 934 | void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) { |
601 | 934 | _table_id = tablet_meta_pb.table_id(); |
602 | 934 | _index_id = tablet_meta_pb.index_id(); |
603 | 934 | _partition_id = tablet_meta_pb.partition_id(); |
604 | 934 | _tablet_id = tablet_meta_pb.tablet_id(); |
605 | 934 | _replica_id = tablet_meta_pb.replica_id(); |
606 | 934 | _schema_hash = tablet_meta_pb.schema_hash(); |
607 | 934 | _shard_id = tablet_meta_pb.shard_id(); |
608 | 934 | _creation_time = tablet_meta_pb.creation_time(); |
609 | 934 | _cumulative_layer_point = tablet_meta_pb.cumulative_layer_point(); |
610 | 934 | _tablet_uid = TabletUid(tablet_meta_pb.tablet_uid()); |
611 | 934 | _ttl_seconds = tablet_meta_pb.ttl_seconds(); |
612 | 934 | if (tablet_meta_pb.has_tablet_type()) { |
613 | 914 | _tablet_type = tablet_meta_pb.tablet_type(); |
614 | 914 | } else { |
615 | 20 | _tablet_type = TabletTypePB::TABLET_TYPE_DISK; |
616 | 20 | } |
617 | | |
618 | | // init _tablet_state |
619 | 934 | switch (tablet_meta_pb.tablet_state()) { |
620 | 26 | case PB_NOTREADY: |
621 | 26 | _tablet_state = TabletState::TABLET_NOTREADY; |
622 | 26 | break; |
623 | 683 | case PB_RUNNING: |
624 | 683 | _tablet_state = TabletState::TABLET_RUNNING; |
625 | 683 | break; |
626 | 0 | case PB_TOMBSTONED: |
627 | 0 | _tablet_state = TabletState::TABLET_TOMBSTONED; |
628 | 0 | break; |
629 | 0 | case PB_STOPPED: |
630 | 0 | _tablet_state = TabletState::TABLET_STOPPED; |
631 | 0 | break; |
632 | 225 | case PB_SHUTDOWN: |
633 | 225 | _tablet_state = TabletState::TABLET_SHUTDOWN; |
634 | 225 | break; |
635 | 0 | default: |
636 | 0 | LOG(WARNING) << "tablet has no state. tablet=" << tablet_id() |
637 | 0 | << ", schema_hash=" << schema_hash(); |
638 | 934 | } |
639 | | |
640 | | // init _schema |
641 | 934 | TabletSchemaSPtr schema = std::make_shared<TabletSchema>(); |
642 | 934 | schema->init_from_pb(tablet_meta_pb.schema()); |
643 | 934 | if (_handle) { |
644 | 3 | TabletSchemaCache::instance()->release(_handle); |
645 | 3 | } |
646 | 934 | auto pair = TabletSchemaCache::instance()->insert(schema->to_key()); |
647 | 934 | _handle = pair.first; |
648 | 934 | _schema = pair.second; |
649 | | |
650 | 934 | if (tablet_meta_pb.has_enable_unique_key_merge_on_write()) { |
651 | 914 | _enable_unique_key_merge_on_write = tablet_meta_pb.enable_unique_key_merge_on_write(); |
652 | 914 | } |
653 | | |
654 | | // init _rs_metas |
655 | 10.8k | for (auto& it : tablet_meta_pb.rs_metas()) { |
656 | 10.8k | RowsetMetaSharedPtr rs_meta(new RowsetMeta()); |
657 | 10.8k | rs_meta->init_from_pb(it); |
658 | 10.8k | _rs_metas.push_back(std::move(rs_meta)); |
659 | 10.8k | } |
660 | | |
661 | | // For mow table, delete bitmap of stale rowsets has not been persisted. |
662 | | // When be restart, query should not read the stale rowset, otherwise duplicate keys |
663 | | // will be read out. Therefore, we don't add them to _stale_rs_meta for mow table. |
664 | 934 | if (!config::skip_loading_stale_rowset_meta && !_enable_unique_key_merge_on_write) { |
665 | 899 | for (auto& it : tablet_meta_pb.stale_rs_metas()) { |
666 | 0 | RowsetMetaSharedPtr rs_meta(new RowsetMeta()); |
667 | 0 | rs_meta->init_from_pb(it); |
668 | 0 | _stale_rs_metas.push_back(std::move(rs_meta)); |
669 | 0 | } |
670 | 899 | } |
671 | | |
672 | 934 | if (tablet_meta_pb.has_in_restore_mode()) { |
673 | 914 | _in_restore_mode = tablet_meta_pb.in_restore_mode(); |
674 | 914 | } |
675 | | |
676 | 934 | if (tablet_meta_pb.has_preferred_rowset_type()) { |
677 | 471 | _preferred_rowset_type = tablet_meta_pb.preferred_rowset_type(); |
678 | 471 | } |
679 | | |
680 | 934 | _storage_policy_id = tablet_meta_pb.storage_policy_id(); |
681 | 934 | if (tablet_meta_pb.has_cooldown_meta_id()) { |
682 | 0 | _cooldown_meta_id = tablet_meta_pb.cooldown_meta_id(); |
683 | 0 | } |
684 | | |
685 | 934 | if (tablet_meta_pb.has_delete_bitmap()) { |
686 | 0 | int rst_ids_size = tablet_meta_pb.delete_bitmap().rowset_ids_size(); |
687 | 0 | int seg_ids_size = tablet_meta_pb.delete_bitmap().segment_ids_size(); |
688 | 0 | int versions_size = tablet_meta_pb.delete_bitmap().versions_size(); |
689 | 0 | int seg_maps_size = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps_size(); |
690 | 0 | CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size && |
691 | 0 | seg_maps_size == versions_size); |
692 | 0 | for (int i = 0; i < rst_ids_size; ++i) { |
693 | 0 | RowsetId rst_id; |
694 | 0 | rst_id.init(tablet_meta_pb.delete_bitmap().rowset_ids(i)); |
695 | 0 | auto seg_id = tablet_meta_pb.delete_bitmap().segment_ids(i); |
696 | 0 | auto ver = tablet_meta_pb.delete_bitmap().versions(i); |
697 | 0 | auto bitmap = tablet_meta_pb.delete_bitmap().segment_delete_bitmaps(i).data(); |
698 | 0 | delete_bitmap().delete_bitmap[{rst_id, seg_id, ver}] = roaring::Roaring::read(bitmap); |
699 | 0 | } |
700 | 0 | } |
701 | | |
702 | 934 | if (tablet_meta_pb.has_binlog_config()) { |
703 | 469 | _binlog_config = tablet_meta_pb.binlog_config(); |
704 | 469 | } |
705 | 934 | _compaction_policy = tablet_meta_pb.compaction_policy(); |
706 | 934 | _time_series_compaction_goal_size_mbytes = |
707 | 934 | tablet_meta_pb.time_series_compaction_goal_size_mbytes(); |
708 | 934 | _time_series_compaction_file_count_threshold = |
709 | 934 | tablet_meta_pb.time_series_compaction_file_count_threshold(); |
710 | 934 | _time_series_compaction_time_threshold_seconds = |
711 | 934 | tablet_meta_pb.time_series_compaction_time_threshold_seconds(); |
712 | 934 | _time_series_compaction_empty_rowsets_threshold = |
713 | 934 | tablet_meta_pb.time_series_compaction_empty_rowsets_threshold(); |
714 | 934 | _time_series_compaction_level_threshold = |
715 | 934 | tablet_meta_pb.time_series_compaction_level_threshold(); |
716 | 934 | } |
717 | | |
718 | 806 | void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) { |
719 | 806 | tablet_meta_pb->set_table_id(table_id()); |
720 | 806 | tablet_meta_pb->set_index_id(index_id()); |
721 | 806 | tablet_meta_pb->set_partition_id(partition_id()); |
722 | 806 | tablet_meta_pb->set_tablet_id(tablet_id()); |
723 | 806 | tablet_meta_pb->set_replica_id(replica_id()); |
724 | 806 | tablet_meta_pb->set_schema_hash(schema_hash()); |
725 | 806 | tablet_meta_pb->set_shard_id(shard_id()); |
726 | 806 | tablet_meta_pb->set_creation_time(creation_time()); |
727 | 806 | tablet_meta_pb->set_cumulative_layer_point(cumulative_layer_point()); |
728 | 806 | *(tablet_meta_pb->mutable_tablet_uid()) = tablet_uid().to_proto(); |
729 | 806 | tablet_meta_pb->set_tablet_type(_tablet_type); |
730 | 806 | tablet_meta_pb->set_ttl_seconds(_ttl_seconds); |
731 | 806 | switch (tablet_state()) { |
732 | 8 | case TABLET_NOTREADY: |
733 | 8 | tablet_meta_pb->set_tablet_state(PB_NOTREADY); |
734 | 8 | break; |
735 | 320 | case TABLET_RUNNING: |
736 | 320 | tablet_meta_pb->set_tablet_state(PB_RUNNING); |
737 | 320 | break; |
738 | 0 | case TABLET_TOMBSTONED: |
739 | 0 | tablet_meta_pb->set_tablet_state(PB_TOMBSTONED); |
740 | 0 | break; |
741 | 0 | case TABLET_STOPPED: |
742 | 0 | tablet_meta_pb->set_tablet_state(PB_STOPPED); |
743 | 0 | break; |
744 | 478 | case TABLET_SHUTDOWN: |
745 | 478 | tablet_meta_pb->set_tablet_state(PB_SHUTDOWN); |
746 | 478 | break; |
747 | 806 | } |
748 | | |
749 | | // RowsetMetaPB is separated from TabletMetaPB |
750 | 806 | if (!config::is_cloud_mode()) { |
751 | 21.6k | for (auto& rs : _rs_metas) { |
752 | 21.6k | rs->to_rowset_pb(tablet_meta_pb->add_rs_metas()); |
753 | 21.6k | } |
754 | 806 | for (auto rs : _stale_rs_metas) { |
755 | 0 | rs->to_rowset_pb(tablet_meta_pb->add_stale_rs_metas()); |
756 | 0 | } |
757 | 806 | } |
758 | | |
759 | 806 | _schema->to_schema_pb(tablet_meta_pb->mutable_schema()); |
760 | | |
761 | 806 | tablet_meta_pb->set_in_restore_mode(in_restore_mode()); |
762 | | |
763 | | // to avoid modify tablet meta to the greatest extend |
764 | 806 | if (_preferred_rowset_type == BETA_ROWSET) { |
765 | 806 | tablet_meta_pb->set_preferred_rowset_type(_preferred_rowset_type); |
766 | 806 | } |
767 | 806 | if (_storage_policy_id > 0) { |
768 | 5 | tablet_meta_pb->set_storage_policy_id(_storage_policy_id); |
769 | 5 | } |
770 | 806 | if (_cooldown_meta_id.initialized()) { |
771 | 5 | tablet_meta_pb->mutable_cooldown_meta_id()->CopyFrom(_cooldown_meta_id.to_proto()); |
772 | 5 | } |
773 | | |
774 | 806 | tablet_meta_pb->set_enable_unique_key_merge_on_write(_enable_unique_key_merge_on_write); |
775 | | |
776 | 806 | if (_enable_unique_key_merge_on_write) { |
777 | 4 | std::set<RowsetId> stale_rs_ids; |
778 | 4 | for (const auto& rowset : _stale_rs_metas) { |
779 | 0 | stale_rs_ids.insert(rowset->rowset_id()); |
780 | 0 | } |
781 | 4 | DeleteBitmapPB* delete_bitmap_pb = tablet_meta_pb->mutable_delete_bitmap(); |
782 | 4 | for (auto& [id, bitmap] : delete_bitmap().snapshot().delete_bitmap) { |
783 | 2 | auto& [rowset_id, segment_id, ver] = id; |
784 | 2 | if (stale_rs_ids.count(rowset_id) != 0) { |
785 | 0 | continue; |
786 | 0 | } |
787 | 2 | delete_bitmap_pb->add_rowset_ids(rowset_id.to_string()); |
788 | 2 | delete_bitmap_pb->add_segment_ids(segment_id); |
789 | 2 | delete_bitmap_pb->add_versions(ver); |
790 | 2 | std::string bitmap_data(bitmap.getSizeInBytes(), '\0'); |
791 | 2 | bitmap.write(bitmap_data.data()); |
792 | 2 | *(delete_bitmap_pb->add_segment_delete_bitmaps()) = std::move(bitmap_data); |
793 | 2 | } |
794 | 4 | } |
795 | 806 | _binlog_config.to_pb(tablet_meta_pb->mutable_binlog_config()); |
796 | 806 | tablet_meta_pb->set_compaction_policy(compaction_policy()); |
797 | 806 | tablet_meta_pb->set_time_series_compaction_goal_size_mbytes( |
798 | 806 | time_series_compaction_goal_size_mbytes()); |
799 | 806 | tablet_meta_pb->set_time_series_compaction_file_count_threshold( |
800 | 806 | time_series_compaction_file_count_threshold()); |
801 | 806 | tablet_meta_pb->set_time_series_compaction_time_threshold_seconds( |
802 | 806 | time_series_compaction_time_threshold_seconds()); |
803 | 806 | tablet_meta_pb->set_time_series_compaction_empty_rowsets_threshold( |
804 | 806 | time_series_compaction_empty_rowsets_threshold()); |
805 | 806 | tablet_meta_pb->set_time_series_compaction_level_threshold( |
806 | 806 | time_series_compaction_level_threshold()); |
807 | 806 | } |
808 | | |
809 | 2 | void TabletMeta::to_json(string* json_string, json2pb::Pb2JsonOptions& options) { |
810 | 2 | TabletMetaPB tablet_meta_pb; |
811 | 2 | to_meta_pb(&tablet_meta_pb); |
812 | 2 | json2pb::ProtoMessageToJson(tablet_meta_pb, json_string, options); |
813 | 2 | } |
814 | | |
815 | 109 | Version TabletMeta::max_version() const { |
816 | 109 | Version max_version = {-1, 0}; |
817 | 177 | for (auto& rs_meta : _rs_metas) { |
818 | 177 | if (rs_meta->end_version() > max_version.second) { |
819 | 175 | max_version = rs_meta->version(); |
820 | 175 | } |
821 | 177 | } |
822 | 109 | return max_version; |
823 | 109 | } |
824 | | |
825 | 0 | size_t TabletMeta::version_count_cross_with_range(const Version& range) const { |
826 | 0 | size_t count = 0; |
827 | 0 | for (const auto& rs_meta : _rs_metas) { |
828 | 0 | if (!(range.first > rs_meta->version().second || range.second < rs_meta->version().first)) { |
829 | 0 | count++; |
830 | 0 | } |
831 | 0 | } |
832 | 0 | return count; |
833 | 0 | } |
834 | | |
835 | 11.1k | Status TabletMeta::add_rs_meta(const RowsetMetaSharedPtr& rs_meta) { |
836 | | // check RowsetMeta is valid |
837 | 357k | for (auto& rs : _rs_metas) { |
838 | 357k | if (rs->version() == rs_meta->version()) { |
839 | 0 | if (rs->rowset_id() != rs_meta->rowset_id()) { |
840 | 0 | return Status::Error<PUSH_VERSION_ALREADY_EXIST>( |
841 | 0 | "version already exist. rowset_id={}, version={}, tablet={}", |
842 | 0 | rs->rowset_id().to_string(), rs->version().to_string(), tablet_id()); |
843 | 0 | } else { |
844 | | // rowsetid,version is equal, it is a duplicate req, skip it |
845 | 0 | return Status::OK(); |
846 | 0 | } |
847 | 0 | } |
848 | 357k | } |
849 | 11.1k | _rs_metas.push_back(rs_meta); |
850 | 11.1k | return Status::OK(); |
851 | 11.1k | } |
852 | | |
853 | 0 | void TabletMeta::add_rowsets_unchecked(const std::vector<RowsetSharedPtr>& to_add) { |
854 | 0 | for (const auto& rs : to_add) { |
855 | 0 | _rs_metas.push_back(rs->rowset_meta()); |
856 | 0 | } |
857 | 0 | } |
858 | | |
859 | | void TabletMeta::delete_rs_meta_by_version(const Version& version, |
860 | 0 | std::vector<RowsetMetaSharedPtr>* deleted_rs_metas) { |
861 | 0 | auto it = _rs_metas.begin(); |
862 | 0 | while (it != _rs_metas.end()) { |
863 | 0 | if ((*it)->version() == version) { |
864 | 0 | if (deleted_rs_metas != nullptr) { |
865 | 0 | deleted_rs_metas->push_back(*it); |
866 | 0 | } |
867 | 0 | _rs_metas.erase(it); |
868 | 0 | return; |
869 | 0 | } else { |
870 | 0 | ++it; |
871 | 0 | } |
872 | 0 | } |
873 | 0 | } |
874 | | |
875 | | void TabletMeta::modify_rs_metas(const std::vector<RowsetMetaSharedPtr>& to_add, |
876 | | const std::vector<RowsetMetaSharedPtr>& to_delete, |
877 | 24 | bool same_version) { |
878 | | // Remove to_delete rowsets from _rs_metas |
879 | 37 | for (auto rs_to_del : to_delete) { |
880 | 37 | auto it = _rs_metas.begin(); |
881 | 199 | while (it != _rs_metas.end()) { |
882 | 199 | if (rs_to_del->version() == (*it)->version()) { |
883 | 37 | _rs_metas.erase(it); |
884 | | // there should be only one rowset match the version |
885 | 37 | break; |
886 | 162 | } else { |
887 | 162 | ++it; |
888 | 162 | } |
889 | 199 | } |
890 | 37 | } |
891 | 24 | if (!same_version) { |
892 | | // put to_delete rowsets in _stale_rs_metas. |
893 | 7 | _stale_rs_metas.insert(_stale_rs_metas.end(), to_delete.begin(), to_delete.end()); |
894 | 7 | } |
895 | | // put to_add rowsets in _rs_metas. |
896 | 24 | _rs_metas.insert(_rs_metas.end(), to_add.begin(), to_add.end()); |
897 | 24 | } |
898 | | |
899 | | // Use the passing "rs_metas" to replace the rs meta in this tablet meta |
900 | | // Also clear the _stale_rs_metas because this tablet meta maybe copyied from |
901 | | // an existing tablet before. Add after revise, only the passing "rs_metas" |
902 | | // is needed. |
903 | 4 | void TabletMeta::revise_rs_metas(std::vector<RowsetMetaSharedPtr>&& rs_metas) { |
904 | 4 | std::lock_guard<std::shared_mutex> wrlock(_meta_lock); |
905 | 4 | _rs_metas = std::move(rs_metas); |
906 | 4 | _stale_rs_metas.clear(); |
907 | 4 | } |
908 | | |
909 | | // This method should call after revise_rs_metas, since new rs_metas might be a subset |
910 | | // of original tablet, we should revise the delete_bitmap according to current rowset. |
911 | | // |
912 | | // Delete bitmap is protected by Tablet::_meta_lock, we don't need to acquire the |
913 | | // TabletMeta's _meta_lock |
914 | 1 | void TabletMeta::revise_delete_bitmap_unlocked(const DeleteBitmap& delete_bitmap) { |
915 | 1 | _delete_bitmap = std::make_unique<DeleteBitmap>(tablet_id()); |
916 | 2 | for (auto rs : _rs_metas) { |
917 | 2 | DeleteBitmap rs_bm(tablet_id()); |
918 | 2 | delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX}, |
919 | 2 | &rs_bm); |
920 | 2 | _delete_bitmap->merge(rs_bm); |
921 | 2 | } |
922 | 1 | for (auto rs : _stale_rs_metas) { |
923 | 0 | DeleteBitmap rs_bm(tablet_id()); |
924 | 0 | delete_bitmap.subset({rs->rowset_id(), 0, 0}, {rs->rowset_id(), UINT32_MAX, INT64_MAX}, |
925 | 0 | &rs_bm); |
926 | 0 | _delete_bitmap->merge(rs_bm); |
927 | 0 | } |
928 | 1 | } |
929 | | |
930 | 0 | void TabletMeta::delete_stale_rs_meta_by_version(const Version& version) { |
931 | 0 | auto it = _stale_rs_metas.begin(); |
932 | 0 | while (it != _stale_rs_metas.end()) { |
933 | 0 | if ((*it)->version() == version) { |
934 | 0 | if (_enable_unique_key_merge_on_write) { |
935 | | // remove rowset delete bitmap |
936 | 0 | delete_bitmap().remove({(*it)->rowset_id(), 0, 0}, |
937 | 0 | {(*it)->rowset_id(), UINT32_MAX, 0}); |
938 | 0 | } |
939 | 0 | it = _stale_rs_metas.erase(it); |
940 | 0 | } else { |
941 | 0 | it++; |
942 | 0 | } |
943 | 0 | } |
944 | 0 | } |
945 | | |
946 | 0 | RowsetMetaSharedPtr TabletMeta::acquire_rs_meta_by_version(const Version& version) const { |
947 | 0 | for (auto it : _rs_metas) { |
948 | 0 | if (it->version() == version) { |
949 | 0 | return it; |
950 | 0 | } |
951 | 0 | } |
952 | 0 | return nullptr; |
953 | 0 | } |
954 | | |
955 | 8 | RowsetMetaSharedPtr TabletMeta::acquire_stale_rs_meta_by_version(const Version& version) const { |
956 | 8 | for (auto it : _stale_rs_metas) { |
957 | 0 | if (it->version() == version) { |
958 | 0 | return it; |
959 | 0 | } |
960 | 0 | } |
961 | 8 | return nullptr; |
962 | 8 | } |
963 | | |
964 | 23 | Status TabletMeta::set_partition_id(int64_t partition_id) { |
965 | 23 | if ((_partition_id > 0 && _partition_id != partition_id) || partition_id < 1) { |
966 | 0 | LOG(WARNING) << "cur partition id=" << _partition_id << " new partition id=" << partition_id |
967 | 0 | << " not equal"; |
968 | 0 | } |
969 | 23 | _partition_id = partition_id; |
970 | 23 | return Status::OK(); |
971 | 23 | } |
972 | | |
973 | 1 | bool operator==(const TabletMeta& a, const TabletMeta& b) { |
974 | 1 | if (a._table_id != b._table_id) return false; |
975 | 1 | if (a._index_id != b._index_id) return false; |
976 | 1 | if (a._partition_id != b._partition_id) return false; |
977 | 1 | if (a._tablet_id != b._tablet_id) return false; |
978 | 1 | if (a._replica_id != b._replica_id) return false; |
979 | 1 | if (a._schema_hash != b._schema_hash) return false; |
980 | 1 | if (a._shard_id != b._shard_id) return false; |
981 | 1 | if (a._creation_time != b._creation_time) return false; |
982 | 1 | if (a._cumulative_layer_point != b._cumulative_layer_point) return false; |
983 | 1 | if (a._tablet_uid != b._tablet_uid) return false; |
984 | 1 | if (a._tablet_type != b._tablet_type) return false; |
985 | 1 | if (a._tablet_state != b._tablet_state) return false; |
986 | 1 | if (*a._schema != *b._schema) return false; |
987 | 1 | if (a._rs_metas.size() != b._rs_metas.size()) return false; |
988 | 1 | for (int i = 0; i < a._rs_metas.size(); ++i) { |
989 | 0 | if (a._rs_metas[i] != b._rs_metas[i]) return false; |
990 | 0 | } |
991 | 1 | if (a._in_restore_mode != b._in_restore_mode) return false; |
992 | 1 | if (a._preferred_rowset_type != b._preferred_rowset_type) return false; |
993 | 1 | if (a._storage_policy_id != b._storage_policy_id) return false; |
994 | 1 | if (a._compaction_policy != b._compaction_policy) return false; |
995 | 1 | if (a._time_series_compaction_goal_size_mbytes != b._time_series_compaction_goal_size_mbytes) |
996 | 0 | return false; |
997 | 1 | if (a._time_series_compaction_file_count_threshold != |
998 | 1 | b._time_series_compaction_file_count_threshold) |
999 | 0 | return false; |
1000 | 1 | if (a._time_series_compaction_time_threshold_seconds != |
1001 | 1 | b._time_series_compaction_time_threshold_seconds) |
1002 | 0 | return false; |
1003 | 1 | if (a._time_series_compaction_empty_rowsets_threshold != |
1004 | 1 | b._time_series_compaction_empty_rowsets_threshold) |
1005 | 0 | return false; |
1006 | 1 | if (a._time_series_compaction_level_threshold != b._time_series_compaction_level_threshold) |
1007 | 0 | return false; |
1008 | 1 | return true; |
1009 | 1 | } |
1010 | | |
1011 | 0 | bool operator!=(const TabletMeta& a, const TabletMeta& b) { |
1012 | 0 | return !(a == b); |
1013 | 0 | } |
1014 | | |
1015 | 1.01k | DeleteBitmap::DeleteBitmap(int64_t tablet_id) : _tablet_id(tablet_id) { |
1016 | | // The default delete bitmap cache is set to 100MB, |
1017 | | // which can be insufficient and cause performance issues when the amount of user data is large. |
1018 | | // To mitigate the problem of an inadequate cache, |
1019 | | // we will take the larger of 0.5% of the total memory and 100MB as the delete bitmap cache size. |
1020 | 1.01k | bool is_percent = false; |
1021 | 1.01k | int64_t delete_bitmap_agg_cache_cache_limit = |
1022 | 1.01k | ParseUtil::parse_mem_spec(config::delete_bitmap_dynamic_agg_cache_limit, |
1023 | 1.01k | MemInfo::mem_limit(), MemInfo::physical_mem(), &is_percent); |
1024 | 1.01k | _agg_cache.reset(new AggCache(delete_bitmap_agg_cache_cache_limit > |
1025 | 1.01k | config::delete_bitmap_agg_cache_capacity |
1026 | 1.01k | ? delete_bitmap_agg_cache_cache_limit |
1027 | 1.01k | : config::delete_bitmap_agg_cache_capacity)); |
1028 | 1.01k | } |
1029 | | |
1030 | 7 | DeleteBitmap::DeleteBitmap(const DeleteBitmap& o) { |
1031 | 7 | delete_bitmap = o.delete_bitmap; // just copy data |
1032 | 7 | _tablet_id = o._tablet_id; |
1033 | 7 | } |
1034 | | |
1035 | 0 | DeleteBitmap& DeleteBitmap::operator=(const DeleteBitmap& o) { |
1036 | 0 | delete_bitmap = o.delete_bitmap; // just copy data |
1037 | 0 | _tablet_id = o._tablet_id; |
1038 | 0 | return *this; |
1039 | 0 | } |
1040 | | |
1041 | 0 | DeleteBitmap::DeleteBitmap(DeleteBitmap&& o) { |
1042 | 0 | delete_bitmap = std::move(o.delete_bitmap); |
1043 | 0 | _tablet_id = o._tablet_id; |
1044 | 0 | } |
1045 | | |
1046 | 0 | DeleteBitmap& DeleteBitmap::operator=(DeleteBitmap&& o) { |
1047 | 0 | delete_bitmap = std::move(o.delete_bitmap); |
1048 | 0 | _tablet_id = o._tablet_id; |
1049 | 0 | return *this; |
1050 | 0 | } |
1051 | | |
1052 | 7 | DeleteBitmap DeleteBitmap::snapshot() const { |
1053 | 7 | std::shared_lock l(lock); |
1054 | 7 | return DeleteBitmap(*this); |
1055 | 7 | } |
1056 | | |
1057 | 3 | DeleteBitmap DeleteBitmap::snapshot(Version version) const { |
1058 | | // Take snapshot first, then remove keys greater than given version. |
1059 | 3 | DeleteBitmap snapshot = this->snapshot(); |
1060 | 3 | auto it = snapshot.delete_bitmap.begin(); |
1061 | 412 | while (it != snapshot.delete_bitmap.end()) { |
1062 | 409 | if (std::get<2>(it->first) > version) { |
1063 | 4 | it = snapshot.delete_bitmap.erase(it); |
1064 | 405 | } else { |
1065 | 405 | it++; |
1066 | 405 | } |
1067 | 409 | } |
1068 | 3 | return snapshot; |
1069 | 3 | } |
1070 | | |
1071 | 459k | void DeleteBitmap::add(const BitmapKey& bmk, uint32_t row_id) { |
1072 | 459k | std::lock_guard l(lock); |
1073 | 459k | delete_bitmap[bmk].add(row_id); |
1074 | 459k | } |
1075 | | |
1076 | 0 | int DeleteBitmap::remove(const BitmapKey& bmk, uint32_t row_id) { |
1077 | 0 | std::lock_guard l(lock); |
1078 | 0 | auto it = delete_bitmap.find(bmk); |
1079 | 0 | if (it == delete_bitmap.end()) return -1; |
1080 | 0 | it->second.remove(row_id); |
1081 | 0 | return 0; |
1082 | 0 | } |
1083 | | |
1084 | 8 | void DeleteBitmap::remove(const BitmapKey& start, const BitmapKey& end) { |
1085 | 8 | std::lock_guard l(lock); |
1086 | 107 | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end();) { |
1087 | 101 | auto& [k, _] = *it; |
1088 | 101 | if (k >= end) { |
1089 | 2 | break; |
1090 | 2 | } |
1091 | 99 | it = delete_bitmap.erase(it); |
1092 | 99 | } |
1093 | 8 | } |
1094 | | |
1095 | 6 | bool DeleteBitmap::contains(const BitmapKey& bmk, uint32_t row_id) const { |
1096 | 6 | std::shared_lock l(lock); |
1097 | 6 | auto it = delete_bitmap.find(bmk); |
1098 | 6 | return it != delete_bitmap.end() && it->second.contains(row_id); |
1099 | 6 | } |
1100 | | |
1101 | 2 | bool DeleteBitmap::contains_agg(const BitmapKey& bmk, uint32_t row_id) const { |
1102 | 2 | return get_agg(bmk)->contains(row_id); |
1103 | 2 | } |
1104 | | |
1105 | 0 | bool DeleteBitmap::empty() const { |
1106 | 0 | std::shared_lock l(lock); |
1107 | 0 | return delete_bitmap.empty(); |
1108 | 0 | } |
1109 | | |
1110 | 0 | uint64_t DeleteBitmap::cardinality() const { |
1111 | 0 | std::shared_lock l(lock); |
1112 | 0 | uint64_t res = 0; |
1113 | 0 | for (auto entry : delete_bitmap) { |
1114 | 0 | if (std::get<1>(entry.first) != DeleteBitmap::INVALID_SEGMENT_ID) { |
1115 | 0 | res += entry.second.cardinality(); |
1116 | 0 | } |
1117 | 0 | } |
1118 | 0 | return res; |
1119 | 0 | } |
1120 | | |
1121 | 0 | uint64_t DeleteBitmap::get_size() const { |
1122 | 0 | std::shared_lock l(lock); |
1123 | 0 | uint64_t charge = 0; |
1124 | 0 | for (auto& [k, v] : delete_bitmap) { |
1125 | 0 | if (std::get<1>(k) != DeleteBitmap::INVALID_SEGMENT_ID) { |
1126 | 0 | charge += v.getSizeInBytes(); |
1127 | 0 | } |
1128 | 0 | } |
1129 | 0 | return charge; |
1130 | 0 | } |
1131 | | |
1132 | 1 | bool DeleteBitmap::contains_agg_without_cache(const BitmapKey& bmk, uint32_t row_id) const { |
1133 | 1 | std::shared_lock l(lock); |
1134 | 1 | DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), 0}; |
1135 | 1 | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1136 | 0 | auto& [k, bm] = *it; |
1137 | 0 | if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) || |
1138 | 0 | std::get<2>(k) > std::get<2>(bmk)) { |
1139 | 0 | break; |
1140 | 0 | } |
1141 | 0 | if (bm.contains(row_id)) { |
1142 | 0 | return true; |
1143 | 0 | } |
1144 | 0 | } |
1145 | 1 | return false; |
1146 | 1 | } |
1147 | | |
1148 | 0 | void DeleteBitmap::remove_sentinel_marks() { |
1149 | 0 | for (auto it = delete_bitmap.begin(), end = delete_bitmap.end(); it != end;) { |
1150 | 0 | if (std::get<1>(it->first) == DeleteBitmap::INVALID_SEGMENT_ID) { |
1151 | 0 | it = delete_bitmap.erase(it); |
1152 | 0 | } else { |
1153 | 0 | ++it; |
1154 | 0 | } |
1155 | 0 | } |
1156 | 0 | } |
1157 | | |
1158 | 38 | int DeleteBitmap::set(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) { |
1159 | 38 | std::lock_guard l(lock); |
1160 | 38 | auto [_, inserted] = delete_bitmap.insert_or_assign(bmk, segment_delete_bitmap); |
1161 | 38 | return inserted; |
1162 | 38 | } |
1163 | | |
1164 | 3 | int DeleteBitmap::get(const BitmapKey& bmk, roaring::Roaring* segment_delete_bitmap) const { |
1165 | 3 | std::shared_lock l(lock); |
1166 | 3 | auto it = delete_bitmap.find(bmk); |
1167 | 3 | if (it == delete_bitmap.end()) return -1; |
1168 | 3 | *segment_delete_bitmap = it->second; // copy |
1169 | 3 | return 0; |
1170 | 3 | } |
1171 | | |
1172 | 54 | const roaring::Roaring* DeleteBitmap::get(const BitmapKey& bmk) const { |
1173 | 54 | std::shared_lock l(lock); |
1174 | 54 | auto it = delete_bitmap.find(bmk); |
1175 | 54 | if (it == delete_bitmap.end()) return nullptr; |
1176 | 41 | return &(it->second); // get address |
1177 | 54 | } |
1178 | | |
1179 | | void DeleteBitmap::subset(const BitmapKey& start, const BitmapKey& end, |
1180 | 3 | DeleteBitmap* subset_rowset_map) const { |
1181 | 3 | roaring::Roaring roaring; |
1182 | 3 | DCHECK(start < end); |
1183 | 3 | std::shared_lock l(lock); |
1184 | 26 | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1185 | 25 | auto& [k, bm] = *it; |
1186 | 25 | if (k >= end) { |
1187 | 2 | break; |
1188 | 2 | } |
1189 | 23 | subset_rowset_map->set(k, bm); |
1190 | 23 | } |
1191 | 3 | } |
1192 | | |
1193 | 0 | size_t DeleteBitmap::get_count_with_range(const BitmapKey& start, const BitmapKey& end) const { |
1194 | 0 | DCHECK(start < end); |
1195 | 0 | size_t count = 0; |
1196 | 0 | std::shared_lock l(lock); |
1197 | 0 | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1198 | 0 | auto& [k, bm] = *it; |
1199 | 0 | if (k >= end) { |
1200 | 0 | break; |
1201 | 0 | } |
1202 | 0 | count++; |
1203 | 0 | } |
1204 | 0 | return count; |
1205 | 0 | } |
1206 | | |
1207 | 2 | void DeleteBitmap::merge(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) { |
1208 | 2 | std::lock_guard l(lock); |
1209 | 2 | auto [iter, succ] = delete_bitmap.emplace(bmk, segment_delete_bitmap); |
1210 | 2 | if (!succ) { |
1211 | 0 | iter->second |= segment_delete_bitmap; |
1212 | 0 | } |
1213 | 2 | } |
1214 | | |
1215 | 9 | void DeleteBitmap::merge(const DeleteBitmap& other) { |
1216 | 9 | std::lock_guard l(lock); |
1217 | 29 | for (auto& i : other.delete_bitmap) { |
1218 | 29 | auto [j, succ] = this->delete_bitmap.insert(i); |
1219 | 29 | if (!succ) j->second |= i.second; |
1220 | 29 | } |
1221 | 9 | } |
1222 | | |
1223 | | void DeleteBitmap::add_to_remove_queue( |
1224 | | const std::string& version_str, |
1225 | | const std::vector<std::tuple<int64_t, DeleteBitmap::BitmapKey, DeleteBitmap::BitmapKey>>& |
1226 | 0 | vector) { |
1227 | 0 | std::shared_lock l(stale_delete_bitmap_lock); |
1228 | 0 | _stale_delete_bitmap.emplace(version_str, vector); |
1229 | 0 | } |
1230 | | |
1231 | 1 | void DeleteBitmap::remove_stale_delete_bitmap_from_queue(const std::vector<std::string>& vector) { |
1232 | 1 | if (!config::enable_delete_bitmap_merge_on_compaction) { |
1233 | 1 | return; |
1234 | 1 | } |
1235 | 0 | std::shared_lock l(stale_delete_bitmap_lock); |
1236 | | //<rowset_id, start_version, end_version> |
1237 | 0 | std::vector<std::tuple<std::string, uint64_t, uint64_t>> to_delete; |
1238 | 0 | int64_t tablet_id = -1; |
1239 | 0 | for (auto& version_str : vector) { |
1240 | 0 | auto it = _stale_delete_bitmap.find(version_str); |
1241 | 0 | if (it != _stale_delete_bitmap.end()) { |
1242 | 0 | auto delete_bitmap_vector = it->second; |
1243 | 0 | for (auto& delete_bitmap_tuple : it->second) { |
1244 | 0 | if (tablet_id < 0) { |
1245 | 0 | tablet_id = std::get<0>(delete_bitmap_tuple); |
1246 | 0 | } |
1247 | 0 | auto start_bmk = std::get<1>(delete_bitmap_tuple); |
1248 | 0 | auto end_bmk = std::get<2>(delete_bitmap_tuple); |
1249 | | // the key range of to be removed is [start_bmk,end_bmk), |
1250 | | // due to the different definitions of the right boundary, |
1251 | | // so use end_bmk as right boundary when removing local delete bitmap, |
1252 | | // use (end_bmk - 1) as right boundary when removing ms delete bitmap |
1253 | 0 | remove(start_bmk, end_bmk); |
1254 | 0 | to_delete.emplace_back(std::make_tuple(std::get<0>(start_bmk).to_string(), 0, |
1255 | 0 | std::get<2>(end_bmk) - 1)); |
1256 | 0 | } |
1257 | 0 | _stale_delete_bitmap.erase(version_str); |
1258 | 0 | } |
1259 | 0 | } |
1260 | 0 | if (tablet_id == -1 || to_delete.empty() || !config::is_cloud_mode()) { |
1261 | 0 | return; |
1262 | 0 | } |
1263 | 0 | CloudStorageEngine& engine = ExecEnv::GetInstance()->storage_engine().to_cloud(); |
1264 | 0 | auto st = engine.meta_mgr().remove_old_version_delete_bitmap(tablet_id, to_delete); |
1265 | 0 | if (!st.ok()) { |
1266 | 0 | LOG(WARNING) << "fail to remove_stale_delete_bitmap_from_queue for tablet=" << tablet_id |
1267 | 0 | << ",st=" << st; |
1268 | 0 | } |
1269 | 0 | } |
1270 | | |
1271 | 63 | uint64_t DeleteBitmap::get_delete_bitmap_count() { |
1272 | 63 | std::shared_lock l(lock); |
1273 | 63 | uint64_t count = 0; |
1274 | 63 | for (auto it = delete_bitmap.begin(); it != delete_bitmap.end(); it++) { |
1275 | 0 | if (std::get<1>(it->first) != DeleteBitmap::INVALID_SEGMENT_ID) { |
1276 | 0 | count++; |
1277 | 0 | } |
1278 | 0 | } |
1279 | 63 | return count; |
1280 | 63 | } |
1281 | | |
1282 | 0 | bool DeleteBitmap::has_calculated_for_multi_segments(const RowsetId& rowset_id) const { |
1283 | 0 | return contains({rowset_id, INVALID_SEGMENT_ID, TEMP_VERSION_COMMON}, ROWSET_SENTINEL_MARK); |
1284 | 0 | } |
1285 | | |
1286 | | // We cannot just copy the underlying memory to construct a string |
1287 | | // due to equivalent objects may have different padding bytes. |
1288 | | // Reading padding bytes is undefined behavior, neither copy nor |
1289 | | // placement new will help simplify the code. |
1290 | | // Refer to C11 standards §6.2.6.1/6 and §6.7.9/21 for more info. |
1291 | 44 | static std::string agg_cache_key(int64_t tablet_id, const DeleteBitmap::BitmapKey& bmk) { |
1292 | 44 | std::string ret(sizeof(tablet_id) + sizeof(bmk), '\0'); |
1293 | 44 | *reinterpret_cast<int64_t*>(ret.data()) = tablet_id; |
1294 | 44 | auto t = reinterpret_cast<DeleteBitmap::BitmapKey*>(ret.data() + sizeof(tablet_id)); |
1295 | 44 | std::get<RowsetId>(*t).version = std::get<RowsetId>(bmk).version; |
1296 | 44 | std::get<RowsetId>(*t).hi = std::get<RowsetId>(bmk).hi; |
1297 | 44 | std::get<RowsetId>(*t).mi = std::get<RowsetId>(bmk).mi; |
1298 | 44 | std::get<RowsetId>(*t).lo = std::get<RowsetId>(bmk).lo; |
1299 | 44 | std::get<1>(*t) = std::get<1>(bmk); |
1300 | 44 | std::get<2>(*t) = std::get<2>(bmk); |
1301 | 44 | return ret; |
1302 | 44 | } |
1303 | | |
1304 | 44 | std::shared_ptr<roaring::Roaring> DeleteBitmap::get_agg(const BitmapKey& bmk) const { |
1305 | 44 | std::string key_str = agg_cache_key(_tablet_id, bmk); // Cache key container |
1306 | 44 | CacheKey key(key_str); |
1307 | 44 | Cache::Handle* handle = _agg_cache->repr()->lookup(key); |
1308 | | |
1309 | 44 | AggCache::Value* val = |
1310 | 44 | handle == nullptr |
1311 | 44 | ? nullptr |
1312 | 44 | : reinterpret_cast<AggCache::Value*>(_agg_cache->repr()->value(handle)); |
1313 | | // FIXME: do we need a mutex here to get rid of duplicated initializations |
1314 | | // of cache entries in some cases? |
1315 | 44 | if (val == nullptr) { // Renew if needed, put a new Value to cache |
1316 | 38 | val = new AggCache::Value(); |
1317 | 38 | { |
1318 | 38 | std::shared_lock l(lock); |
1319 | 38 | DeleteBitmap::BitmapKey start {std::get<0>(bmk), std::get<1>(bmk), 0}; |
1320 | 69 | for (auto it = delete_bitmap.lower_bound(start); it != delete_bitmap.end(); ++it) { |
1321 | 66 | auto& [k, bm] = *it; |
1322 | 66 | if (std::get<0>(k) != std::get<0>(bmk) || std::get<1>(k) != std::get<1>(bmk) || |
1323 | 66 | std::get<2>(k) > std::get<2>(bmk)) { |
1324 | 35 | break; |
1325 | 35 | } |
1326 | 31 | val->bitmap |= bm; |
1327 | 31 | } |
1328 | 38 | } |
1329 | 38 | size_t charge = val->bitmap.getSizeInBytes() + sizeof(AggCache::Value); |
1330 | 38 | handle = _agg_cache->repr()->insert(key, val, charge, charge, CachePriority::NORMAL); |
1331 | 38 | } |
1332 | | |
1333 | | // It is natural for the cache to reclaim the underlying memory |
1334 | 44 | return std::shared_ptr<roaring::Roaring>( |
1335 | 44 | &val->bitmap, [this, handle](...) { _agg_cache->repr()->release(handle); }); |
1336 | 44 | } |
1337 | | |
// Out-of-class definition of the static atomic pointer AggCache::s_repr; it
// starts out as nullptr.
std::atomic<DeleteBitmap::AggCachePolicy*> DeleteBitmap::AggCache::s_repr {nullptr};
1339 | | |
1340 | 0 | std::string tablet_state_name(TabletState state) { |
1341 | 0 | switch (state) { |
1342 | 0 | case TABLET_NOTREADY: |
1343 | 0 | return "TABLET_NOTREADY"; |
1344 | | |
1345 | 0 | case TABLET_RUNNING: |
1346 | 0 | return "TABLET_RUNNING"; |
1347 | | |
1348 | 0 | case TABLET_TOMBSTONED: |
1349 | 0 | return "TABLET_TOMBSTONED"; |
1350 | | |
1351 | 0 | case TABLET_STOPPED: |
1352 | 0 | return "TABLET_STOPPED"; |
1353 | | |
1354 | 0 | case TABLET_SHUTDOWN: |
1355 | 0 | return "TABLET_SHUTDOWN"; |
1356 | | |
1357 | 0 | default: |
1358 | 0 | return "TabletState(" + std::to_string(state) + ")"; |
1359 | 0 | } |
1360 | 0 | } |
1361 | | |
1362 | | #include "common/compile_check_end.h" |
1363 | | } // namespace doris |