/root/doris/be/src/cloud/cloud_tablet.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "cloud/cloud_tablet.h" |
19 | | |
20 | | #include <gen_cpp/olap_file.pb.h> |
21 | | #include <rapidjson/document.h> |
22 | | #include <rapidjson/encodings.h> |
23 | | #include <rapidjson/prettywriter.h> |
24 | | #include <rapidjson/rapidjson.h> |
25 | | #include <rapidjson/stringbuffer.h> |
26 | | |
27 | | #include <atomic> |
28 | | #include <memory> |
29 | | #include <shared_mutex> |
30 | | #include <unordered_map> |
31 | | #include <vector> |
32 | | |
33 | | #include "cloud/cloud_meta_mgr.h" |
34 | | #include "cloud/cloud_storage_engine.h" |
35 | | #include "cloud/cloud_tablet_mgr.h" |
36 | | #include "common/config.h" |
37 | | #include "common/logging.h" |
38 | | #include "io/cache/block_file_cache_downloader.h" |
39 | | #include "io/cache/block_file_cache_factory.h" |
40 | | #include "olap/compaction.h" |
41 | | #include "olap/cumulative_compaction_time_series_policy.h" |
42 | | #include "olap/olap_define.h" |
43 | | #include "olap/rowset/beta_rowset.h" |
44 | | #include "olap/rowset/rowset.h" |
45 | | #include "olap/rowset/rowset_factory.h" |
46 | | #include "olap/rowset/rowset_fwd.h" |
47 | | #include "olap/rowset/rowset_writer.h" |
48 | | #include "olap/rowset/segment_v2/inverted_index_desc.h" |
49 | | #include "olap/storage_policy.h" |
50 | | #include "olap/tablet_schema.h" |
51 | | #include "olap/txn_manager.h" |
52 | | #include "util/debug_points.h" |
53 | | #include "vec/common/schema_util.h" |
54 | | |
55 | | namespace doris { |
56 | | #include "common/compile_check_begin.h" |
57 | | using namespace ErrorCode; |
58 | | |
59 | | static constexpr int LOAD_INITIATOR_ID = -1; |
60 | | |
61 | | CloudTablet::CloudTablet(CloudStorageEngine& engine, TabletMetaSharedPtr tablet_meta) |
62 | 1 | : BaseTablet(std::move(tablet_meta)), _engine(engine) {} |
63 | | |
64 | 1 | CloudTablet::~CloudTablet() = default; |
65 | | |
66 | 0 | bool CloudTablet::exceed_version_limit(int32_t limit) { |
67 | 0 | return _approximate_num_rowsets.load(std::memory_order_relaxed) > limit; |
68 | 0 | } |
69 | | |
70 | | Status CloudTablet::capture_consistent_rowsets_unlocked( |
71 | 0 | const Version& spec_version, std::vector<RowsetSharedPtr>* rowsets) const { |
72 | 0 | Versions version_path; |
73 | 0 | auto st = _timestamped_version_tracker.capture_consistent_versions(spec_version, &version_path); |
74 | 0 | if (!st.ok()) { |
75 | | // Check no missed versions or req version is merged |
76 | 0 | auto missed_versions = get_missed_versions(spec_version.second); |
77 | 0 | if (missed_versions.empty()) { |
78 | 0 | st.set_code(VERSION_ALREADY_MERGED); // Reset error code |
79 | 0 | } |
80 | 0 | st.append(" tablet_id=" + std::to_string(tablet_id())); |
81 | 0 | return st; |
82 | 0 | } |
83 | 0 | VLOG_DEBUG << "capture consitent versions: " << version_path; |
84 | 0 | return _capture_consistent_rowsets_unlocked(version_path, rowsets); |
85 | 0 | } |
86 | | |
87 | | Status CloudTablet::capture_rs_readers(const Version& spec_version, |
88 | | std::vector<RowSetSplits>* rs_splits, |
89 | 0 | bool skip_missing_version) { |
90 | 0 | DBUG_EXECUTE_IF("CloudTablet.capture_rs_readers.return.e-230", { |
91 | 0 | LOG_WARNING("CloudTablet.capture_rs_readers.return e-230").tag("tablet_id", tablet_id()); |
92 | 0 | return Status::Error<false>(-230, "injected error"); |
93 | 0 | }); |
94 | 0 | Versions version_path; |
95 | 0 | std::shared_lock rlock(_meta_lock); |
96 | 0 | auto st = _timestamped_version_tracker.capture_consistent_versions(spec_version, &version_path); |
97 | 0 | if (!st.ok()) { |
98 | 0 | rlock.unlock(); // avoid logging in lock range |
99 | | // Check no missed versions or req version is merged |
100 | 0 | auto missed_versions = get_missed_versions(spec_version.second); |
101 | 0 | if (missed_versions.empty()) { |
102 | 0 | st.set_code(VERSION_ALREADY_MERGED); // Reset error code |
103 | 0 | st.append(" versions are already compacted, "); |
104 | 0 | } |
105 | 0 | st.append(" tablet_id=" + std::to_string(tablet_id())); |
106 | | // clang-format off |
107 | 0 | LOG(WARNING) << st << '\n' << [this]() { std::string json; get_compaction_status(&json); return json; }(); |
108 | | // clang-format on |
109 | 0 | return st; |
110 | 0 | } |
111 | 0 | VLOG_DEBUG << "capture consitent versions: " << version_path; |
112 | 0 | return capture_rs_readers_unlocked(version_path, rs_splits); |
113 | 0 | } |
114 | | |
115 | 0 | Status CloudTablet::merge_rowsets_schema() { |
116 | | // Find the rowset with the max version |
117 | 0 | auto max_version_rowset = |
118 | 0 | std::max_element( |
119 | 0 | _rs_version_map.begin(), _rs_version_map.end(), |
120 | 0 | [](const auto& a, const auto& b) { |
121 | 0 | return !a.second->tablet_schema() |
122 | 0 | ? true |
123 | 0 | : (!b.second->tablet_schema() |
124 | 0 | ? false |
125 | 0 | : a.second->tablet_schema()->schema_version() < |
126 | 0 | b.second->tablet_schema() |
127 | 0 | ->schema_version()); |
128 | 0 | }) |
129 | 0 | ->second; |
130 | 0 | TabletSchemaSPtr max_version_schema = max_version_rowset->tablet_schema(); |
131 | | // If the schema has variant columns, perform a merge to create a wide tablet schema |
132 | 0 | if (max_version_schema->num_variant_columns() > 0) { |
133 | 0 | std::vector<TabletSchemaSPtr> schemas; |
134 | 0 | std::transform(_rs_version_map.begin(), _rs_version_map.end(), std::back_inserter(schemas), |
135 | 0 | [](const auto& rs_meta) { return rs_meta.second->tablet_schema(); }); |
136 | | // Merge the collected schemas to obtain the least common schema |
137 | 0 | RETURN_IF_ERROR(vectorized::schema_util::get_least_common_schema(schemas, nullptr, |
138 | 0 | max_version_schema)); |
139 | 0 | VLOG_DEBUG << "dump schema: " << max_version_schema->dump_full_schema(); |
140 | 0 | _merged_tablet_schema = max_version_schema; |
141 | 0 | } |
142 | 0 | return Status::OK(); |
143 | 0 | } |
144 | | |
145 | | // There are only two tablet_states RUNNING and NOT_READY in cloud mode |
146 | | // This function will erase the tablet from `CloudTabletMgr` when it can't find this tablet in MS. |
147 | 0 | Status CloudTablet::sync_rowsets(const SyncOptions& options, SyncRowsetStats* stats) { |
148 | 0 | RETURN_IF_ERROR(sync_if_not_running(stats)); |
149 | | |
150 | 0 | if (options.query_version > 0) { |
151 | 0 | std::shared_lock rlock(_meta_lock); |
152 | 0 | if (_max_version >= options.query_version) { |
153 | 0 | return Status::OK(); |
154 | 0 | } |
155 | 0 | } |
156 | | |
157 | | // serially execute sync to reduce unnecessary network overhead |
158 | 0 | std::unique_lock lock(_sync_meta_lock); |
159 | 0 | if (options.query_version > 0) { |
160 | 0 | std::shared_lock rlock(_meta_lock); |
161 | 0 | if (_max_version >= options.query_version) { |
162 | 0 | return Status::OK(); |
163 | 0 | } |
164 | 0 | } |
165 | | |
166 | 0 | auto st = _engine.meta_mgr().sync_tablet_rowsets_unlocked(this, lock, options, stats); |
167 | 0 | if (st.is<ErrorCode::NOT_FOUND>()) { |
168 | 0 | clear_cache(); |
169 | 0 | } |
170 | |
|
171 | 0 | return st; |
172 | 0 | } |
173 | | |
174 | | // Sync tablet meta and all rowset meta if not running. |
175 | | // This could happen when BE didn't finish schema change job and another BE committed this schema change job. |
176 | | // It should be a quite rare situation. |
177 | 0 | Status CloudTablet::sync_if_not_running(SyncRowsetStats* stats) { |
178 | 0 | if (tablet_state() == TABLET_RUNNING) { |
179 | 0 | return Status::OK(); |
180 | 0 | } |
181 | | |
182 | | // Serially execute sync to reduce unnecessary network overhead |
183 | 0 | std::unique_lock lock(_sync_meta_lock); |
184 | |
|
185 | 0 | { |
186 | 0 | std::shared_lock rlock(_meta_lock); |
187 | 0 | if (tablet_state() == TABLET_RUNNING) { |
188 | 0 | return Status::OK(); |
189 | 0 | } |
190 | 0 | } |
191 | | |
192 | 0 | TabletMetaSharedPtr tablet_meta; |
193 | 0 | auto st = _engine.meta_mgr().get_tablet_meta(tablet_id(), &tablet_meta); |
194 | 0 | if (!st.ok()) { |
195 | 0 | if (st.is<ErrorCode::NOT_FOUND>()) { |
196 | 0 | clear_cache(); |
197 | 0 | } |
198 | 0 | return st; |
199 | 0 | } |
200 | | |
201 | 0 | if (tablet_meta->tablet_state() != TABLET_RUNNING) [[unlikely]] { |
202 | | // MoW may go to here when load while schema change |
203 | 0 | return Status::OK(); |
204 | 0 | } |
205 | | |
206 | 0 | TimestampedVersionTracker empty_tracker; |
207 | 0 | { |
208 | 0 | std::lock_guard wlock(_meta_lock); |
209 | 0 | RETURN_IF_ERROR(set_tablet_state(TABLET_RUNNING)); |
210 | 0 | _rs_version_map.clear(); |
211 | 0 | _stale_rs_version_map.clear(); |
212 | 0 | std::swap(_timestamped_version_tracker, empty_tracker); |
213 | 0 | _tablet_meta->clear_rowsets(); |
214 | 0 | _tablet_meta->clear_stale_rowset(); |
215 | 0 | _max_version = -1; |
216 | 0 | } |
217 | | |
218 | 0 | st = _engine.meta_mgr().sync_tablet_rowsets_unlocked(this, lock, {}, stats); |
219 | 0 | if (st.is<ErrorCode::NOT_FOUND>()) { |
220 | 0 | clear_cache(); |
221 | 0 | } |
222 | 0 | return st; |
223 | 0 | } |
224 | | |
225 | 0 | TabletSchemaSPtr CloudTablet::merged_tablet_schema() const { |
226 | 0 | std::shared_lock rlock(_meta_lock); |
227 | 0 | return _merged_tablet_schema; |
228 | 0 | } |
229 | | |
230 | | void CloudTablet::add_rowsets(std::vector<RowsetSharedPtr> to_add, bool version_overlap, |
231 | | std::unique_lock<std::shared_mutex>& meta_lock, |
232 | 0 | bool warmup_delta_data) { |
233 | 0 | if (to_add.empty()) { |
234 | 0 | return; |
235 | 0 | } |
236 | | |
237 | 0 | auto add_rowsets_directly = [=, this](std::vector<RowsetSharedPtr>& rowsets) { |
238 | 0 | for (auto& rs : rowsets) { |
239 | 0 | if (version_overlap || warmup_delta_data) { |
240 | | #ifndef BE_TEST |
241 | | // Warmup rowset data in background |
242 | | for (int seg_id = 0; seg_id < rs->num_segments(); ++seg_id) { |
243 | | const auto& rowset_meta = rs->rowset_meta(); |
244 | | constexpr int64_t interval = 600; // 10 mins |
245 | | // When BE restart and receive the `load_sync` rpc, it will sync all historical rowsets first time. |
246 | | // So we need to filter out the old rowsets avoid to download the whole table. |
247 | | if (warmup_delta_data && |
248 | | ::time(nullptr) - rowset_meta->newest_write_timestamp() >= interval) { |
249 | | continue; |
250 | | } |
251 | | |
252 | | auto storage_resource = rowset_meta->remote_storage_resource(); |
253 | | if (!storage_resource) { |
254 | | LOG(WARNING) << storage_resource.error(); |
255 | | continue; |
256 | | } |
257 | | |
258 | | int64_t expiration_time = |
259 | | _tablet_meta->ttl_seconds() == 0 || |
260 | | rowset_meta->newest_write_timestamp() <= 0 |
261 | | ? 0 |
262 | | : rowset_meta->newest_write_timestamp() + |
263 | | _tablet_meta->ttl_seconds(); |
264 | | // clang-format off |
265 | | _engine.file_cache_block_downloader().submit_download_task(io::DownloadFileMeta { |
266 | | .path = storage_resource.value()->remote_segment_path(*rowset_meta, seg_id), |
267 | | .file_size = rs->rowset_meta()->segment_file_size(seg_id), |
268 | | .file_system = storage_resource.value()->fs, |
269 | | .ctx = |
270 | | { |
271 | | .expiration_time = expiration_time, |
272 | | .is_dryrun = config::enable_reader_dryrun_when_download_file_cache, |
273 | | }, |
274 | | .download_done {}, |
275 | | }); |
276 | | |
277 | | auto download_idx_file = [&](const io::Path& idx_path) { |
278 | | io::DownloadFileMeta meta { |
279 | | .path = idx_path, |
280 | | .file_size = -1, |
281 | | .file_system = storage_resource.value()->fs, |
282 | | .ctx = |
283 | | { |
284 | | .expiration_time = expiration_time, |
285 | | .is_dryrun = config::enable_reader_dryrun_when_download_file_cache, |
286 | | }, |
287 | | .download_done {}, |
288 | | }; |
289 | | _engine.file_cache_block_downloader().submit_download_task(std::move(meta)); |
290 | | }; |
291 | | // clang-format on |
292 | | auto schema_ptr = rowset_meta->tablet_schema(); |
293 | | auto idx_version = schema_ptr->get_inverted_index_storage_format(); |
294 | | if (idx_version == InvertedIndexStorageFormatPB::V1) { |
295 | | for (const auto& index : schema_ptr->inverted_indexes()) { |
296 | | auto idx_path = storage_resource.value()->remote_idx_v1_path( |
297 | | *rowset_meta, seg_id, index->index_id(), |
298 | | index->get_index_suffix()); |
299 | | download_idx_file(idx_path); |
300 | | } |
301 | | } else { |
302 | | if (schema_ptr->has_inverted_index()) { |
303 | | auto idx_path = storage_resource.value()->remote_idx_v2_path( |
304 | | *rowset_meta, seg_id); |
305 | | download_idx_file(idx_path); |
306 | | } |
307 | | } |
308 | | } |
309 | | #endif |
310 | 0 | } |
311 | 0 | _rs_version_map.emplace(rs->version(), rs); |
312 | 0 | _timestamped_version_tracker.add_version(rs->version()); |
313 | 0 | _max_version = std::max(rs->end_version(), _max_version); |
314 | 0 | update_base_size(*rs); |
315 | 0 | } |
316 | 0 | _tablet_meta->add_rowsets_unchecked(rowsets); |
317 | 0 | }; |
318 | |
|
319 | 0 | if (!version_overlap) { |
320 | 0 | add_rowsets_directly(to_add); |
321 | 0 | return; |
322 | 0 | } |
323 | | |
324 | | // Filter out existed rowsets |
325 | 0 | auto remove_it = |
326 | 0 | std::remove_if(to_add.begin(), to_add.end(), [this](const RowsetSharedPtr& rs) { |
327 | 0 | if (auto find_it = _rs_version_map.find(rs->version()); |
328 | 0 | find_it == _rs_version_map.end()) { |
329 | 0 | return false; |
330 | 0 | } else if (find_it->second->rowset_id() == rs->rowset_id()) { |
331 | 0 | return true; // Same rowset |
332 | 0 | } |
333 | | |
334 | | // If version of rowset in `to_add` is equal to rowset in tablet but rowset_id is not equal, |
335 | | // replace existed rowset with `to_add` rowset. This may occur when: |
336 | | // 1. schema change converts rowsets which have been double written to new tablet |
337 | | // 2. cumu compaction picks single overlapping input rowset to perform compaction |
338 | 0 | _tablet_meta->delete_rs_meta_by_version(rs->version(), nullptr); |
339 | 0 | _rs_version_map[rs->version()] = rs; |
340 | 0 | _tablet_meta->add_rowsets_unchecked({rs}); |
341 | 0 | update_base_size(*rs); |
342 | 0 | return true; |
343 | 0 | }); |
344 | |
|
345 | 0 | to_add.erase(remove_it, to_add.end()); |
346 | | |
347 | | // delete rowsets with overlapped version |
348 | 0 | std::vector<RowsetSharedPtr> to_add_directly; |
349 | 0 | for (auto& to_add_rs : to_add) { |
350 | | // delete rowsets with overlapped version |
351 | 0 | std::vector<RowsetSharedPtr> to_delete; |
352 | 0 | Version to_add_v = to_add_rs->version(); |
353 | | // if start_version > max_version, we can skip checking overlap here. |
354 | 0 | if (to_add_v.first > _max_version) { |
355 | | // if start_version > max_version, we can skip checking overlap here. |
356 | 0 | to_add_directly.push_back(to_add_rs); |
357 | 0 | } else { |
358 | 0 | to_add_directly.push_back(to_add_rs); |
359 | 0 | for (auto& [v, rs] : _rs_version_map) { |
360 | 0 | if (to_add_v.contains(v)) { |
361 | 0 | to_delete.push_back(rs); |
362 | 0 | } |
363 | 0 | } |
364 | 0 | delete_rowsets(to_delete, meta_lock); |
365 | 0 | } |
366 | 0 | } |
367 | |
|
368 | 0 | add_rowsets_directly(to_add_directly); |
369 | 0 | } |
370 | | |
371 | | void CloudTablet::delete_rowsets(const std::vector<RowsetSharedPtr>& to_delete, |
372 | 0 | std::unique_lock<std::shared_mutex>&) { |
373 | 0 | if (to_delete.empty()) { |
374 | 0 | return; |
375 | 0 | } |
376 | 0 | std::vector<RowsetMetaSharedPtr> rs_metas; |
377 | 0 | rs_metas.reserve(to_delete.size()); |
378 | 0 | for (auto&& rs : to_delete) { |
379 | 0 | rs_metas.push_back(rs->rowset_meta()); |
380 | 0 | _stale_rs_version_map[rs->version()] = rs; |
381 | 0 | } |
382 | 0 | _timestamped_version_tracker.add_stale_path_version(rs_metas); |
383 | 0 | for (auto&& rs : to_delete) { |
384 | 0 | _rs_version_map.erase(rs->version()); |
385 | 0 | } |
386 | |
|
387 | 0 | _tablet_meta->modify_rs_metas({}, rs_metas, false); |
388 | 0 | } |
389 | | |
390 | 0 | uint64_t CloudTablet::delete_expired_stale_rowsets() { |
391 | 0 | if (config::enable_mow_verbose_log) { |
392 | 0 | LOG_INFO("begin delete_expired_stale_rowset for tablet={}", tablet_id()); |
393 | 0 | } |
394 | 0 | std::vector<RowsetSharedPtr> expired_rowsets; |
395 | | // ATTN: trick, Use stale_rowsets to temporarily increase the reference count of the rowset shared pointer in _stale_rs_version_map so that in the recycle_cached_data function, it checks if the reference count is 2. |
396 | 0 | std::vector<RowsetSharedPtr> stale_rowsets; |
397 | 0 | int64_t expired_stale_sweep_endtime = |
398 | 0 | ::time(nullptr) - config::tablet_rowset_stale_sweep_time_sec; |
399 | 0 | std::vector<std::string> version_to_delete; |
400 | 0 | { |
401 | 0 | std::unique_lock wlock(_meta_lock); |
402 | |
|
403 | 0 | std::vector<int64_t> path_ids; |
404 | | // capture the path version to delete |
405 | 0 | _timestamped_version_tracker.capture_expired_paths(expired_stale_sweep_endtime, &path_ids); |
406 | |
|
407 | 0 | if (path_ids.empty()) { |
408 | 0 | return 0; |
409 | 0 | } |
410 | | |
411 | 0 | for (int64_t path_id : path_ids) { |
412 | 0 | int64_t start_version = -1; |
413 | 0 | int64_t end_version = -1; |
414 | | // delete stale versions in version graph |
415 | 0 | auto version_path = _timestamped_version_tracker.fetch_and_delete_path_by_id(path_id); |
416 | 0 | for (auto& v_ts : version_path->timestamped_versions()) { |
417 | 0 | auto rs_it = _stale_rs_version_map.find(v_ts->version()); |
418 | 0 | if (rs_it != _stale_rs_version_map.end()) { |
419 | 0 | expired_rowsets.push_back(rs_it->second); |
420 | 0 | stale_rowsets.push_back(rs_it->second); |
421 | 0 | LOG(INFO) << "erase stale rowset, tablet_id=" << tablet_id() |
422 | 0 | << " rowset_id=" << rs_it->second->rowset_id().to_string() |
423 | 0 | << " version=" << rs_it->first.to_string(); |
424 | 0 | _stale_rs_version_map.erase(rs_it); |
425 | 0 | } else { |
426 | 0 | LOG(WARNING) << "cannot find stale rowset " << v_ts->version() << " in tablet " |
427 | 0 | << tablet_id(); |
428 | | // clang-format off |
429 | 0 | DCHECK(false) << [this, &wlock]() { wlock.unlock(); std::string json; get_compaction_status(&json); return json; }(); |
430 | | // clang-format on |
431 | 0 | } |
432 | 0 | if (start_version < 0) { |
433 | 0 | start_version = v_ts->version().first; |
434 | 0 | } |
435 | 0 | end_version = v_ts->version().second; |
436 | 0 | _tablet_meta->delete_stale_rs_meta_by_version(v_ts->version()); |
437 | 0 | } |
438 | 0 | Version version(start_version, end_version); |
439 | 0 | version_to_delete.emplace_back(version.to_string()); |
440 | 0 | } |
441 | 0 | _reconstruct_version_tracker_if_necessary(); |
442 | 0 | } |
443 | 0 | _tablet_meta->delete_bitmap().remove_stale_delete_bitmap_from_queue(version_to_delete); |
444 | 0 | recycle_cached_data(expired_rowsets); |
445 | 0 | if (config::enable_mow_verbose_log) { |
446 | 0 | LOG_INFO("finish delete_expired_stale_rowset for tablet={}", tablet_id()); |
447 | 0 | } |
448 | 0 | return expired_rowsets.size(); |
449 | 0 | } |
450 | | |
451 | 0 | void CloudTablet::update_base_size(const Rowset& rs) { |
452 | | // Define base rowset as the rowset of version [2-x] |
453 | 0 | if (rs.start_version() == 2) { |
454 | 0 | _base_size = rs.total_disk_size(); |
455 | 0 | } |
456 | 0 | } |
457 | | |
458 | 0 | void CloudTablet::clear_cache() { |
459 | 0 | CloudTablet::recycle_cached_data(get_snapshot_rowset(true)); |
460 | 0 | _engine.tablet_mgr().erase_tablet(tablet_id()); |
461 | 0 | } |
462 | | |
463 | 0 | void CloudTablet::recycle_cached_data(const std::vector<RowsetSharedPtr>& rowsets) { |
464 | 0 | for (const auto& rs : rowsets) { |
465 | | // rowsets and tablet._rs_version_map each hold a rowset shared_ptr, so at this point, the reference count of the shared_ptr is at least 2. |
466 | 0 | if (rs.use_count() > 2) { |
467 | 0 | LOG(WARNING) << "Rowset " << rs->rowset_id().to_string() << " has " << rs.use_count() |
468 | 0 | << " references. File Cache won't be recycled when query is using it."; |
469 | 0 | return; |
470 | 0 | } |
471 | 0 | rs->clear_cache(); |
472 | 0 | } |
473 | 0 | } |
474 | | |
475 | | void CloudTablet::reset_approximate_stats(int64_t num_rowsets, int64_t num_segments, |
476 | 0 | int64_t num_rows, int64_t data_size) { |
477 | 0 | _approximate_num_rowsets.store(num_rowsets, std::memory_order_relaxed); |
478 | 0 | _approximate_num_segments.store(num_segments, std::memory_order_relaxed); |
479 | 0 | _approximate_num_rows.store(num_rows, std::memory_order_relaxed); |
480 | 0 | _approximate_data_size.store(data_size, std::memory_order_relaxed); |
481 | 0 | int64_t cumu_num_deltas = 0; |
482 | 0 | int64_t cumu_num_rowsets = 0; |
483 | 0 | auto cp = _cumulative_point.load(std::memory_order_relaxed); |
484 | 0 | for (auto& [v, r] : _rs_version_map) { |
485 | 0 | if (v.second < cp) { |
486 | 0 | continue; |
487 | 0 | } |
488 | | |
489 | 0 | cumu_num_deltas += r->is_segments_overlapping() ? r->num_segments() : 1; |
490 | 0 | ++cumu_num_rowsets; |
491 | 0 | } |
492 | 0 | _approximate_cumu_num_rowsets.store(cumu_num_rowsets, std::memory_order_relaxed); |
493 | 0 | _approximate_cumu_num_deltas.store(cumu_num_deltas, std::memory_order_relaxed); |
494 | 0 | } |
495 | | |
496 | | Result<std::unique_ptr<RowsetWriter>> CloudTablet::create_rowset_writer( |
497 | 0 | RowsetWriterContext& context, bool vertical) { |
498 | 0 | context.rowset_id = _engine.next_rowset_id(); |
499 | | // FIXME(plat1ko): Seems `tablet_id` and `index_id` has been set repeatedly |
500 | 0 | context.tablet_id = tablet_id(); |
501 | 0 | context.index_id = index_id(); |
502 | 0 | context.partition_id = partition_id(); |
503 | 0 | context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write(); |
504 | 0 | return RowsetFactory::create_rowset_writer(_engine, context, vertical); |
505 | 0 | } |
506 | | |
507 | | // create a rowset writer with rowset_id and seg_id |
508 | | // after writer, merge this transient rowset with original rowset |
509 | | Result<std::unique_ptr<RowsetWriter>> CloudTablet::create_transient_rowset_writer( |
510 | | const Rowset& rowset, std::shared_ptr<PartialUpdateInfo> partial_update_info, |
511 | 0 | int64_t txn_expiration) { |
512 | 0 | if (rowset.rowset_meta_state() != RowsetStatePB::BEGIN_PARTIAL_UPDATE && |
513 | 0 | rowset.rowset_meta_state() != RowsetStatePB::COMMITTED) [[unlikely]] { |
514 | 0 | auto msg = fmt::format( |
515 | 0 | "wrong rowset state when create_transient_rowset_writer, rowset state should be " |
516 | 0 | "BEGIN_PARTIAL_UPDATE or COMMITTED, but found {}, rowset_id={}, tablet_id={}", |
517 | 0 | RowsetStatePB_Name(rowset.rowset_meta_state()), rowset.rowset_id().to_string(), |
518 | 0 | tablet_id()); |
519 | | // see `CloudRowsetWriter::build` for detail. |
520 | | // if this is in a retry task, the rowset state may have been changed to RowsetStatePB::COMMITTED |
521 | | // in `RowsetMeta::merge_rowset_meta()` in previous trials. |
522 | 0 | LOG(WARNING) << msg; |
523 | 0 | DCHECK(false) << msg; |
524 | 0 | } |
525 | 0 | RowsetWriterContext context; |
526 | 0 | context.rowset_state = PREPARED; |
527 | 0 | context.segments_overlap = OVERLAPPING; |
528 | | // During a partial update, the extracted columns of a variant should not be included in the tablet schema. |
529 | | // This is because the partial update for a variant needs to ignore the extracted columns. |
530 | | // Otherwise, the schema types in different rowsets might be inconsistent. When performing a partial update, |
531 | | // the complete variant is constructed by reading all the sub-columns of the variant. |
532 | 0 | context.tablet_schema = rowset.tablet_schema()->copy_without_variant_extracted_columns(); |
533 | 0 | context.newest_write_timestamp = UnixSeconds(); |
534 | 0 | context.tablet_id = table_id(); |
535 | 0 | context.enable_segcompaction = false; |
536 | 0 | context.write_type = DataWriteType::TYPE_DIRECT; |
537 | 0 | context.partial_update_info = std::move(partial_update_info); |
538 | 0 | context.is_transient_rowset_writer = true; |
539 | 0 | context.rowset_id = rowset.rowset_id(); |
540 | 0 | context.tablet_id = tablet_id(); |
541 | 0 | context.index_id = index_id(); |
542 | 0 | context.partition_id = partition_id(); |
543 | 0 | context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write(); |
544 | 0 | context.txn_expiration = txn_expiration; |
545 | |
|
546 | 0 | auto storage_resource = rowset.rowset_meta()->remote_storage_resource(); |
547 | 0 | if (!storage_resource) { |
548 | 0 | return ResultError(std::move(storage_resource.error())); |
549 | 0 | } |
550 | | |
551 | 0 | context.storage_resource = *storage_resource.value(); |
552 | |
|
553 | 0 | return RowsetFactory::create_rowset_writer(_engine, context, false) |
554 | 0 | .transform([&](auto&& writer) { |
555 | 0 | writer->set_segment_start_id(cast_set<int32_t>(rowset.num_segments())); |
556 | 0 | return writer; |
557 | 0 | }); |
558 | 0 | } |
559 | | |
560 | 0 | int64_t CloudTablet::get_cloud_base_compaction_score() const { |
561 | 0 | if (_tablet_meta->compaction_policy() == CUMULATIVE_TIME_SERIES_POLICY) { |
562 | 0 | bool has_delete = false; |
563 | 0 | int64_t point = cumulative_layer_point(); |
564 | 0 | std::shared_lock<std::shared_mutex> rlock(_meta_lock); |
565 | 0 | for (const auto& rs_meta : _tablet_meta->all_rs_metas()) { |
566 | 0 | if (rs_meta->start_version() >= point) { |
567 | 0 | continue; |
568 | 0 | } |
569 | 0 | if (rs_meta->has_delete_predicate()) { |
570 | 0 | has_delete = true; |
571 | 0 | break; |
572 | 0 | } |
573 | 0 | } |
574 | 0 | if (!has_delete) { |
575 | 0 | return 0; |
576 | 0 | } |
577 | 0 | } |
578 | | |
579 | 0 | return _approximate_num_rowsets.load(std::memory_order_relaxed) - |
580 | 0 | _approximate_cumu_num_rowsets.load(std::memory_order_relaxed); |
581 | 0 | } |
582 | | |
583 | 0 | int64_t CloudTablet::get_cloud_cumu_compaction_score() const { |
584 | | // TODO(plat1ko): Propose an algorithm that considers tablet's key type, number of delete rowsets, |
585 | | // number of tablet versions simultaneously. |
586 | 0 | return _approximate_cumu_num_deltas.load(std::memory_order_relaxed); |
587 | 0 | } |
588 | | |
589 | | // return a json string to show the compaction status of this tablet |
590 | 0 | void CloudTablet::get_compaction_status(std::string* json_result) { |
591 | 0 | rapidjson::Document root; |
592 | 0 | root.SetObject(); |
593 | |
|
594 | 0 | rapidjson::Document path_arr; |
595 | 0 | path_arr.SetArray(); |
596 | |
|
597 | 0 | std::vector<RowsetSharedPtr> rowsets; |
598 | 0 | std::vector<RowsetSharedPtr> stale_rowsets; |
599 | 0 | { |
600 | 0 | std::shared_lock rdlock(_meta_lock); |
601 | 0 | rowsets.reserve(_rs_version_map.size()); |
602 | 0 | for (auto& it : _rs_version_map) { |
603 | 0 | rowsets.push_back(it.second); |
604 | 0 | } |
605 | 0 | stale_rowsets.reserve(_stale_rs_version_map.size()); |
606 | 0 | for (auto& it : _stale_rs_version_map) { |
607 | 0 | stale_rowsets.push_back(it.second); |
608 | 0 | } |
609 | 0 | } |
610 | 0 | std::sort(rowsets.begin(), rowsets.end(), Rowset::comparator); |
611 | 0 | std::sort(stale_rowsets.begin(), stale_rowsets.end(), Rowset::comparator); |
612 | | |
613 | | // get snapshot version path json_doc |
614 | 0 | _timestamped_version_tracker.get_stale_version_path_json_doc(path_arr); |
615 | 0 | root.AddMember("cumulative point", _cumulative_point.load(), root.GetAllocator()); |
616 | | |
617 | | // print all rowsets' version as an array |
618 | 0 | rapidjson::Document versions_arr; |
619 | 0 | rapidjson::Document missing_versions_arr; |
620 | 0 | versions_arr.SetArray(); |
621 | 0 | missing_versions_arr.SetArray(); |
622 | 0 | int64_t last_version = -1; |
623 | 0 | for (auto& rowset : rowsets) { |
624 | 0 | const Version& ver = rowset->version(); |
625 | 0 | if (ver.first != last_version + 1) { |
626 | 0 | rapidjson::Value miss_value; |
627 | 0 | miss_value.SetString(fmt::format("[{}-{}]", last_version + 1, ver.first - 1).c_str(), |
628 | 0 | missing_versions_arr.GetAllocator()); |
629 | 0 | missing_versions_arr.PushBack(miss_value, missing_versions_arr.GetAllocator()); |
630 | 0 | } |
631 | 0 | rapidjson::Value value; |
632 | 0 | std::string version_str = rowset->get_rowset_info_str(); |
633 | 0 | value.SetString(version_str.c_str(), cast_set<uint32_t>(version_str.length()), |
634 | 0 | versions_arr.GetAllocator()); |
635 | 0 | versions_arr.PushBack(value, versions_arr.GetAllocator()); |
636 | 0 | last_version = ver.second; |
637 | 0 | } |
638 | 0 | root.AddMember("rowsets", versions_arr, root.GetAllocator()); |
639 | 0 | root.AddMember("missing_rowsets", missing_versions_arr, root.GetAllocator()); |
640 | | |
641 | | // print all stale rowsets' version as an array |
642 | 0 | rapidjson::Document stale_versions_arr; |
643 | 0 | stale_versions_arr.SetArray(); |
644 | 0 | for (auto& rowset : stale_rowsets) { |
645 | 0 | rapidjson::Value value; |
646 | 0 | std::string version_str = rowset->get_rowset_info_str(); |
647 | 0 | value.SetString(version_str.c_str(), cast_set<uint32_t>(version_str.length()), |
648 | 0 | stale_versions_arr.GetAllocator()); |
649 | 0 | stale_versions_arr.PushBack(value, stale_versions_arr.GetAllocator()); |
650 | 0 | } |
651 | 0 | root.AddMember("stale_rowsets", stale_versions_arr, root.GetAllocator()); |
652 | | |
653 | | // add stale version rowsets |
654 | 0 | root.AddMember("stale version path", path_arr, root.GetAllocator()); |
655 | | |
656 | | // to json string |
657 | 0 | rapidjson::StringBuffer strbuf; |
658 | 0 | rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(strbuf); |
659 | 0 | root.Accept(writer); |
660 | 0 | *json_result = std::string(strbuf.GetString()); |
661 | 0 | } |
662 | | |
663 | 0 | void CloudTablet::set_cumulative_layer_point(int64_t new_point) { |
664 | 0 | if (new_point == Tablet::K_INVALID_CUMULATIVE_POINT || new_point >= _cumulative_point) { |
665 | 0 | _cumulative_point = new_point; |
666 | 0 | return; |
667 | 0 | } |
668 | | // cumulative point should only be reset to -1, or be increased |
669 | | // FIXME: could happen in currently unresolved race conditions |
670 | 0 | LOG(WARNING) << "Unexpected cumulative point: " << new_point |
671 | 0 | << ", origin: " << _cumulative_point.load(); |
672 | 0 | } |
673 | | |
674 | 0 | std::vector<RowsetSharedPtr> CloudTablet::pick_candidate_rowsets_to_base_compaction() { |
675 | 0 | std::vector<RowsetSharedPtr> candidate_rowsets; |
676 | 0 | { |
677 | 0 | std::shared_lock rlock(_meta_lock); |
678 | 0 | for (const auto& [version, rs] : _rs_version_map) { |
679 | 0 | if (version.first != 0 && version.first < _cumulative_point && |
680 | 0 | (_alter_version == -1 || version.second <= _alter_version)) { |
681 | 0 | candidate_rowsets.push_back(rs); |
682 | 0 | } |
683 | 0 | } |
684 | 0 | } |
685 | 0 | std::sort(candidate_rowsets.begin(), candidate_rowsets.end(), Rowset::comparator); |
686 | 0 | return candidate_rowsets; |
687 | 0 | } |
688 | | |
689 | 0 | std::vector<RowsetSharedPtr> CloudTablet::pick_candidate_rowsets_to_full_compaction() { |
690 | 0 | std::vector<RowsetSharedPtr> candidate_rowsets; |
691 | 0 | { |
692 | 0 | std::shared_lock rlock(_meta_lock); |
693 | 0 | for (auto& [v, rs] : _rs_version_map) { |
694 | | // MUST NOT compact rowset [0-1] for some historical reasons (see cloud_schema_change) |
695 | 0 | if (v.first != 0) { |
696 | 0 | candidate_rowsets.push_back(rs); |
697 | 0 | } |
698 | 0 | } |
699 | 0 | } |
700 | 0 | std::sort(candidate_rowsets.begin(), candidate_rowsets.end(), Rowset::comparator); |
701 | 0 | return candidate_rowsets; |
702 | 0 | } |
703 | | |
704 | 0 | CalcDeleteBitmapExecutor* CloudTablet::calc_delete_bitmap_executor() { |
705 | 0 | return _engine.calc_delete_bitmap_executor(); |
706 | 0 | } |
707 | | |
708 | | Status CloudTablet::save_delete_bitmap(const TabletTxnInfo* txn_info, int64_t txn_id, |
709 | | DeleteBitmapPtr delete_bitmap, RowsetWriter* rowset_writer, |
710 | | const RowsetIdUnorderedSet& cur_rowset_ids, int64_t lock_id, |
711 | 0 | int64_t next_visible_version) { |
712 | 0 | RowsetSharedPtr rowset = txn_info->rowset; |
713 | 0 | int64_t cur_version = rowset->start_version(); |
714 | | // update delete bitmap info, in order to avoid recalculation when trying again |
715 | 0 | RETURN_IF_ERROR(_engine.txn_delete_bitmap_cache().update_tablet_txn_info( |
716 | 0 | txn_id, tablet_id(), delete_bitmap, cur_rowset_ids, PublishStatus::PREPARE)); |
717 | | |
718 | 0 | if (txn_info->partial_update_info && txn_info->partial_update_info->is_partial_update() && |
719 | 0 | rowset_writer->num_rows() > 0) { |
720 | 0 | DBUG_EXECUTE_IF("CloudTablet::save_delete_bitmap.update_tmp_rowset.error", { |
721 | 0 | return Status::InternalError<false>("injected update_tmp_rowset error."); |
722 | 0 | }); |
723 | 0 | const auto& rowset_meta = rowset->rowset_meta(); |
724 | 0 | RETURN_IF_ERROR(_engine.meta_mgr().update_tmp_rowset(*rowset_meta)); |
725 | 0 | } |
726 | | |
727 | 0 | RETURN_IF_ERROR(save_delete_bitmap_to_ms(cur_version, txn_id, delete_bitmap, lock_id, |
728 | 0 | next_visible_version)); |
729 | | |
730 | | // store the delete bitmap with sentinel marks in txn_delete_bitmap_cache because if the txn is retried for some reason, |
731 | | // it will use the delete bitmap from txn_delete_bitmap_cache when re-calculating the delete bitmap, during which it will do |
732 | | // delete bitmap correctness check. If we store the new_delete_bitmap, the delete bitmap correctness check will fail |
733 | 0 | RETURN_IF_ERROR(_engine.txn_delete_bitmap_cache().update_tablet_txn_info( |
734 | 0 | txn_id, tablet_id(), delete_bitmap, cur_rowset_ids, PublishStatus::SUCCEED, |
735 | 0 | txn_info->publish_info)); |
736 | | |
737 | 0 | DBUG_EXECUTE_IF("CloudTablet::save_delete_bitmap.enable_sleep", { |
738 | 0 | auto sleep_sec = dp->param<int>("sleep", 5); |
739 | 0 | std::this_thread::sleep_for(std::chrono::seconds(sleep_sec)); |
740 | 0 | }); |
741 | |
|
742 | 0 | DBUG_EXECUTE_IF("CloudTablet::save_delete_bitmap.injected_error", { |
743 | 0 | auto retry = dp->param<bool>("retry", false); |
744 | 0 | auto sleep_sec = dp->param<int>("sleep", 0); |
745 | 0 | std::this_thread::sleep_for(std::chrono::seconds(sleep_sec)); |
746 | 0 | if (retry) { // return DELETE_BITMAP_LOCK_ERROR to let it retry |
747 | 0 | return Status::Error<ErrorCode::DELETE_BITMAP_LOCK_ERROR>( |
748 | 0 | "injected DELETE_BITMAP_LOCK_ERROR"); |
749 | 0 | } else { |
750 | 0 | return Status::InternalError<false>("injected non-retryable error"); |
751 | 0 | } |
752 | 0 | }); |
753 | |
|
754 | 0 | return Status::OK(); |
755 | 0 | } |
756 | | |
757 | | Status CloudTablet::save_delete_bitmap_to_ms(int64_t cur_version, int64_t txn_id, |
758 | | DeleteBitmapPtr delete_bitmap, int64_t lock_id, |
759 | 0 | int64_t next_visible_version) { |
760 | 0 | DeleteBitmapPtr new_delete_bitmap = std::make_shared<DeleteBitmap>(tablet_id()); |
761 | 0 | for (auto iter = delete_bitmap->delete_bitmap.begin(); |
762 | 0 | iter != delete_bitmap->delete_bitmap.end(); ++iter) { |
763 | | // skip sentinel mark, which is used for delete bitmap correctness check |
764 | 0 | if (std::get<1>(iter->first) != DeleteBitmap::INVALID_SEGMENT_ID) { |
765 | 0 | new_delete_bitmap->merge( |
766 | 0 | {std::get<0>(iter->first), std::get<1>(iter->first), cur_version}, |
767 | 0 | iter->second); |
768 | 0 | } |
769 | 0 | } |
770 | | // lock_id != -1 means this is in an explict txn |
771 | 0 | bool is_explicit_txn = (lock_id != -1); |
772 | 0 | auto ms_lock_id = !is_explicit_txn ? txn_id : lock_id; |
773 | |
|
774 | 0 | RETURN_IF_ERROR(_engine.meta_mgr().update_delete_bitmap(*this, ms_lock_id, LOAD_INITIATOR_ID, |
775 | 0 | new_delete_bitmap.get(), txn_id, |
776 | 0 | is_explicit_txn, next_visible_version)); |
777 | 0 | return Status::OK(); |
778 | 0 | } |
779 | | |
780 | 0 | Versions CloudTablet::calc_missed_versions(int64_t spec_version, Versions existing_versions) const { |
781 | 0 | DCHECK(spec_version > 0) << "invalid spec_version: " << spec_version; |
782 | | |
783 | | // sort the existing versions in ascending order |
784 | 0 | std::sort(existing_versions.begin(), existing_versions.end(), |
785 | 0 | [](const Version& a, const Version& b) { |
786 | | // simple because 2 versions are certainly not overlapping |
787 | 0 | return a.first < b.first; |
788 | 0 | }); |
789 | | |
790 | | // From the first version(=0), find the missing version until spec_version |
791 | 0 | int64_t last_version = -1; |
792 | 0 | Versions missed_versions; |
793 | 0 | for (const Version& version : existing_versions) { |
794 | 0 | if (version.first > last_version + 1) { |
795 | | // there is a hole between versions |
796 | 0 | missed_versions.emplace_back(last_version + 1, std::min(version.first, spec_version)); |
797 | 0 | } |
798 | 0 | last_version = version.second; |
799 | 0 | if (last_version >= spec_version) { |
800 | 0 | break; |
801 | 0 | } |
802 | 0 | } |
803 | 0 | if (last_version < spec_version) { |
804 | | // there is a hole between the last version and the specificed version. |
805 | 0 | missed_versions.emplace_back(last_version + 1, spec_version); |
806 | 0 | } |
807 | 0 | return missed_versions; |
808 | 0 | } |
809 | | |
810 | | Status CloudTablet::calc_delete_bitmap_for_compaction( |
811 | | const std::vector<RowsetSharedPtr>& input_rowsets, const RowsetSharedPtr& output_rowset, |
812 | | const RowIdConversion& rowid_conversion, ReaderType compaction_type, int64_t merged_rows, |
813 | | int64_t filtered_rows, int64_t initiator, DeleteBitmapPtr& output_rowset_delete_bitmap, |
814 | 0 | bool allow_delete_in_cumu_compaction) { |
815 | 0 | output_rowset_delete_bitmap = std::make_shared<DeleteBitmap>(tablet_id()); |
816 | 0 | std::unique_ptr<RowLocationSet> missed_rows; |
817 | 0 | if ((config::enable_missing_rows_correctness_check || |
818 | 0 | config::enable_mow_compaction_correctness_check_core) && |
819 | 0 | !allow_delete_in_cumu_compaction && |
820 | 0 | compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION) { |
821 | 0 | missed_rows = std::make_unique<RowLocationSet>(); |
822 | 0 | LOG(INFO) << "RowLocation Set inited succ for tablet:" << tablet_id(); |
823 | 0 | } |
824 | |
|
825 | 0 | std::unique_ptr<std::map<RowsetSharedPtr, RowLocationPairList>> location_map; |
826 | 0 | if (config::enable_rowid_conversion_correctness_check && |
827 | 0 | tablet_schema()->cluster_key_uids().empty()) { |
828 | 0 | location_map = std::make_unique<std::map<RowsetSharedPtr, RowLocationPairList>>(); |
829 | 0 | LOG(INFO) << "Location Map inited succ for tablet:" << tablet_id(); |
830 | 0 | } |
831 | | |
832 | | // 1. calc delete bitmap for historical data |
833 | 0 | RETURN_IF_ERROR(_engine.meta_mgr().sync_tablet_rowsets(this)); |
834 | 0 | Version version = max_version(); |
835 | 0 | std::size_t missed_rows_size = 0; |
836 | 0 | calc_compaction_output_rowset_delete_bitmap( |
837 | 0 | input_rowsets, rowid_conversion, 0, version.second + 1, missed_rows.get(), |
838 | 0 | location_map.get(), tablet_meta()->delete_bitmap(), output_rowset_delete_bitmap.get()); |
839 | 0 | if (missed_rows) { |
840 | 0 | missed_rows_size = missed_rows->size(); |
841 | 0 | if (!allow_delete_in_cumu_compaction) { |
842 | 0 | if (compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION && |
843 | 0 | tablet_state() == TABLET_RUNNING) { |
844 | 0 | if (merged_rows + filtered_rows >= 0 && |
845 | 0 | merged_rows + filtered_rows != missed_rows_size) { |
846 | 0 | std::string err_msg = fmt::format( |
847 | 0 | "cumulative compaction: the merged rows({}), the filtered rows({}) is " |
848 | 0 | "not equal to missed rows({}) in rowid conversion, tablet_id: {}, " |
849 | 0 | "table_id:{}", |
850 | 0 | merged_rows, filtered_rows, missed_rows_size, tablet_id(), table_id()); |
851 | 0 | if (config::enable_mow_compaction_correctness_check_core) { |
852 | 0 | CHECK(false) << err_msg; |
853 | 0 | } else { |
854 | 0 | DCHECK(false) << err_msg; |
855 | 0 | } |
856 | 0 | LOG(WARNING) << err_msg; |
857 | 0 | } |
858 | 0 | } |
859 | 0 | } |
860 | 0 | } |
861 | 0 | if (location_map) { |
862 | 0 | RETURN_IF_ERROR(check_rowid_conversion(output_rowset, *location_map)); |
863 | 0 | location_map->clear(); |
864 | 0 | } |
865 | | |
866 | | // 2. calc delete bitmap for incremental data |
867 | 0 | int64_t t1 = MonotonicMicros(); |
868 | 0 | RETURN_IF_ERROR(_engine.meta_mgr().get_delete_bitmap_update_lock( |
869 | 0 | *this, COMPACTION_DELETE_BITMAP_LOCK_ID, initiator)); |
870 | 0 | int64_t t2 = MonotonicMicros(); |
871 | 0 | RETURN_IF_ERROR(_engine.meta_mgr().sync_tablet_rowsets(this)); |
872 | 0 | int64_t t3 = MonotonicMicros(); |
873 | |
|
874 | 0 | calc_compaction_output_rowset_delete_bitmap( |
875 | 0 | input_rowsets, rowid_conversion, version.second, UINT64_MAX, missed_rows.get(), |
876 | 0 | location_map.get(), tablet_meta()->delete_bitmap(), output_rowset_delete_bitmap.get()); |
877 | 0 | int64_t t4 = MonotonicMicros(); |
878 | 0 | if (location_map) { |
879 | 0 | RETURN_IF_ERROR(check_rowid_conversion(output_rowset, *location_map)); |
880 | 0 | } |
881 | 0 | int64_t t5 = MonotonicMicros(); |
882 | 0 | if (missed_rows) { |
883 | 0 | DCHECK_EQ(missed_rows->size(), missed_rows_size); |
884 | 0 | if (missed_rows->size() != missed_rows_size) { |
885 | 0 | LOG(WARNING) << "missed rows don't match, before: " << missed_rows_size |
886 | 0 | << " after: " << missed_rows->size(); |
887 | 0 | } |
888 | 0 | } |
889 | | |
890 | | // 3. store delete bitmap |
891 | 0 | auto st = _engine.meta_mgr().update_delete_bitmap(*this, -1, initiator, |
892 | 0 | output_rowset_delete_bitmap.get()); |
893 | 0 | int64_t t6 = MonotonicMicros(); |
894 | 0 | LOG(INFO) << "calc_delete_bitmap_for_compaction, tablet_id=" << tablet_id() |
895 | 0 | << ", get lock cost " << (t2 - t1) << " us, sync rowsets cost " << (t3 - t2) |
896 | 0 | << " us, calc delete bitmap cost " << (t4 - t3) << " us, check rowid conversion cost " |
897 | 0 | << (t5 - t4) << " us, store delete bitmap cost " << (t6 - t5) |
898 | 0 | << " us, st=" << st.to_string(); |
899 | 0 | return st; |
900 | 0 | } |
901 | | |
902 | 0 | Status CloudTablet::sync_meta() { |
903 | 0 | if (!config::enable_file_cache) { |
904 | 0 | return Status::OK(); |
905 | 0 | } |
906 | | |
907 | 0 | TabletMetaSharedPtr tablet_meta; |
908 | 0 | auto st = _engine.meta_mgr().get_tablet_meta(tablet_id(), &tablet_meta); |
909 | 0 | if (!st.ok()) { |
910 | 0 | if (st.is<ErrorCode::NOT_FOUND>()) { |
911 | 0 | clear_cache(); |
912 | 0 | } |
913 | 0 | return st; |
914 | 0 | } |
915 | | |
916 | 0 | auto new_ttl_seconds = tablet_meta->ttl_seconds(); |
917 | 0 | if (_tablet_meta->ttl_seconds() != new_ttl_seconds) { |
918 | 0 | _tablet_meta->set_ttl_seconds(new_ttl_seconds); |
919 | 0 | int64_t cur_time = UnixSeconds(); |
920 | 0 | std::shared_lock rlock(_meta_lock); |
921 | 0 | for (auto& [_, rs] : _rs_version_map) { |
922 | 0 | for (int seg_id = 0; seg_id < rs->num_segments(); ++seg_id) { |
923 | 0 | int64_t new_expiration_time = |
924 | 0 | new_ttl_seconds + rs->rowset_meta()->newest_write_timestamp(); |
925 | 0 | new_expiration_time = new_expiration_time > cur_time ? new_expiration_time : 0; |
926 | 0 | auto file_key = Segment::file_cache_key(rs->rowset_id().to_string(), seg_id); |
927 | 0 | auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key); |
928 | 0 | file_cache->modify_expiration_time(file_key, new_expiration_time); |
929 | 0 | } |
930 | 0 | } |
931 | 0 | } |
932 | |
|
933 | 0 | auto new_compaction_policy = tablet_meta->compaction_policy(); |
934 | 0 | if (_tablet_meta->compaction_policy() != new_compaction_policy) { |
935 | 0 | _tablet_meta->set_compaction_policy(new_compaction_policy); |
936 | 0 | } |
937 | 0 | auto new_time_series_compaction_goal_size_mbytes = |
938 | 0 | tablet_meta->time_series_compaction_goal_size_mbytes(); |
939 | 0 | if (_tablet_meta->time_series_compaction_goal_size_mbytes() != |
940 | 0 | new_time_series_compaction_goal_size_mbytes) { |
941 | 0 | _tablet_meta->set_time_series_compaction_goal_size_mbytes( |
942 | 0 | new_time_series_compaction_goal_size_mbytes); |
943 | 0 | } |
944 | 0 | auto new_time_series_compaction_file_count_threshold = |
945 | 0 | tablet_meta->time_series_compaction_file_count_threshold(); |
946 | 0 | if (_tablet_meta->time_series_compaction_file_count_threshold() != |
947 | 0 | new_time_series_compaction_file_count_threshold) { |
948 | 0 | _tablet_meta->set_time_series_compaction_file_count_threshold( |
949 | 0 | new_time_series_compaction_file_count_threshold); |
950 | 0 | } |
951 | 0 | auto new_time_series_compaction_time_threshold_seconds = |
952 | 0 | tablet_meta->time_series_compaction_time_threshold_seconds(); |
953 | 0 | if (_tablet_meta->time_series_compaction_time_threshold_seconds() != |
954 | 0 | new_time_series_compaction_time_threshold_seconds) { |
955 | 0 | _tablet_meta->set_time_series_compaction_time_threshold_seconds( |
956 | 0 | new_time_series_compaction_time_threshold_seconds); |
957 | 0 | } |
958 | 0 | auto new_time_series_compaction_empty_rowsets_threshold = |
959 | 0 | tablet_meta->time_series_compaction_empty_rowsets_threshold(); |
960 | 0 | if (_tablet_meta->time_series_compaction_empty_rowsets_threshold() != |
961 | 0 | new_time_series_compaction_empty_rowsets_threshold) { |
962 | 0 | _tablet_meta->set_time_series_compaction_empty_rowsets_threshold( |
963 | 0 | new_time_series_compaction_empty_rowsets_threshold); |
964 | 0 | } |
965 | 0 | auto new_time_series_compaction_level_threshold = |
966 | 0 | tablet_meta->time_series_compaction_level_threshold(); |
967 | 0 | if (_tablet_meta->time_series_compaction_level_threshold() != |
968 | 0 | new_time_series_compaction_level_threshold) { |
969 | 0 | _tablet_meta->set_time_series_compaction_level_threshold( |
970 | 0 | new_time_series_compaction_level_threshold); |
971 | 0 | } |
972 | |
|
973 | 0 | return Status::OK(); |
974 | 0 | } |
975 | | |
976 | 0 | void CloudTablet::build_tablet_report_info(TTabletInfo* tablet_info) { |
977 | 0 | std::shared_lock rdlock(_meta_lock); |
978 | 0 | tablet_info->__set_total_version_count(_tablet_meta->version_count()); |
979 | 0 | tablet_info->__set_tablet_id(_tablet_meta->tablet_id()); |
980 | | // Currently, this information will not be used by the cloud report, |
981 | | // but it may be used in the future. |
982 | 0 | } |
983 | | |
984 | | Status CloudTablet::check_delete_bitmap_cache(int64_t txn_id, |
985 | 0 | DeleteBitmap* expected_delete_bitmap) { |
986 | 0 | DeleteBitmapPtr cached_delete_bitmap; |
987 | 0 | CloudStorageEngine& engine = ExecEnv::GetInstance()->storage_engine().to_cloud(); |
988 | 0 | Status st = engine.txn_delete_bitmap_cache().get_delete_bitmap( |
989 | 0 | txn_id, tablet_id(), &cached_delete_bitmap, nullptr, nullptr); |
990 | 0 | if (st.ok()) { |
991 | 0 | bool res = (expected_delete_bitmap->cardinality() == cached_delete_bitmap->cardinality()); |
992 | 0 | auto msg = fmt::format( |
993 | 0 | "delete bitmap cache check failed, cur_cardinality={}, cached_cardinality={}" |
994 | 0 | "txn_id={}, tablet_id={}", |
995 | 0 | expected_delete_bitmap->cardinality(), cached_delete_bitmap->cardinality(), txn_id, |
996 | 0 | tablet_id()); |
997 | 0 | if (!res) { |
998 | 0 | DCHECK(res) << msg; |
999 | 0 | return Status::InternalError<false>(msg); |
1000 | 0 | } |
1001 | 0 | } |
1002 | 0 | return Status::OK(); |
1003 | 0 | } |
1004 | | |
1005 | | #include "common/compile_check_end.h" |
1006 | | } // namespace doris |