be/src/storage/snapshot/snapshot_manager.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/snapshot/snapshot_manager.h" |
19 | | |
20 | | #include <fmt/format.h> |
21 | | #include <gen_cpp/AgentService_types.h> |
22 | | #include <gen_cpp/Types_constants.h> |
23 | | #include <gen_cpp/olap_file.pb.h> |
24 | | #include <thrift/protocol/TDebugProtocol.h> |
25 | | |
26 | | #include <algorithm> |
27 | | #include <atomic> |
28 | | #include <ctime> |
29 | | #include <filesystem> |
30 | | #include <list> |
31 | | #include <map> |
32 | | #include <new> |
33 | | #include <ostream> |
34 | | #include <set> |
35 | | #include <shared_mutex> |
36 | | #include <unordered_map> |
37 | | #include <utility> |
38 | | |
39 | | #include "common/config.h" |
40 | | #include "common/logging.h" |
41 | | #include "common/status.h" |
42 | | #include "io/fs/local_file_system.h" |
43 | | #include "runtime/memory/mem_tracker_limiter.h" |
44 | | #include "runtime/thread_context.h" |
45 | | #include "storage/data_dir.h" |
46 | | #include "storage/olap_common.h" |
47 | | #include "storage/olap_define.h" |
48 | | #include "storage/pb_helper.h" |
49 | | #include "storage/rowset/rowset.h" |
50 | | #include "storage/rowset/rowset_factory.h" |
51 | | #include "storage/rowset/rowset_meta.h" |
52 | | #include "storage/rowset/rowset_writer.h" |
53 | | #include "storage/rowset/rowset_writer_context.h" |
54 | | #include "storage/storage_engine.h" |
55 | | #include "storage/tablet/tablet.h" |
56 | | #include "storage/tablet/tablet_manager.h" |
57 | | #include "storage/tablet/tablet_meta.h" |
58 | | #include "storage/tablet/tablet_schema.h" |
59 | | #include "storage/tablet/tablet_schema_cache.h" |
60 | | #include "storage/utils.h" |
61 | | #include "util/uid_util.h" |
62 | | |
63 | | using std::nothrow; |
64 | | using std::string; |
65 | | using std::stringstream; |
66 | | using std::vector; |
67 | | |
68 | | namespace doris { |
69 | | using namespace ErrorCode; |
70 | | |
71 | 4 | LocalSnapshotLockGuard LocalSnapshotLock::acquire(const std::string& path) { |
72 | 4 | std::unique_lock<std::mutex> l(_lock); |
73 | 4 | auto& ctx = _local_snapshot_contexts[path]; |
74 | 4 | while (ctx._is_locked) { |
75 | 0 | ctx._waiting_count++; |
76 | 0 | ctx._cv.wait(l); |
77 | 0 | ctx._waiting_count--; |
78 | 0 | } |
79 | | |
80 | 4 | ctx._is_locked = true; |
81 | 4 | return {path}; |
82 | 4 | } |
83 | | |
84 | 4 | void LocalSnapshotLock::release(const std::string& path) { |
85 | 4 | std::lock_guard<std::mutex> l(_lock); |
86 | 4 | auto iter = _local_snapshot_contexts.find(path); |
87 | 4 | if (iter == _local_snapshot_contexts.end()) { |
88 | 0 | return; |
89 | 0 | } |
90 | | |
91 | 4 | auto& ctx = iter->second; |
92 | 4 | ctx._is_locked = false; |
93 | 4 | if (ctx._waiting_count > 0) { |
94 | 0 | ctx._cv.notify_one(); |
95 | 4 | } else { |
96 | 4 | _local_snapshot_contexts.erase(iter); |
97 | 4 | } |
98 | 4 | } |
99 | | |
100 | 377 | SnapshotManager::SnapshotManager(StorageEngine& engine) : _engine(engine) { |
101 | 377 | _mem_tracker = |
102 | 377 | MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::OTHER, "SnapshotManager"); |
103 | 377 | } |
104 | | |
105 | 377 | SnapshotManager::~SnapshotManager() = default; |
106 | | |
107 | | Status SnapshotManager::make_snapshot(const TSnapshotRequest& request, string* snapshot_path, |
108 | 3 | bool* allow_incremental_clone) { |
109 | 3 | SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_mem_tracker); |
110 | 3 | Status res = Status::OK(); |
111 | 3 | if (snapshot_path == nullptr) { |
112 | 1 | return Status::Error<INVALID_ARGUMENT>("output parameter cannot be null"); |
113 | 1 | } |
114 | | |
115 | 2 | TabletSharedPtr target_tablet = _engine.tablet_manager()->get_tablet(request.tablet_id); |
116 | | |
117 | 2 | DBUG_EXECUTE_IF("SnapshotManager::make_snapshot.inject_failure", { target_tablet = nullptr; }) |
118 | | |
119 | 2 | if (target_tablet == nullptr) { |
120 | 0 | return Status::Error<TABLE_NOT_FOUND>("failed to get tablet. tablet={}", request.tablet_id); |
121 | 0 | } |
122 | | |
123 | 2 | TabletSharedPtr ref_tablet = target_tablet; |
124 | 2 | if (request.__isset.ref_tablet_id) { |
125 | 0 | int64_t ref_tablet_id = request.ref_tablet_id; |
126 | 0 | TabletSharedPtr base_tablet = _engine.tablet_manager()->get_tablet(ref_tablet_id); |
127 | | |
128 | | // Some tasks, like medium migration, cause the target tablet and base tablet to stay on |
129 | | // different disks. In this case, we fall through to the normal restore path. |
130 | | // |
131 | | // Otherwise, we can directly link the rowset files from the base tablet to the target tablet. |
132 | 0 | if (base_tablet != nullptr && |
133 | 0 | base_tablet->data_dir()->path() == target_tablet->data_dir()->path()) { |
134 | 0 | ref_tablet = std::move(base_tablet); |
135 | 0 | } |
136 | 0 | } |
137 | | |
138 | 2 | res = _create_snapshot_files(ref_tablet, target_tablet, request, snapshot_path, |
139 | 2 | allow_incremental_clone); |
140 | | |
141 | 2 | if (!res.ok()) { |
142 | 0 | LOG(WARNING) << "failed to make snapshot. res=" << res << " tablet=" << request.tablet_id; |
143 | 0 | return res; |
144 | 0 | } |
145 | | |
146 | 2 | LOG(INFO) << "success to make snapshot. path=['" << *snapshot_path << "']"; |
147 | 2 | return res; |
148 | 2 | } |
149 | | |
150 | 1 | Status SnapshotManager::release_snapshot(const string& snapshot_path) { |
151 | 1 | auto local_snapshot_guard = LocalSnapshotLock::instance().acquire(snapshot_path); |
152 | | |
153 | | // If the requested snapshot_path is located in the root/snapshot folder, it is considered legal and can be deleted. |
154 | | // Otherwise, it is considered an illegal request and returns an error result. |
155 | 1 | SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_mem_tracker); |
156 | 1 | auto stores = _engine.get_stores(); |
157 | 1 | for (auto* store : stores) { |
158 | 0 | std::string abs_path; |
159 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(store->path(), &abs_path)); |
160 | 0 | if (snapshot_path.compare(0, abs_path.size(), abs_path) == 0 && |
161 | 0 | snapshot_path.compare(abs_path.size() + 1, SNAPSHOT_PREFIX.size(), SNAPSHOT_PREFIX) == |
162 | 0 | 0) { |
163 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(snapshot_path)); |
164 | 0 | LOG(INFO) << "success to release snapshot path. [path='" << snapshot_path << "']"; |
165 | 0 | return Status::OK(); |
166 | 0 | } |
167 | 0 | } |
168 | | |
169 | 1 | return Status::Error<CE_CMD_PARAMS_ERROR>("released snapshot path illegal. [path='{}']", |
170 | 1 | snapshot_path); |
171 | 1 | } |
172 | | |
173 | | Result<std::vector<PendingRowsetGuard>> SnapshotManager::convert_rowset_ids( |
174 | | const std::string& clone_dir, int64_t tablet_id, int64_t replica_id, int64_t table_id, |
175 | 7 | int64_t partition_id, int32_t schema_hash) { |
176 | 7 | SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_mem_tracker); |
177 | 7 | std::vector<PendingRowsetGuard> guards; |
178 | | // check clone dir existed |
179 | 7 | bool exists = true; |
180 | 7 | RETURN_IF_ERROR_RESULT(io::global_local_filesystem()->exists(clone_dir, &exists)); |
181 | 7 | if (!exists) { |
182 | 1 | return unexpected(Status::Error<DIR_NOT_EXIST>( |
183 | 1 | "clone dir not existed when convert rowsetids. clone_dir={}", clone_dir)); |
184 | 1 | } |
185 | | |
186 | 6 | TabletSharedPtr target_tablet = _engine.tablet_manager()->get_tablet(tablet_id); |
187 | 6 | TabletSchemaSPtr target_tablet_schema = nullptr; |
188 | 6 | if (target_tablet != nullptr) { |
189 | 5 | target_tablet_schema = std::make_shared<TabletSchema>(); |
190 | 5 | target_tablet_schema->copy_from(*target_tablet->tablet_schema()); |
191 | 5 | } |
192 | | |
193 | | // load original tablet meta |
194 | 6 | auto cloned_meta_file = fmt::format("{}/{}.hdr", clone_dir, tablet_id); |
195 | 6 | TabletMetaPB cloned_tablet_meta_pb; |
196 | 6 | RETURN_IF_ERROR_RESULT(TabletMeta::load_from_file(cloned_meta_file, &cloned_tablet_meta_pb)); |
197 | | |
198 | 6 | TabletMetaPB new_tablet_meta_pb; |
199 | 6 | new_tablet_meta_pb = cloned_tablet_meta_pb; |
200 | 6 | new_tablet_meta_pb.clear_rs_metas(); |
201 | | // inc_rs_meta is deprecated since 0.13. |
202 | | // keep this just for safety |
203 | 6 | new_tablet_meta_pb.clear_inc_rs_metas(); |
204 | 6 | new_tablet_meta_pb.clear_stale_rs_metas(); |
205 | 6 | new_tablet_meta_pb.clear_row_binlog_rs_metas(); |
206 | | |
207 | | // should modify tablet id and schema hash because in restore process the tablet id is not |
208 | | // equal to tablet id in meta |
209 | 6 | new_tablet_meta_pb.set_tablet_id(tablet_id); |
210 | 6 | *new_tablet_meta_pb.mutable_tablet_uid() = TabletUid::gen_uid().to_proto(); |
211 | 6 | new_tablet_meta_pb.set_replica_id(replica_id); |
212 | 6 | if (table_id > 0) { |
213 | 2 | new_tablet_meta_pb.set_table_id(table_id); |
214 | 2 | } |
215 | 6 | if (partition_id != -1) { |
216 | 6 | new_tablet_meta_pb.set_partition_id(partition_id); |
217 | 6 | } |
218 | 6 | new_tablet_meta_pb.set_schema_hash(schema_hash); |
219 | 6 | TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); |
220 | 6 | tablet_schema->init_from_pb(new_tablet_meta_pb.schema()); |
221 | 6 | TabletSchemaSPtr row_binlog_tablet_schema = std::make_shared<TabletSchema>(); |
222 | 6 | row_binlog_tablet_schema->init_from_pb(new_tablet_meta_pb.row_binlog_schema()); |
223 | | |
224 | 6 | std::unordered_map<Version, RowsetMetaPB*, HashOfVersion> rs_version_map; |
225 | 6 | std::unordered_map<RowsetId, RowsetId> rowset_id_mapping; |
226 | 6 | guards.reserve(cloned_tablet_meta_pb.rs_metas_size() + |
227 | 6 | cloned_tablet_meta_pb.stale_rs_metas_size() + |
228 | 6 | cloned_tablet_meta_pb.row_binlog_rs_metas_size()); |
229 | 9 | for (auto&& visible_rowset : cloned_tablet_meta_pb.rs_metas()) { |
230 | 9 | RowsetMetaPB* rowset_meta = new_tablet_meta_pb.add_rs_metas(); |
231 | 9 | if (!visible_rowset.has_resource_id()) { |
232 | | // src be local rowset |
233 | 9 | RowsetId rowset_id = _engine.next_rowset_id(); |
234 | 9 | guards.push_back(_engine.pending_local_rowsets().add(rowset_id)); |
235 | 9 | RETURN_IF_ERROR_RESULT(_rename_rowset_id( |
236 | 9 | visible_rowset, clone_dir, tablet_schema, rowset_id, rowset_meta, |
237 | 9 | new_tablet_meta_pb.enable_unique_key_merge_on_write())); |
238 | 9 | RowsetId src_rs_id; |
239 | 9 | if (visible_rowset.rowset_id() > 0) { |
240 | 1 | src_rs_id.init(visible_rowset.rowset_id()); |
241 | 8 | } else { |
242 | 8 | src_rs_id.init(visible_rowset.rowset_id_v2()); |
243 | 8 | } |
244 | 9 | rowset_id_mapping[src_rs_id] = rowset_id; |
245 | 9 | rowset_meta->set_source_rowset_id(visible_rowset.rowset_id_v2()); |
246 | 9 | rowset_meta->set_source_tablet_id(cloned_tablet_meta_pb.tablet_id()); |
247 | 9 | } else { |
248 | | // remote rowset |
249 | 0 | *rowset_meta = visible_rowset; |
250 | 0 | } |
251 | | |
252 | 9 | rowset_meta->set_tablet_id(tablet_id); |
253 | 9 | if (partition_id != -1) { |
254 | 9 | rowset_meta->set_partition_id(partition_id); |
255 | 9 | } |
256 | | |
257 | 9 | if (rowset_meta->has_tablet_schema() && rowset_meta->tablet_schema().index_size() > 0) { |
258 | 1 | RETURN_IF_ERROR_RESULT( |
259 | 1 | _rename_index_ids(*rowset_meta->mutable_tablet_schema(), target_tablet_schema)); |
260 | 1 | } |
261 | | |
262 | 9 | Version rowset_version = {visible_rowset.start_version(), visible_rowset.end_version()}; |
263 | 9 | rs_version_map[rowset_version] = rowset_meta; |
264 | 9 | } |
265 | | |
266 | 6 | for (auto&& stale_rowset : cloned_tablet_meta_pb.stale_rs_metas()) { |
267 | 0 | Version rowset_version = {stale_rowset.start_version(), stale_rowset.end_version()}; |
268 | 0 | auto exist_rs = rs_version_map.find(rowset_version); |
269 | 0 | if (exist_rs != rs_version_map.end()) { |
270 | 0 | continue; |
271 | 0 | } |
272 | 0 | RowsetMetaPB* rowset_meta = new_tablet_meta_pb.add_stale_rs_metas(); |
273 | |
|
274 | 0 | if (!stale_rowset.has_resource_id()) { |
275 | | // src be local rowset |
276 | 0 | RowsetId rowset_id = _engine.next_rowset_id(); |
277 | 0 | guards.push_back(_engine.pending_local_rowsets().add(rowset_id)); |
278 | 0 | RETURN_IF_ERROR_RESULT(_rename_rowset_id( |
279 | 0 | stale_rowset, clone_dir, tablet_schema, rowset_id, rowset_meta, |
280 | 0 | new_tablet_meta_pb.enable_unique_key_merge_on_write())); |
281 | 0 | RowsetId src_rs_id; |
282 | 0 | if (stale_rowset.rowset_id() > 0) { |
283 | 0 | src_rs_id.init(stale_rowset.rowset_id()); |
284 | 0 | } else { |
285 | 0 | src_rs_id.init(stale_rowset.rowset_id_v2()); |
286 | 0 | } |
287 | 0 | rowset_id_mapping[src_rs_id] = rowset_id; |
288 | 0 | rowset_meta->set_source_rowset_id(stale_rowset.rowset_id_v2()); |
289 | 0 | rowset_meta->set_source_tablet_id(cloned_tablet_meta_pb.tablet_id()); |
290 | 0 | } else { |
291 | | // remote rowset |
292 | 0 | *rowset_meta = stale_rowset; |
293 | 0 | } |
294 | | |
295 | 0 | rowset_meta->set_tablet_id(tablet_id); |
296 | 0 | if (partition_id != -1) { |
297 | 0 | rowset_meta->set_partition_id(partition_id); |
298 | 0 | } |
299 | |
|
300 | 0 | if (rowset_meta->has_tablet_schema() && rowset_meta->tablet_schema().index_size() > 0) { |
301 | 0 | RETURN_IF_ERROR_RESULT( |
302 | 0 | _rename_index_ids(*rowset_meta->mutable_tablet_schema(), target_tablet_schema)); |
303 | 0 | } |
304 | 0 | } |
305 | | |
306 | 6 | std::string row_binlog_dir = fmt::format("{}/{}", clone_dir, FDRowBinlogSuffix); |
307 | 6 | for (auto&& row_binlog_rowset : cloned_tablet_meta_pb.row_binlog_rs_metas()) { |
308 | 1 | RowsetMetaPB* rowset_meta = new_tablet_meta_pb.add_row_binlog_rs_metas(); |
309 | | |
310 | 1 | if (!row_binlog_rowset.has_resource_id()) { |
311 | 1 | RowsetId rowset_id = _engine.next_rowset_id(); |
312 | 1 | guards.push_back(_engine.pending_local_rowsets().add(rowset_id)); |
313 | 1 | RETURN_IF_ERROR_RESULT(_rename_rowset_id( |
314 | 1 | row_binlog_rowset, row_binlog_dir, row_binlog_tablet_schema, rowset_id, |
315 | 1 | rowset_meta, new_tablet_meta_pb.enable_unique_key_merge_on_write())); |
316 | 1 | RowsetId src_rs_id; |
317 | 1 | if (row_binlog_rowset.rowset_id() > 0) { |
318 | 1 | src_rs_id.init(row_binlog_rowset.rowset_id()); |
319 | 1 | } else { |
320 | 0 | src_rs_id.init(row_binlog_rowset.rowset_id_v2()); |
321 | 0 | } |
322 | 1 | rowset_id_mapping[src_rs_id] = rowset_id; |
323 | 1 | rowset_meta->set_source_rowset_id(row_binlog_rowset.rowset_id_v2()); |
324 | 1 | rowset_meta->set_source_tablet_id(cloned_tablet_meta_pb.tablet_id()); |
325 | 1 | } else { |
326 | 0 | *rowset_meta = row_binlog_rowset; |
327 | 0 | } |
328 | | |
329 | 1 | rowset_meta->set_tablet_id(tablet_id); |
330 | 1 | if (partition_id != -1) { |
331 | 1 | rowset_meta->set_partition_id(partition_id); |
332 | 1 | } |
333 | 1 | } |
334 | | |
335 | 6 | if (new_tablet_meta_pb.schema().index_size() > 0) { |
336 | 1 | RETURN_IF_ERROR_RESULT( |
337 | 1 | _rename_index_ids(*new_tablet_meta_pb.mutable_schema(), target_tablet_schema)); |
338 | 1 | } |
339 | | |
340 | 6 | if (!rowset_id_mapping.empty() && cloned_tablet_meta_pb.has_delete_bitmap()) { |
341 | 0 | const auto& cloned_del_bitmap_pb = cloned_tablet_meta_pb.delete_bitmap(); |
342 | 0 | DeleteBitmapPB* new_del_bitmap_pb = new_tablet_meta_pb.mutable_delete_bitmap(); |
343 | 0 | int rst_ids_size = cloned_del_bitmap_pb.rowset_ids_size(); |
344 | 0 | for (int i = 0; i < rst_ids_size; ++i) { |
345 | 0 | RowsetId rst_id; |
346 | 0 | rst_id.init(cloned_del_bitmap_pb.rowset_ids(i)); |
347 | | // It should not happen, if we can't convert some rowid in delete bitmap, the |
348 | | // data might be inconsist. |
349 | 0 | CHECK(rowset_id_mapping.find(rst_id) != rowset_id_mapping.end()) |
350 | 0 | << "can't find rowset_id " << rst_id.to_string() << " in convert_rowset_ids"; |
351 | 0 | new_del_bitmap_pb->set_rowset_ids(i, rowset_id_mapping[rst_id].to_string()); |
352 | 0 | } |
353 | 0 | } |
354 | | |
355 | 6 | RETURN_IF_ERROR_RESULT(TabletMeta::save(cloned_meta_file, new_tablet_meta_pb)); |
356 | | |
357 | 6 | return guards; |
358 | 6 | } |
359 | | |
360 | | Status SnapshotManager::_rename_rowset_id(const RowsetMetaPB& rs_meta_pb, |
361 | | const std::string& new_tablet_path, |
362 | | TabletSchemaSPtr tablet_schema, const RowsetId& rowset_id, |
363 | | RowsetMetaPB* new_rs_meta_pb, |
364 | 10 | bool enable_unique_key_merge_on_write) { |
365 | 10 | Status st = Status::OK(); |
366 | 10 | RowsetMetaSharedPtr rowset_meta(new RowsetMeta()); |
367 | 10 | rowset_meta->init_from_pb(rs_meta_pb); |
368 | 10 | RowsetSharedPtr org_rowset; |
369 | 10 | RETURN_IF_ERROR( |
370 | 10 | RowsetFactory::create_rowset(tablet_schema, new_tablet_path, rowset_meta, &org_rowset)); |
371 | | // do not use cache to load index |
372 | | // because the index file may conflict |
373 | | // and the cached fd may be invalid |
374 | 10 | RETURN_IF_ERROR(org_rowset->load(false)); |
375 | 10 | RowsetMetaSharedPtr org_rowset_meta = org_rowset->rowset_meta(); |
376 | 10 | RowsetWriterContext context; |
377 | 10 | context.rowset_id = rowset_id; |
378 | 10 | context.tablet_id = org_rowset_meta->tablet_id(); |
379 | 10 | context.partition_id = org_rowset_meta->partition_id(); |
380 | 10 | context.tablet_schema_hash = org_rowset_meta->tablet_schema_hash(); |
381 | 10 | context.rowset_type = org_rowset_meta->rowset_type(); |
382 | 10 | context.tablet_path = new_tablet_path; |
383 | 10 | context.tablet_schema = |
384 | 10 | org_rowset_meta->tablet_schema() ? org_rowset_meta->tablet_schema() : tablet_schema; |
385 | 10 | context.rowset_state = org_rowset_meta->rowset_state(); |
386 | 10 | context.version = org_rowset_meta->version(); |
387 | 10 | context.newest_write_timestamp = org_rowset_meta->newest_write_timestamp(); |
388 | | // keep segments_overlap same as origin rowset |
389 | 10 | context.segments_overlap = rowset_meta->segments_overlap(); |
390 | 10 | context.compaction_level = org_rowset_meta->compaction_level(); |
391 | 10 | if (org_rowset_meta->is_row_binlog()) { |
392 | 1 | context.write_binlog_opt().enable = true; |
393 | 1 | } |
394 | | // propagate MOW flag so that non-MOW key-bounds aggregation is not applied |
395 | | // when restoring a MOW tablet's rowset |
396 | 10 | context.enable_unique_key_merge_on_write = enable_unique_key_merge_on_write; |
397 | | |
398 | 10 | auto rs_writer = DORIS_TRY(RowsetFactory::create_rowset_writer(_engine, context, false)); |
399 | | |
400 | 10 | st = rs_writer->add_rowset(org_rowset); |
401 | 10 | if (!st.ok()) { |
402 | 0 | LOG(WARNING) << "failed to add rowset " |
403 | 0 | << " id = " << org_rowset->rowset_id() << " to rowset " << rowset_id; |
404 | 0 | return st; |
405 | 0 | } |
406 | 10 | RowsetSharedPtr new_rowset; |
407 | 10 | RETURN_NOT_OK_STATUS_WITH_WARN(rs_writer->build(new_rowset), |
408 | 10 | "failed to build rowset when rename rowset id"); |
409 | 10 | RETURN_IF_ERROR(new_rowset->load(false)); |
410 | 10 | new_rowset->rowset_meta()->to_rowset_pb(new_rs_meta_pb); |
411 | 10 | RETURN_IF_ERROR(org_rowset->remove()); |
412 | 10 | return Status::OK(); |
413 | 10 | } |
414 | | |
415 | | Status SnapshotManager::_rename_index_ids(TabletSchemaPB& schema_pb, |
416 | 2 | const TabletSchemaSPtr& tablet_schema) const { |
417 | 2 | if (tablet_schema == nullptr) { |
418 | 0 | return Status::OK(); |
419 | 0 | } |
420 | | |
421 | 4 | for (int i = 0; i < schema_pb.index_size(); ++i) { |
422 | 2 | TabletIndexPB* index_pb = schema_pb.mutable_index(i); |
423 | 2 | for (int32_t col_unique_id : index_pb->col_unique_id()) { |
424 | 2 | auto local_index = tablet_schema->get_index(col_unique_id, index_pb->index_type(), |
425 | 2 | index_pb->index_suffix_name()); |
426 | 2 | if (local_index) { |
427 | 2 | if (index_pb->index_id() != local_index->index_id()) { |
428 | 2 | index_pb->set_index_id(local_index->index_id()); |
429 | 2 | } |
430 | 2 | break; |
431 | 2 | } |
432 | 2 | } |
433 | 2 | } |
434 | 2 | return Status::OK(); |
435 | 2 | } |
436 | | |
437 | | // get snapshot path: curtime.seq.timeout |
438 | | // eg: 20190819221234.3.86400 |
439 | | Status SnapshotManager::_calc_snapshot_id_path(const TabletSharedPtr& tablet, int64_t timeout_s, |
440 | 2 | std::string* out_path) { |
441 | 2 | Status res = Status::OK(); |
442 | 2 | if (out_path == nullptr) { |
443 | 0 | return Status::Error<INVALID_ARGUMENT>("output parameter cannot be null"); |
444 | 0 | } |
445 | | |
446 | | // get current timestamp string |
447 | 2 | string time_str; |
448 | 2 | if ((res = gen_timestamp_string(&time_str)) != Status::OK()) { |
449 | 0 | LOG(WARNING) << "failed to generate time_string when move file to trash." |
450 | 0 | << "err code=" << res; |
451 | 0 | return res; |
452 | 0 | } |
453 | | |
454 | 2 | uint64_t sid = _snapshot_base_id.fetch_add(1, std::memory_order_relaxed) - 1; |
455 | 2 | *out_path = fmt::format("{}/{}/{}.{}.{}", tablet->data_dir()->path(), SNAPSHOT_PREFIX, time_str, |
456 | 2 | sid, timeout_s); |
457 | 2 | return res; |
458 | 2 | } |
459 | | |
460 | | // prefix: /path/to/data/DATA_PREFIX/shard_id |
461 | | // return: /path/to/data/DATA_PREFIX/shard_id/tablet_id/schema_hash |
462 | | std::string SnapshotManager::get_schema_hash_full_path(const TabletSharedPtr& ref_tablet, |
463 | 4 | const std::string& prefix) { |
464 | 4 | return fmt::format("{}/{}/{}", prefix, ref_tablet->tablet_id(), ref_tablet->schema_hash()); |
465 | 4 | } |
466 | | |
467 | | std::string SnapshotManager::_get_header_full_path(const TabletSharedPtr& ref_tablet, |
468 | 2 | const std::string& schema_hash_path) const { |
469 | 2 | return fmt::format("{}/{}.hdr", schema_hash_path, ref_tablet->tablet_id()); |
470 | 2 | } |
471 | | |
472 | 2 | std::string SnapshotManager::_get_row_binlog_full_path(const std::string& schema_hash_path) const { |
473 | 2 | return fmt::format("{}/{}", schema_hash_path, FDRowBinlogSuffix); |
474 | 2 | } |
475 | | |
476 | | std::string SnapshotManager::_get_json_header_full_path(const TabletSharedPtr& ref_tablet, |
477 | 2 | const std::string& schema_hash_path) const { |
478 | 2 | return fmt::format("{}/{}.hdr.json", schema_hash_path, ref_tablet->tablet_id()); |
479 | 2 | } |
480 | | |
481 | | Status SnapshotManager::_link_index_and_data_files( |
482 | | const std::string& schema_hash_path, const TabletSharedPtr& ref_tablet, |
483 | 0 | const std::vector<RowsetSharedPtr>& consistent_rowsets) { |
484 | 0 | Status res = Status::OK(); |
485 | 0 | for (auto& rs : consistent_rowsets) { |
486 | 0 | RETURN_IF_ERROR(rs->link_files_to(schema_hash_path, rs->rowset_id())); |
487 | 0 | } |
488 | 0 | return res; |
489 | 0 | } |
490 | | |
491 | | Status SnapshotManager::_create_snapshot_files(const TabletSharedPtr& ref_tablet, |
492 | | const TabletSharedPtr& target_tablet, |
493 | | const TSnapshotRequest& request, |
494 | | string* snapshot_path, |
495 | 2 | bool* allow_incremental_clone) { |
496 | 2 | int32_t snapshot_version = request.preferred_snapshot_version; |
497 | 2 | LOG(INFO) << "receive a make snapshot request" |
498 | 2 | << ", request detail is " << apache::thrift::ThriftDebugString(request) |
499 | 2 | << ", snapshot_version is " << snapshot_version; |
500 | 2 | Status res = Status::OK(); |
501 | 2 | if (snapshot_path == nullptr) { |
502 | 0 | return Status::Error<INVALID_ARGUMENT>("output parameter cannot be null"); |
503 | 0 | } |
504 | | |
505 | | // snapshot_id_path: |
506 | | // /data/shard_id/tablet_id/snapshot/time_str/id.timeout/ |
507 | 2 | int64_t timeout_s = config::snapshot_expire_time_sec; |
508 | 2 | if (request.__isset.timeout) { |
509 | 0 | timeout_s = request.timeout; |
510 | 0 | } |
511 | 2 | std::string snapshot_id_path; |
512 | 2 | res = _calc_snapshot_id_path(target_tablet, timeout_s, &snapshot_id_path); |
513 | 2 | if (!res.ok()) { |
514 | 0 | LOG(WARNING) << "failed to calc snapshot_id_path, tablet=" |
515 | 0 | << target_tablet->data_dir()->path(); |
516 | 0 | return res; |
517 | 0 | } |
518 | | |
519 | 2 | bool is_copy_binlog = request.__isset.is_copy_binlog ? request.is_copy_binlog : false; |
520 | 2 | int32_t copy_type = is_copy_binlog ? TabletCopyType::DEFAULT : TTabletCopyType::DATA; |
521 | 2 | if (request.__isset.copy_type) { |
522 | 2 | copy_type = request.copy_type; |
523 | 2 | } |
524 | 2 | RETURN_IF_ERROR(TabletCopyType::validate(copy_type)); |
525 | 2 | bool need_row_binlog = TabletCopyType::has(copy_type, TTabletCopyType::ROW_BINLOG); |
526 | 2 | bool need_ccr_binlog = TabletCopyType::has(copy_type, TTabletCopyType::CCR_BINLOG); |
527 | | |
528 | | // schema_full_path_desc.filepath: |
529 | | // /snapshot_id_path/tablet_id/schema_hash/ |
530 | 2 | auto schema_full_path = get_schema_hash_full_path(target_tablet, snapshot_id_path); |
531 | 2 | auto row_binlog_full_path = _get_row_binlog_full_path(schema_full_path); |
532 | | // header_path: |
533 | | // /schema_full_path/tablet_id.hdr |
534 | 2 | auto header_path = _get_header_full_path(target_tablet, schema_full_path); |
535 | | // /schema_full_path/tablet_id.hdr.json |
536 | 2 | auto json_header_path = _get_json_header_full_path(target_tablet, schema_full_path); |
537 | 2 | bool exists = true; |
538 | 2 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(schema_full_path, &exists)); |
539 | 2 | if (exists) { |
540 | 0 | VLOG_TRACE << "remove the old schema_full_path." << schema_full_path; |
541 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(schema_full_path)); |
542 | 0 | } |
543 | | |
544 | 2 | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(schema_full_path)); |
545 | 2 | string snapshot_id; |
546 | 2 | RETURN_IF_ERROR(io::global_local_filesystem()->canonicalize(snapshot_id_path, &snapshot_id)); |
547 | | |
548 | 2 | std::vector<RowsetSharedPtr> consistent_rowsets; |
549 | 2 | std::vector<RowsetSharedPtr> row_binlog_rowsets; |
550 | 2 | do { |
551 | 2 | TabletMetaSharedPtr new_tablet_meta(new (nothrow) TabletMeta()); |
552 | 2 | if (new_tablet_meta == nullptr) { |
553 | 0 | res = Status::Error<MEM_ALLOC_FAILED>("fail to malloc TabletMeta."); |
554 | 0 | break; |
555 | 0 | } |
556 | 2 | DeleteBitmap delete_bitmap_snapshot(new_tablet_meta->tablet_id()); |
557 | 2 | DeleteBitmap binlog_delvec_snapshot(new_tablet_meta->tablet_id()); |
558 | | |
559 | | /// If set missing_version, try to get all missing version. |
560 | | /// If some of them not exist in tablet, we will fall back to |
561 | | /// make the full snapshot of the tablet. |
562 | 2 | { |
563 | 2 | std::shared_lock rdlock(ref_tablet->get_header_lock()); |
564 | 2 | if (ref_tablet->tablet_state() == TABLET_SHUTDOWN) { |
565 | 0 | return Status::Aborted("tablet has shutdown"); |
566 | 0 | } |
567 | 2 | if (need_row_binlog && !ref_tablet->enable_row_binlog()) { |
568 | 0 | res = Status::InternalError( |
569 | 0 | "tablet {} does not enable row binlog, but ROW_BINLOG copy is requested", |
570 | 0 | ref_tablet->tablet_id()); |
571 | 0 | break; |
572 | 0 | } |
573 | 2 | bool is_single_rowset_clone = |
574 | 2 | (request.__isset.start_version && request.__isset.end_version); |
575 | 2 | if (is_single_rowset_clone) { |
576 | 0 | LOG(INFO) << "handle compaction clone make snapshot, tablet_id: " |
577 | 0 | << ref_tablet->tablet_id(); |
578 | 0 | Version version(request.start_version, request.end_version); |
579 | 0 | const RowsetSharedPtr rowset = ref_tablet->get_rowset_by_version(version, false); |
580 | 0 | if (rowset && rowset->is_local()) { |
581 | 0 | consistent_rowsets.push_back(rowset); |
582 | 0 | } else { |
583 | 0 | LOG(WARNING) << "failed to find local version when do compaction snapshot. " |
584 | 0 | << " tablet=" << request.tablet_id |
585 | 0 | << " schema_hash=" << request.schema_hash |
586 | 0 | << " version=" << version; |
587 | 0 | res = Status::InternalError( |
588 | 0 | "failed to find version when do compaction snapshot"); |
589 | 0 | break; |
590 | 0 | } |
591 | 0 | if (need_row_binlog) { |
592 | 0 | RowsetSharedPtr row_binlog_rowset = |
593 | 0 | ref_tablet->get_row_binlog_rowset_by_version(version); |
594 | 0 | if (row_binlog_rowset != nullptr) { |
595 | 0 | row_binlog_rowsets.push_back(row_binlog_rowset); |
596 | 0 | } else { |
597 | 0 | LOG(WARNING) |
598 | 0 | << "failed to find row binlog when do compaction snapshot. " |
599 | 0 | << " tablet=" << request.tablet_id |
600 | 0 | << " schema_hash=" << request.schema_hash << " version=" << version; |
601 | 0 | res = Status::InternalError( |
602 | 0 | "failed to find row binlog when do compaction snapshot. tablet={}, " |
603 | 0 | "schema_hash={}, version={}", |
604 | 0 | request.tablet_id, request.schema_hash, version.to_string()); |
605 | 0 | break; |
606 | 0 | } |
607 | 0 | } |
608 | 0 | } |
609 | | // be would definitely set it as true no matter has missed version or not |
610 | | // but it would take no effects on the following range loop |
611 | 2 | if (!is_single_rowset_clone && request.__isset.missing_version) { |
612 | 0 | for (int64_t missed_version : request.missing_version) { |
613 | 0 | Version version = {missed_version, missed_version}; |
614 | | // find rowset in both rs_meta and stale_rs_meta |
615 | 0 | const RowsetSharedPtr rowset = ref_tablet->get_rowset_by_version(version, true); |
616 | 0 | if (rowset != nullptr) { |
617 | 0 | if (!rowset->is_local()) { |
618 | | // MUST make full snapshot to ensure `cooldown_meta_id` is consistent with the cooldowned rowsets after clone. |
619 | 0 | res = Status::Error<ErrorCode::INTERNAL_ERROR>( |
620 | 0 | "missed version is a cooldowned rowset, must make full " |
621 | 0 | "snapshot. missed_version={}, tablet_id={}", |
622 | 0 | missed_version, ref_tablet->tablet_id()); |
623 | 0 | break; |
624 | 0 | } |
625 | 0 | consistent_rowsets.push_back(rowset); |
626 | 0 | } else { |
627 | 0 | res = Status::InternalError( |
628 | 0 | "failed to find missed version when snapshot. tablet={}, " |
629 | 0 | "schema_hash={}, version={}", |
630 | 0 | request.tablet_id, request.schema_hash, version.to_string()); |
631 | 0 | break; |
632 | 0 | } |
633 | 0 | if (need_row_binlog) { |
634 | 0 | RowsetSharedPtr row_binlog_rowset = |
635 | 0 | ref_tablet->get_row_binlog_rowset_by_version(version); |
636 | 0 | if (row_binlog_rowset != nullptr) { |
637 | 0 | row_binlog_rowsets.push_back(row_binlog_rowset); |
638 | 0 | } else { |
639 | 0 | res = Status::InternalError( |
640 | 0 | "failed to find row binlog when snapshot. tablet={}, " |
641 | 0 | "schema_hash={}, version={}", |
642 | 0 | request.tablet_id, request.schema_hash, version.to_string()); |
643 | 0 | break; |
644 | 0 | } |
645 | 0 | } |
646 | 0 | } |
647 | 0 | } |
648 | | |
649 | 2 | DBUG_EXECUTE_IF("SnapshotManager.create_snapshot_files.allow_inc_clone", { |
650 | 2 | auto tablet_id = dp->param("tablet_id", 0); |
651 | 2 | auto is_full_clone = dp->param("is_full_clone", false); |
652 | 2 | if (ref_tablet->tablet_id() == tablet_id && is_full_clone) { |
653 | 2 | LOG(INFO) << "injected full clone for tabelt: " << tablet_id; |
654 | 2 | res = Status::InternalError("fault injection error"); |
655 | 2 | } |
656 | 2 | }); |
657 | | |
658 | | // be would definitely set it as true no matter has missed version or not, we could |
659 | | // just check whether the missed version is empty or not |
660 | 2 | int64_t version = -1; |
661 | 2 | if (!is_single_rowset_clone && (!res.ok() || request.missing_version.empty())) { |
662 | 2 | if (!request.__isset.missing_version && |
663 | 2 | ref_tablet->tablet_meta()->cooldown_meta_id().initialized()) { |
664 | 0 | LOG(WARNING) << "currently not support backup tablet with cooldowned remote " |
665 | 0 | "data. tablet=" |
666 | 0 | << request.tablet_id; |
667 | 0 | return Status::NotSupported( |
668 | 0 | "currently not support backup tablet with cooldowned remote data"); |
669 | 0 | } |
670 | | /// not all missing versions are found, fall back to full snapshot. |
671 | 2 | res = Status::OK(); // reset res |
672 | 2 | consistent_rowsets.clear(); // reset vector |
673 | 2 | row_binlog_rowsets.clear(); |
674 | | |
675 | | // get latest version |
676 | 2 | const RowsetSharedPtr last_version = ref_tablet->get_rowset_with_max_version(); |
677 | 2 | if (last_version == nullptr) { |
678 | 0 | res = Status::InternalError("tablet has not any version. path={}", |
679 | 0 | ref_tablet->tablet_id()); |
680 | 0 | break; |
681 | 0 | } |
682 | | // get snapshot version, use request.version if specified |
683 | 2 | version = last_version->end_version(); |
684 | 2 | if (request.__isset.version) { |
685 | 0 | if (last_version->end_version() < request.version) { |
686 | 0 | res = Status::Error<INVALID_ARGUMENT>( |
687 | 0 | "invalid make snapshot request. version={}, req_version={}", |
688 | 0 | last_version->version().to_string(), request.version); |
689 | 0 | break; |
690 | 0 | } |
691 | 0 | version = request.version; |
692 | 0 | } |
693 | 2 | if (ref_tablet->tablet_meta()->cooldown_meta_id().initialized()) { |
694 | | // Tablet has cooldowned data, MUST pick consistent rowsets with continuous cooldowned version |
695 | | // Get max cooldowned version |
696 | 0 | int64_t max_cooldowned_version = -1; |
697 | 0 | for (auto& [v, rs] : ref_tablet->rowset_map()) { |
698 | 0 | if (rs->is_local()) { |
699 | 0 | continue; |
700 | 0 | } |
701 | 0 | consistent_rowsets.push_back(rs); |
702 | 0 | max_cooldowned_version = std::max(max_cooldowned_version, v.second); |
703 | 0 | } |
704 | 0 | DCHECK_GE(max_cooldowned_version, 1) << "tablet_id=" << ref_tablet->tablet_id(); |
705 | 0 | std::sort(consistent_rowsets.begin(), consistent_rowsets.end(), |
706 | 0 | Rowset::comparator); |
707 | 0 | res = check_version_continuity(consistent_rowsets); |
708 | 0 | if (res.ok() && max_cooldowned_version < version) { |
709 | | // Pick consistent rowsets of remaining required version |
710 | 0 | auto ret = ref_tablet->capture_consistent_rowsets_unlocked( |
711 | 0 | {max_cooldowned_version + 1, version}, CaptureRowsetOps {}); |
712 | 0 | if (ret) { |
713 | 0 | consistent_rowsets = std::move(ret->rowsets); |
714 | 0 | } else { |
715 | 0 | res = std::move(ret.error()); |
716 | 0 | } |
717 | 0 | } |
718 | 2 | } else { |
719 | 2 | auto ret = ref_tablet->capture_consistent_rowsets_unlocked(Version(0, version), |
720 | 2 | CaptureRowsetOps {}); |
721 | 2 | if (ret) { |
722 | 2 | consistent_rowsets = std::move(ret->rowsets); |
723 | 2 | } else { |
724 | 0 | res = std::move(ret.error()); |
725 | 0 | } |
726 | 2 | } |
727 | 2 | if (!res.ok()) { |
728 | 0 | LOG(WARNING) << "fail to select versions to span. res=" << res; |
729 | 0 | break; |
730 | 0 | } |
731 | 2 | if (need_row_binlog) { |
732 | | // row binlog does not support cooldown yet. |
733 | 0 | auto ret = ref_tablet->capture_consistent_rowsets_unlocked( |
734 | 0 | Version(0, version), CaptureRowsetOps {.capture_row_binlog = true}); |
735 | 0 | if (ret) { |
736 | 0 | row_binlog_rowsets = std::move(ret->rowsets); |
737 | 0 | } else { |
738 | 0 | res = std::move(ret.error()); |
739 | 0 | } |
740 | 0 | if (!res.ok()) { |
741 | 0 | LOG(WARNING) << "fail to select row binlogs to span. res=" << res; |
742 | 0 | break; |
743 | 0 | } |
744 | 0 | } |
745 | 2 | *allow_incremental_clone = false; |
746 | 2 | } else { |
747 | 0 | version = ref_tablet->max_version_unlocked(); |
748 | 0 | *allow_incremental_clone = true; |
749 | 0 | } |
750 | | |
751 | | // copy the tablet meta to new_tablet_meta inside header lock |
752 | 2 | CHECK(res.ok()) << res; |
753 | 2 | ref_tablet->generate_tablet_meta_copy_unlocked(*new_tablet_meta, false); |
754 | | // The delete bitmap update operation and the add_inc_rowset operation are not atomic, |
755 | | // so the delete bitmap may contain some data generated by invisible rowsets; we should |
756 | | // get rid of these useless bitmaps when doing a snapshot. |
757 | 2 | if (ref_tablet->keys_type() == UNIQUE_KEYS && |
758 | 2 | ref_tablet->enable_unique_key_merge_on_write()) { |
759 | 0 | delete_bitmap_snapshot = |
760 | 0 | ref_tablet->tablet_meta()->delete_bitmap().snapshot(version); |
761 | 0 | if (need_row_binlog) { |
762 | 0 | binlog_delvec_snapshot = |
763 | 0 | ref_tablet->tablet_meta()->binlog_delvec().snapshot(version); |
764 | 0 | } |
765 | 0 | } |
766 | 2 | } |
767 | | |
768 | 0 | std::vector<RowsetMetaSharedPtr> rs_metas; |
769 | 4 | for (auto& rs : consistent_rowsets) { |
770 | 4 | if (rs->is_local()) { |
771 | | // local rowset |
772 | 4 | res = rs->link_files_to(schema_full_path, rs->rowset_id()); |
773 | 4 | if (!res.ok()) { |
774 | 0 | break; |
775 | 0 | } |
776 | 4 | } |
777 | 4 | rs_metas.push_back(rs->rowset_meta()); |
778 | 4 | VLOG_NOTICE << "add rowset meta to clone list. " |
779 | 0 | << " start version " << rs->rowset_meta()->start_version() |
780 | 0 | << " end version " << rs->rowset_meta()->end_version() << " empty " |
781 | 0 | << rs->rowset_meta()->empty(); |
782 | 4 | } |
783 | 2 | if (!res.ok()) { |
784 | 0 | LOG(WARNING) << "fail to create hard link. path=" << snapshot_id_path |
785 | 0 | << " tablet=" << target_tablet->tablet_id() |
786 | 0 | << " ref tablet=" << ref_tablet->tablet_id(); |
787 | 0 | break; |
788 | 0 | } |
789 | | |
790 | 2 | std::vector<RowsetMetaSharedPtr> row_binlog_rs_metas; |
791 | 2 | if (need_row_binlog) { |
792 | 0 | res = io::global_local_filesystem()->create_directory(row_binlog_full_path); |
793 | 0 | if (!res.ok()) { |
794 | 0 | break; |
795 | 0 | } |
796 | 0 | for (auto& rs : row_binlog_rowsets) { |
797 | 0 | if (rs->is_local()) { |
798 | | // local row binlog |
799 | 0 | res = rs->link_files_to(row_binlog_full_path, rs->rowset_id()); |
800 | 0 | if (!res.ok()) { |
801 | 0 | break; |
802 | 0 | } |
803 | 0 | } |
804 | 0 | row_binlog_rs_metas.push_back(rs->rowset_meta()); |
805 | 0 | VLOG_NOTICE << "add row binlog rowset meta to clone list. " |
806 | 0 | << " start version " << rs->rowset_meta()->start_version() |
807 | 0 | << " end version " << rs->rowset_meta()->end_version() << " empty " |
808 | 0 | << rs->rowset_meta()->empty(); |
809 | 0 | } |
810 | 0 | if (!res.ok()) { |
811 | 0 | LOG(WARNING) << "fail to create row binlog hard link. path=" << snapshot_id_path |
812 | 0 | << " tablet=" << target_tablet->tablet_id() |
813 | 0 | << " ref tablet=" << ref_tablet->tablet_id(); |
814 | 0 | break; |
815 | 0 | } |
816 | 0 | } |
817 | | |
818 | | // The inc_rs_metas is deprecated since Doris version 0.13. |
819 | | // Clear it for safety reasons. |
820 | | // Whether the snapshot is incremental or full, rowset information is stored in rs_meta. |
821 | 2 | new_tablet_meta->revise_rs_metas(std::move(rs_metas)); |
822 | 2 | if (need_row_binlog) { |
823 | 0 | new_tablet_meta->revise_row_binlog_rs_metas(std::move(row_binlog_rs_metas)); |
824 | 0 | } |
825 | 2 | if (ref_tablet->keys_type() == UNIQUE_KEYS && |
826 | 2 | ref_tablet->enable_unique_key_merge_on_write()) { |
827 | 0 | new_tablet_meta->revise_delete_bitmap_unlocked(delete_bitmap_snapshot); |
828 | 0 | if (need_row_binlog) { |
829 | 0 | new_tablet_meta->revise_binlog_delvec_unlocked(binlog_delvec_snapshot); |
830 | 0 | } |
831 | 0 | } |
832 | | |
833 | 2 | if (snapshot_version == g_Types_constants.TSNAPSHOT_REQ_VERSION2) { |
834 | 2 | res = new_tablet_meta->save(header_path); |
835 | 2 | if (res.ok() && request.__isset.is_copy_tablet_task && request.is_copy_tablet_task) { |
836 | 0 | res = new_tablet_meta->save_as_json(json_header_path); |
837 | 0 | } |
838 | 2 | } else { |
839 | 0 | res = Status::Error<INVALID_SNAPSHOT_VERSION>( |
840 | 0 | "snapshot_version not equal to g_Types_constants.TSNAPSHOT_REQ_VERSION2"); |
841 | 0 | } |
842 | | |
843 | 2 | if (!res.ok()) { |
844 | 0 | LOG(WARNING) << "convert rowset failed, res:" << res |
845 | 0 | << ", tablet:" << new_tablet_meta->tablet_id() |
846 | 0 | << ", schema hash:" << new_tablet_meta->schema_hash() |
847 | 0 | << ", snapshot_version:" << snapshot_version |
848 | 0 | << ", is incremental:" << request.__isset.missing_version; |
849 | 0 | break; |
850 | 0 | } |
851 | | |
852 | 2 | } while (false); |
853 | | |
854 | | // link all binlog files to snapshot path |
855 | 2 | do { |
856 | 2 | if (!res.ok()) { |
857 | 0 | break; |
858 | 0 | } |
859 | | |
860 | 2 | if (!need_ccr_binlog || !target_tablet->enable_ccr_binlog()) { |
861 | 2 | break; |
862 | 2 | } |
863 | | |
864 | 0 | RowsetBinlogMetasPB rowset_binlog_metas_pb; |
865 | 0 | for (auto& rs : consistent_rowsets) { |
866 | 0 | if (!rs->is_local()) { |
867 | 0 | continue; |
868 | 0 | } |
869 | 0 | res = ref_tablet->get_rowset_binlog_metas(rs->version(), &rowset_binlog_metas_pb); |
870 | 0 | if (!res.ok()) { |
871 | 0 | break; |
872 | 0 | } |
873 | 0 | } |
874 | 0 | if (!res.ok() || rowset_binlog_metas_pb.rowset_binlog_metas_size() == 0) { |
875 | 0 | break; |
876 | 0 | } |
877 | | |
878 | | // write to pb file |
879 | 0 | auto rowset_binlog_metas_pb_filename = |
880 | 0 | fmt::format("{}/rowset_binlog_metas.pb", schema_full_path); |
881 | 0 | res = write_pb(rowset_binlog_metas_pb_filename, rowset_binlog_metas_pb); |
882 | 0 | if (!res.ok()) { |
883 | 0 | break; |
884 | 0 | } |
885 | | |
886 | 0 | for (const auto& rowset_binlog_meta : rowset_binlog_metas_pb.rowset_binlog_metas()) { |
887 | 0 | std::string segment_file_path; |
888 | 0 | auto num_segments = rowset_binlog_meta.num_segments(); |
889 | 0 | std::string_view rowset_id = rowset_binlog_meta.rowset_id(); |
890 | |
|
891 | 0 | RowsetMetaPB rowset_meta_pb; |
892 | 0 | if (!rowset_meta_pb.ParseFromString(rowset_binlog_meta.data())) { |
893 | 0 | auto err_msg = fmt::format("fail to parse binlog meta data value:{}", |
894 | 0 | rowset_binlog_meta.data()); |
895 | 0 | res = Status::InternalError(err_msg); |
896 | 0 | LOG(WARNING) << err_msg; |
897 | 0 | return res; |
898 | 0 | } |
899 | 0 | const auto& tablet_schema_pb = rowset_meta_pb.tablet_schema(); |
900 | 0 | TabletSchema tablet_schema; |
901 | 0 | tablet_schema.init_from_pb(tablet_schema_pb); |
902 | |
|
903 | 0 | std::vector<string> linked_success_files; |
904 | 0 | Defer remove_linked_files {[&]() { // clear linked files if errors happen |
905 | 0 | if (!res.ok()) { |
906 | 0 | LOG(WARNING) << "will delete linked success files due to error " << res; |
907 | 0 | std::vector<io::Path> paths; |
908 | 0 | for (auto& file : linked_success_files) { |
909 | 0 | paths.emplace_back(file); |
910 | 0 | LOG(WARNING) |
911 | 0 | << "will delete linked success file " << file << " due to error"; |
912 | 0 | } |
913 | 0 | static_cast<void>(io::global_local_filesystem()->batch_delete(paths)); |
914 | 0 | LOG(WARNING) << "done delete linked success files due to error " << res; |
915 | 0 | } |
916 | 0 | }}; |
917 | | |
918 | | // link segment files and index files |
919 | 0 | for (int64_t segment_index = 0; segment_index < num_segments; ++segment_index) { |
920 | 0 | segment_file_path = ref_tablet->get_segment_filepath(rowset_id, segment_index); |
921 | 0 | auto snapshot_segment_file_path = |
922 | 0 | fmt::format("{}/{}_{}.binlog", schema_full_path, rowset_id, segment_index); |
923 | |
|
924 | 0 | res = io::global_local_filesystem()->link_file(segment_file_path, |
925 | 0 | snapshot_segment_file_path); |
926 | 0 | if (!res.ok()) { |
927 | 0 | LOG(WARNING) << "fail to link binlog file. [src=" << segment_file_path |
928 | 0 | << ", dest=" << snapshot_segment_file_path << "]"; |
929 | 0 | break; |
930 | 0 | } |
931 | 0 | linked_success_files.push_back(snapshot_segment_file_path); |
932 | |
|
933 | 0 | if (tablet_schema.get_inverted_index_storage_format() == |
934 | 0 | InvertedIndexStorageFormatPB::V1) { |
935 | 0 | for (const auto& index : tablet_schema.inverted_indexes()) { |
936 | 0 | auto index_id = index->index_id(); |
937 | 0 | auto index_file = InvertedIndexDescriptor::get_index_file_path_v1( |
938 | 0 | InvertedIndexDescriptor::get_index_file_path_prefix( |
939 | 0 | segment_file_path), |
940 | 0 | index_id, index->get_index_suffix()); |
941 | 0 | auto snapshot_segment_index_file_path = |
942 | 0 | fmt::format("{}/{}_{}_{}.binlog-index", schema_full_path, rowset_id, |
943 | 0 | segment_index, index_id); |
944 | 0 | VLOG_DEBUG << "link " << index_file << " to " |
945 | 0 | << snapshot_segment_index_file_path; |
946 | 0 | res = io::global_local_filesystem()->link_file( |
947 | 0 | index_file, snapshot_segment_index_file_path); |
948 | 0 | if (!res.ok()) { |
949 | 0 | LOG(WARNING) << "fail to link binlog index file. [src=" << index_file |
950 | 0 | << ", dest=" << snapshot_segment_index_file_path << "]"; |
951 | 0 | break; |
952 | 0 | } |
953 | 0 | linked_success_files.push_back(snapshot_segment_index_file_path); |
954 | 0 | } |
955 | 0 | } else { |
956 | 0 | if (tablet_schema.has_inverted_index() || tablet_schema.has_ann_index()) { |
957 | 0 | auto index_file = InvertedIndexDescriptor::get_index_file_path_v2( |
958 | 0 | InvertedIndexDescriptor::get_index_file_path_prefix( |
959 | 0 | segment_file_path)); |
960 | 0 | auto snapshot_segment_index_file_path = |
961 | 0 | fmt::format("{}/{}_{}.binlog-index", schema_full_path, rowset_id, |
962 | 0 | segment_index); |
963 | 0 | VLOG_DEBUG << "link " << index_file << " to " |
964 | 0 | << snapshot_segment_index_file_path; |
965 | 0 | res = io::global_local_filesystem()->link_file( |
966 | 0 | index_file, snapshot_segment_index_file_path); |
967 | 0 | if (!res.ok()) { |
968 | 0 | LOG(WARNING) << "fail to link binlog index file. [src=" << index_file |
969 | 0 | << ", dest=" << snapshot_segment_index_file_path << "]"; |
970 | 0 | break; |
971 | 0 | } |
972 | 0 | linked_success_files.push_back(snapshot_segment_index_file_path); |
973 | 0 | } |
974 | 0 | } |
975 | 0 | } |
976 | |
|
977 | 0 | if (!res.ok()) { |
978 | 0 | break; |
979 | 0 | } |
980 | 0 | } |
981 | 0 | } while (false); |
982 | | |
983 | 2 | if (!res.ok()) { |
984 | 0 | LOG(WARNING) << "fail to make snapshot, try to delete the snapshot path. path=" |
985 | 0 | << snapshot_id_path.c_str(); |
986 | |
|
987 | 0 | bool exist = true; |
988 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(snapshot_id_path, &exist)); |
989 | 0 | if (exist) { |
990 | 0 | VLOG_NOTICE << "remove snapshot path. [path=" << snapshot_id_path << "]"; |
991 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(snapshot_id_path)); |
992 | 0 | } |
993 | 2 | } else { |
994 | 2 | *snapshot_path = snapshot_id; |
995 | 2 | } |
996 | | |
997 | 2 | return res; |
998 | 2 | } |
999 | | } // namespace doris |