be/src/cloud/cloud_snapshot_mgr.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "cloud/cloud_snapshot_mgr.h" |
19 | | |
20 | | #include <fmt/format.h> |
21 | | #include <gen_cpp/olap_file.pb.h> |
22 | | |
23 | | #include <map> |
24 | | #include <unordered_map> |
25 | | |
26 | | #include "cloud/cloud_meta_mgr.h" |
27 | | #include "cloud/cloud_storage_engine.h" |
28 | | #include "cloud/cloud_tablet_mgr.h" |
29 | | #include "common/cast_set.h" |
30 | | #include "common/config.h" |
31 | | #include "common/logging.h" |
32 | | #include "common/status.h" |
33 | | #include "io/fs/local_file_system.h" |
34 | | #include "runtime/memory/mem_tracker_limiter.h" |
35 | | #include "runtime/thread_context.h" |
36 | | #include "storage/data_dir.h" |
37 | | #include "storage/olap_common.h" |
38 | | #include "storage/olap_define.h" |
39 | | #include "storage/pb_helper.h" |
40 | | #include "storage/rowset/rowset.h" |
41 | | #include "storage/rowset/rowset_factory.h" |
42 | | #include "storage/rowset/rowset_meta.h" |
43 | | #include "storage/rowset/rowset_writer.h" |
44 | | #include "storage/rowset/rowset_writer_context.h" |
45 | | #include "storage/storage_policy.h" |
46 | | #include "storage/tablet/tablet_meta.h" |
47 | | #include "storage/tablet/tablet_schema.h" |
48 | | #include "storage/tablet/tablet_schema_cache.h" |
49 | | #include "storage/utils.h" |
50 | | #include "util/slice.h" |
51 | | #include "util/uid_util.h" |
52 | | |
53 | | namespace doris { |
54 | | #include "common/compile_check_begin.h" |
55 | | using namespace ErrorCode; |
56 | | |
57 | 3 | CloudSnapshotMgr::CloudSnapshotMgr(CloudStorageEngine& engine) : _engine(engine) { |
58 | 3 | _mem_tracker = |
59 | 3 | MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::OTHER, "CloudSnapshotMgr"); |
60 | 3 | } |
61 | | |
62 | | Status CloudSnapshotMgr::make_snapshot(int64_t target_tablet_id, StorageResource& storage_resource, |
63 | | std::unordered_map<std::string, std::string>& file_mapping, |
64 | 0 | bool is_restore, const Slice* slice) { |
65 | 0 | SCOPED_ATTACH_TASK(_mem_tracker); |
66 | 0 | if (is_restore && slice == nullptr) { |
67 | 0 | return Status::Error<INVALID_ARGUMENT>("slice cannot be null in restore."); |
68 | 0 | } |
69 | | |
70 | 0 | CloudTabletSPtr target_tablet = DORIS_TRY(_engine.tablet_mgr().get_tablet(target_tablet_id)); |
71 | 0 | if (target_tablet == nullptr) { |
72 | 0 | return Status::Error<TABLE_NOT_FOUND>("failed to get tablet. tablet={}", target_tablet_id); |
73 | 0 | } |
74 | | |
75 | 0 | TabletMeta tablet_meta; |
76 | 0 | if (is_restore) { |
77 | | // 1. deserialize tablet meta from memory |
78 | 0 | RETURN_IF_ERROR(tablet_meta.create_from_buffer((const uint8_t*)slice->data, slice->size)); |
79 | 0 | TabletMetaPB tablet_meta_pb; |
80 | 0 | tablet_meta.to_meta_pb(&tablet_meta_pb, false); |
81 | |
|
82 | 0 | tablet_meta_pb.clear_rs_metas(); // copy the rs meta |
83 | 0 | if (tablet_meta.all_rs_metas().size() > 0) { |
84 | 0 | tablet_meta_pb.mutable_inc_rs_metas()->Reserve( |
85 | 0 | cast_set<int>(tablet_meta.all_rs_metas().size())); |
86 | 0 | for (auto& [_, rs] : tablet_meta.all_rs_metas()) { |
87 | 0 | rs->to_rowset_pb(tablet_meta_pb.add_rs_metas()); |
88 | 0 | } |
89 | 0 | } |
90 | 0 | tablet_meta_pb.clear_stale_rs_metas(); // strip off the stale rs meta |
91 | | |
92 | | // 2. convert rowsets |
93 | 0 | TabletMetaPB new_tablet_meta_pb; |
94 | 0 | RETURN_IF_ERROR(convert_rowsets(&new_tablet_meta_pb, tablet_meta_pb, target_tablet_id, |
95 | 0 | target_tablet, storage_resource, file_mapping)); |
96 | | |
97 | | // 3. send make snapshot request |
98 | 0 | RETURN_IF_ERROR(_engine.meta_mgr().prepare_restore_job(new_tablet_meta_pb)); |
99 | 0 | return Status::OK(); |
100 | 0 | } |
101 | | |
102 | | // backup not implemented |
103 | | |
104 | 0 | LOG(INFO) << "success to make snapshot. [tablet_id=" << target_tablet_id << "]"; |
105 | 0 | return Status::OK(); |
106 | 0 | } |
107 | | |
108 | 0 | Status CloudSnapshotMgr::commit_snapshot(int64_t tablet_id) { |
109 | 0 | SCOPED_ATTACH_TASK(_mem_tracker); |
110 | 0 | CloudTabletSPtr tablet = DORIS_TRY(_engine.tablet_mgr().get_tablet(tablet_id)); |
111 | 0 | if (tablet == nullptr) { |
112 | 0 | return Status::Error<TABLE_NOT_FOUND>("failed to get tablet. tablet={}", tablet_id); |
113 | 0 | } |
114 | 0 | RETURN_IF_ERROR(_engine.meta_mgr().commit_restore_job(tablet_id)); |
115 | 0 | tablet->clear_cache(); |
116 | 0 | LOG(INFO) << "success to commit snapshot. [tablet_id=" << tablet_id << "]"; |
117 | 0 | return Status::OK(); |
118 | 0 | } |
119 | | |
120 | 0 | Status CloudSnapshotMgr::release_snapshot(int64_t tablet_id, bool is_completed) { |
121 | 0 | SCOPED_ATTACH_TASK(_mem_tracker); |
122 | 0 | RETURN_IF_ERROR(_engine.meta_mgr().finish_restore_job(tablet_id, is_completed)); |
123 | 0 | LOG(INFO) << "success to release snapshot. [tablet_id=" << tablet_id << "]"; |
124 | 0 | return Status::OK(); |
125 | 0 | } |
126 | | |
127 | | Status CloudSnapshotMgr::convert_rowsets( |
128 | | TabletMetaPB* out, const TabletMetaPB& in, int64_t tablet_id, |
129 | | CloudTabletSPtr& target_tablet, StorageResource& storage_resource, |
130 | 1 | std::unordered_map<std::string, std::string>& file_mapping) { |
131 | 1 | SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_mem_tracker); |
132 | | // deep copy |
133 | 1 | *out = in; |
134 | | |
135 | 1 | out->clear_rs_metas(); |
136 | 1 | out->clear_inc_rs_metas(); |
137 | 1 | out->clear_stale_rs_metas(); |
138 | | // modify id |
139 | 1 | out->set_tablet_id(tablet_id); |
140 | 1 | *out->mutable_tablet_uid() = TabletUid::gen_uid().to_proto(); |
141 | 1 | out->set_table_id(target_tablet->table_id()); |
142 | 1 | out->set_partition_id(target_tablet->partition_id()); |
143 | 1 | out->set_index_id(target_tablet->index_id()); |
144 | 1 | PUniqueId* cooldown_meta_id = out->mutable_cooldown_meta_id(); |
145 | 1 | cooldown_meta_id->set_hi(0); |
146 | 1 | cooldown_meta_id->set_lo(0); |
147 | | |
148 | 1 | TabletSchemaSPtr target_tablet_schema = std::make_shared<TabletSchema>(); |
149 | 1 | target_tablet_schema->copy_from(*target_tablet->tablet_schema()); |
150 | | |
151 | 1 | TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>(); |
152 | 1 | tablet_schema->init_from_pb(in.schema()); |
153 | | |
154 | 1 | std::unordered_map<Version, RowsetMetaPB*, HashOfVersion> rs_version_map; |
155 | 1 | std::unordered_map<RowsetId, RowsetId> rowset_id_mapping; |
156 | 1 | for (auto&& rowset_meta_pb : in.rs_metas()) { |
157 | 1 | RowsetMetaPB* new_rowset_meta_pb = out->add_rs_metas(); |
158 | 1 | RETURN_IF_ERROR(_create_rowset_meta(new_rowset_meta_pb, rowset_meta_pb, tablet_id, |
159 | 1 | target_tablet, storage_resource, tablet_schema, |
160 | 1 | file_mapping, rowset_id_mapping)); |
161 | 1 | if (new_rowset_meta_pb->has_tablet_schema() && |
162 | 1 | new_rowset_meta_pb->tablet_schema().index_size() > 0) { |
163 | 1 | RETURN_IF_ERROR(_rename_index_ids(*new_rowset_meta_pb->mutable_tablet_schema(), |
164 | 1 | target_tablet_schema)); |
165 | 1 | } |
166 | 1 | Version rowset_version = {rowset_meta_pb.start_version(), rowset_meta_pb.end_version()}; |
167 | 1 | rs_version_map[rowset_version] = new_rowset_meta_pb; |
168 | 1 | } |
169 | | |
170 | 1 | if (out->schema().index_size() > 0) { |
171 | 1 | RETURN_IF_ERROR(_rename_index_ids(*out->mutable_schema(), target_tablet_schema)); |
172 | 1 | } |
173 | | |
174 | 1 | if (!rowset_id_mapping.empty() && in.has_delete_bitmap()) { |
175 | 0 | const auto& old_del_bitmap_pb = in.delete_bitmap(); |
176 | 0 | DeleteBitmapPB* new_del_bitmap_pb = out->mutable_delete_bitmap(); |
177 | 0 | const int rst_ids_size = old_del_bitmap_pb.rowset_ids_size(); |
178 | 0 | if (rst_ids_size > 0) { |
179 | 0 | new_del_bitmap_pb->mutable_rowset_ids()->Reserve(rst_ids_size); |
180 | 0 | } |
181 | 0 | LOG(INFO) << "convert delete bitmap rowset_ids. [rowset_ids_size=" << rst_ids_size << "]"; |
182 | 0 | for (size_t i = 0; i < rst_ids_size; ++i) { |
183 | 0 | RowsetId rst_id; |
184 | 0 | rst_id.init(old_del_bitmap_pb.rowset_ids(cast_set<int>(i))); |
185 | 0 | auto it = rowset_id_mapping.find(rst_id); |
186 | | // It should not happen, if we can't convert some rowid in delete bitmap, the |
187 | | // data might be inconsist. |
188 | 0 | CHECK(it != rowset_id_mapping.end()) |
189 | 0 | << "can't find rowset_id " << rst_id.to_string() << " in convert_rowset_ids"; |
190 | 0 | new_del_bitmap_pb->set_rowset_ids(cast_set<int>(i), it->second.to_string()); |
191 | 0 | } |
192 | 0 | } |
193 | | |
194 | 1 | return Status::OK(); |
195 | 1 | } |
196 | | |
197 | | Status CloudSnapshotMgr::_create_rowset_meta( |
198 | | RowsetMetaPB* new_rowset_meta_pb, const RowsetMetaPB& source_meta_pb, |
199 | | int64_t target_tablet_id, CloudTabletSPtr& target_tablet, StorageResource& storage_resource, |
200 | | TabletSchemaSPtr tablet_schema, std::unordered_map<std::string, std::string>& file_mapping, |
201 | 1 | std::unordered_map<RowsetId, RowsetId>& rowset_id_mapping) { |
202 | 1 | RowsetId dst_rs_id = _engine.next_rowset_id(); |
203 | 1 | RowsetWriterContext context; |
204 | 1 | context.rowset_id = dst_rs_id; |
205 | 1 | context.tablet_id = target_tablet_id; |
206 | 1 | context.partition_id = target_tablet->partition_id(); |
207 | 1 | context.index_id = target_tablet->index_id(); |
208 | | // Note: use origin txn id |
209 | 1 | context.txn_id = source_meta_pb.txn_id(); |
210 | 1 | context.txn_expiration = 0; |
211 | 1 | context.rowset_state = source_meta_pb.rowset_state(); |
212 | 1 | context.storage_resource = storage_resource; |
213 | 1 | context.tablet = target_tablet; |
214 | 1 | context.version = {source_meta_pb.start_version(), source_meta_pb.end_version()}; |
215 | 1 | context.segments_overlap = source_meta_pb.segments_overlap_pb(); |
216 | 1 | context.tablet_schema_hash = source_meta_pb.tablet_schema_hash(); |
217 | 1 | if (source_meta_pb.has_tablet_schema()) { |
218 | 1 | context.tablet_schema = std::make_shared<TabletSchema>(); |
219 | 1 | context.tablet_schema->init_from_pb(source_meta_pb.tablet_schema()); |
220 | 1 | } else { |
221 | 0 | context.tablet_schema = tablet_schema; |
222 | 0 | } |
223 | 1 | context.newest_write_timestamp = source_meta_pb.newest_write_timestamp(); |
224 | | |
225 | 1 | auto rs_writer = DORIS_TRY(RowsetFactory::create_rowset_writer(_engine, context, false)); |
226 | 1 | rs_writer->rowset_meta()->to_rowset_pb(new_rowset_meta_pb); |
227 | | |
228 | | // build file mapping |
229 | 1 | RowsetId src_rs_id; |
230 | 1 | if (source_meta_pb.rowset_id() > 0) { |
231 | 0 | src_rs_id.init(source_meta_pb.rowset_id()); |
232 | 1 | } else { |
233 | 1 | src_rs_id.init(source_meta_pb.rowset_id_v2()); |
234 | 1 | } |
235 | 1 | rowset_id_mapping[src_rs_id] = dst_rs_id; |
236 | | |
237 | 4 | for (int i = 0; i < source_meta_pb.num_segments(); ++i) { |
238 | 3 | std::string src_segment_file = fmt::format("{}_{}.dat", src_rs_id.to_string(), i); |
239 | 3 | std::string dst_segment_file = fmt::format("{}_{}.dat", dst_rs_id.to_string(), i); |
240 | 3 | file_mapping[src_segment_file] = dst_segment_file; |
241 | 3 | if (context.tablet_schema->get_inverted_index_storage_format() == |
242 | 3 | InvertedIndexStorageFormatPB::V1) { |
243 | 3 | for (const auto& index : context.tablet_schema->inverted_indexes()) { |
244 | 3 | auto index_id = index->index_id(); |
245 | 3 | std::string src_index_file = InvertedIndexDescriptor::get_index_file_path_v1( |
246 | 3 | InvertedIndexDescriptor::get_index_file_path_prefix(src_segment_file), |
247 | 3 | index_id, index->get_index_suffix()); |
248 | 3 | std::string dst_index_file = InvertedIndexDescriptor::get_index_file_path_v1( |
249 | 3 | InvertedIndexDescriptor::get_index_file_path_prefix(dst_segment_file), |
250 | 3 | index_id, index->get_index_suffix()); |
251 | 3 | file_mapping[src_index_file] = dst_index_file; |
252 | 3 | } |
253 | 3 | } else { |
254 | 0 | if (context.tablet_schema->has_inverted_index() || |
255 | 0 | context.tablet_schema->has_ann_index()) { |
256 | 0 | std::string src_index_file = InvertedIndexDescriptor::get_index_file_path_v2( |
257 | 0 | InvertedIndexDescriptor::get_index_file_path_prefix(src_segment_file)); |
258 | 0 | std::string dst_index_file = InvertedIndexDescriptor::get_index_file_path_v2( |
259 | 0 | InvertedIndexDescriptor::get_index_file_path_prefix(dst_segment_file)); |
260 | 0 | file_mapping[src_index_file] = dst_index_file; |
261 | 0 | } |
262 | 0 | } |
263 | 3 | } |
264 | | |
265 | | // build rowset meta |
266 | 1 | new_rowset_meta_pb->set_num_rows(source_meta_pb.num_rows()); |
267 | 1 | new_rowset_meta_pb->set_total_disk_size(source_meta_pb.total_disk_size()); |
268 | 1 | new_rowset_meta_pb->set_data_disk_size(source_meta_pb.data_disk_size()); |
269 | 1 | new_rowset_meta_pb->set_index_disk_size(source_meta_pb.index_disk_size()); |
270 | 1 | new_rowset_meta_pb->set_empty(source_meta_pb.num_rows() == 0); |
271 | 1 | new_rowset_meta_pb->set_creation_time(time(nullptr)); |
272 | 1 | new_rowset_meta_pb->set_num_segments(source_meta_pb.num_segments()); |
273 | 1 | new_rowset_meta_pb->set_rowset_state(source_meta_pb.rowset_state()); |
274 | | |
275 | 1 | new_rowset_meta_pb->clear_segments_key_bounds(); |
276 | 1 | for (const auto& key_bound : source_meta_pb.segments_key_bounds()) { |
277 | 0 | *new_rowset_meta_pb->add_segments_key_bounds() = key_bound; |
278 | 0 | } |
279 | 1 | if (source_meta_pb.has_delete_predicate()) { |
280 | 0 | DeletePredicatePB* new_delete_condition = new_rowset_meta_pb->mutable_delete_predicate(); |
281 | 0 | *new_delete_condition = source_meta_pb.delete_predicate(); |
282 | 0 | } |
283 | | |
284 | 1 | return Status::OK(); |
285 | 1 | } |
286 | | |
287 | | Status CloudSnapshotMgr::_rename_index_ids(TabletSchemaPB& schema_pb, |
288 | 3 | const TabletSchemaSPtr& tablet_schema) const { |
289 | 3 | if (tablet_schema == nullptr) { |
290 | 0 | return Status::OK(); |
291 | 0 | } |
292 | | |
293 | 9 | for (int i = 0; i < schema_pb.index_size(); ++i) { |
294 | 6 | TabletIndexPB* index_pb = schema_pb.mutable_index(i); |
295 | 6 | for (int32_t col_unique_id : index_pb->col_unique_id()) { |
296 | 6 | auto local_index = tablet_schema->get_index(col_unique_id, index_pb->index_type(), |
297 | 6 | index_pb->index_suffix_name()); |
298 | 6 | if (local_index) { |
299 | 6 | if (index_pb->index_id() != local_index->index_id()) { |
300 | 6 | index_pb->set_index_id(local_index->index_id()); |
301 | 6 | } |
302 | 6 | break; |
303 | 6 | } |
304 | 6 | } |
305 | 6 | } |
306 | 3 | return Status::OK(); |
307 | 3 | } |
308 | | |
309 | | #include "common/compile_check_end.h" |
310 | | } // namespace doris |