be/src/storage/compaction/single_replica_compaction.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "storage/compaction/single_replica_compaction.h" |
19 | | |
20 | | #include <absl/strings/str_split.h> |
21 | | #include <curl/curl.h> |
22 | | #include <gen_cpp/Types_constants.h> |
23 | | #include <gen_cpp/internal_service.pb.h> |
24 | | |
25 | | #include "common/logging.h" |
26 | | #include "common/metrics/doris_metrics.h" |
27 | | #include "io/fs/file_system.h" |
28 | | #include "io/fs/local_file_system.h" |
29 | | #include "io/fs/path.h" |
30 | | #include "runtime/exec_env.h" |
31 | | #include "runtime/memory/mem_tracker_limiter.h" |
32 | | #include "service/brpc.h" |
33 | | #include "service/http/http_client.h" |
34 | | #include "storage/rowset/rowset_factory.h" |
35 | | #include "storage/rowset/rowset_meta.h" |
36 | | #include "storage/snapshot/snapshot_manager.h" |
37 | | #include "storage/storage_engine.h" |
38 | | #include "storage/tablet/tablet_meta.h" |
39 | | #include "storage/task/engine_clone_task.h" |
40 | | #include "util/brpc_client_cache.h" |
41 | | #include "util/client_cache.h" |
42 | | #include "util/security.h" |
43 | | #include "util/thrift_rpc_helper.h" |
44 | | #include "util/trace.h" |
45 | | |
46 | | namespace doris { |
47 | | using namespace ErrorCode; |
48 | | |
// Construct a single-replica compaction task for `tablet`.
// The CompactionMixin label embeds the tablet id ("SingleReplicaCompaction:<id>")
// so this task is identifiable in traces and mem-tracker output.
// `compaction_type` records which local compaction kind this peer-fetch replaces.
SingleReplicaCompaction::SingleReplicaCompaction(StorageEngine& engine,
                                                 const TabletSharedPtr& tablet,
                                                 CompactionType compaction_type)
        : CompactionMixin(engine, tablet,
                          "SingleReplicaCompaction:" + std::to_string(tablet->tablet_id())),
          _compaction_type(compaction_type) {}
55 | | |
// Defaulted out of line; no resources beyond what the base class manages.
SingleReplicaCompaction::~SingleReplicaCompaction() = default;
57 | | |
58 | 2 | Status SingleReplicaCompaction::prepare_compact() { |
59 | 2 | VLOG_CRITICAL << _tablet->tablet_id() << " prepare single replcia compaction and pick rowsets!"; |
60 | 2 | if (!tablet()->init_succeeded()) { |
61 | 0 | return Status::Error<CUMULATIVE_INVALID_PARAMETERS, false>("_tablet init failed"); |
62 | 0 | } |
63 | | |
64 | | // Single replica compaction does not require picking _input_rowsets |
65 | | // _input_rowsets depends on the fetched _output_version |
66 | 2 | return Status::OK(); |
67 | 2 | } |
68 | | |
69 | 0 | Status SingleReplicaCompaction::execute_compact() { |
70 | 0 | if (!tablet()->should_fetch_from_peer()) { |
71 | 0 | return Status::Aborted("compaction should be performed locally"); |
72 | 0 | } |
73 | 0 | std::unique_lock<std::mutex> lock_cumu(tablet()->get_cumulative_compaction_lock(), |
74 | 0 | std::try_to_lock); |
75 | 0 | if (!lock_cumu.owns_lock()) { |
76 | 0 | return Status::Error<TRY_LOCK_FAILED, false>( |
77 | 0 | "The tablet is under cumulative compaction. tablet={}", _tablet->tablet_id()); |
78 | 0 | } |
79 | | |
80 | 0 | std::unique_lock<std::mutex> lock_base(tablet()->get_base_compaction_lock(), std::try_to_lock); |
81 | 0 | if (!lock_base.owns_lock()) { |
82 | 0 | return Status::Error<TRY_LOCK_FAILED, false>( |
83 | 0 | "another base compaction is running. tablet={}", _tablet->tablet_id()); |
84 | 0 | } |
85 | | |
86 | 0 | SCOPED_ATTACH_TASK(_mem_tracker); |
87 | | |
88 | | // do single replica compaction |
89 | 0 | RETURN_IF_ERROR(_do_single_replica_compaction()); |
90 | | |
91 | 0 | _state = CompactionState::SUCCESS; |
92 | |
|
93 | 0 | return Status::OK(); |
94 | 0 | } |
95 | | |
96 | 0 | Status SingleReplicaCompaction::_do_single_replica_compaction() { |
97 | 0 | tablet()->data_dir()->disks_compaction_num_increment(1); |
98 | 0 | Status st = _do_single_replica_compaction_impl(); |
99 | 0 | tablet()->data_dir()->disks_compaction_num_increment(-1); |
100 | |
|
101 | 0 | return st; |
102 | 0 | } |
103 | | |
104 | 0 | Status SingleReplicaCompaction::_do_single_replica_compaction_impl() { |
105 | 0 | DBUG_EXECUTE_IF("do_single_compaction_return_ok", { return Status::OK(); }); |
106 | 0 | TReplicaInfo addr; |
107 | 0 | std::string token; |
108 | | // 1. get peer replica info |
109 | 0 | DBUG_EXECUTE_IF("single_compaction_failed_get_peer", |
110 | 0 | { return Status::Aborted("tablet don't have peer replica"); }); |
111 | 0 | if (!_engine.get_peer_replica_info(_tablet->tablet_id(), &addr, &token)) { |
112 | 0 | LOG(WARNING) << _tablet->tablet_id() << " tablet don't have peer replica"; |
113 | 0 | return Status::Aborted("tablet don't have peer replica"); |
114 | 0 | } |
115 | | |
116 | | // 2. get verisons from peer |
117 | 0 | std::vector<Version> peer_versions; |
118 | 0 | RETURN_IF_ERROR(_get_rowset_verisons_from_peer(addr, &peer_versions)); |
119 | | |
120 | 0 | Version proper_version; |
121 | | // 3. find proper version to fetch |
122 | 0 | if (!_find_rowset_to_fetch(peer_versions, &proper_version)) { |
123 | 0 | return Status::Cancelled("no matched versions for single replica compaction"); |
124 | 0 | } |
125 | | |
126 | | // 4. fetch compaction result |
127 | 0 | RETURN_IF_ERROR(_fetch_rowset(addr, token, proper_version)); |
128 | | // 5. modify rowsets in memory |
129 | 0 | RETURN_IF_ERROR(modify_rowsets()); |
130 | | |
131 | | // 6. update last success compaction time |
132 | 0 | if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION) { |
133 | 0 | tablet()->set_last_cumu_compaction_success_time(UnixMillis()); |
134 | 0 | } else if (compaction_type() == ReaderType::READER_BASE_COMPACTION) { |
135 | 0 | tablet()->set_last_base_compaction_success_time(UnixMillis()); |
136 | 0 | } else if (compaction_type() == ReaderType::READER_FULL_COMPACTION) { |
137 | 0 | tablet()->set_last_full_compaction_success_time(UnixMillis()); |
138 | 0 | } |
139 | |
|
140 | 0 | tablet()->set_last_fetched_version(_output_rowset->version()); |
141 | |
|
142 | 0 | int64_t current_max_version = -1; |
143 | 0 | { |
144 | 0 | std::shared_lock rdlock(_tablet->get_header_lock()); |
145 | 0 | if (RowsetSharedPtr max_rowset = _tablet->get_rowset_with_max_version()) { |
146 | 0 | current_max_version = max_rowset->end_version(); |
147 | 0 | } |
148 | 0 | } |
149 | |
|
150 | 0 | LOG(INFO) << "succeed to do single replica compaction" |
151 | 0 | << ". tablet=" << _tablet->tablet_id() << ", output_version=" << _output_version |
152 | 0 | << ", current_max_version=" << current_max_version |
153 | 0 | << ", input_rowsets_data_size=" << _input_rowsets_data_size |
154 | 0 | << ", input_rowsets_index_size=" << _input_rowsets_index_size |
155 | 0 | << ", input_rowsets_total_size=" << _input_rowsets_total_size |
156 | 0 | << ", input_row_num=" << _input_row_num |
157 | 0 | << ", input_segments_num=" << _input_num_segments |
158 | 0 | << ", _input_index_size=" << _input_rowsets_index_size |
159 | 0 | << ", output_rowset_data_size=" << _output_rowset->data_disk_size() |
160 | 0 | << ", output_rowset_index_size=" << _output_rowset->index_disk_size() |
161 | 0 | << ", output_rowset_total_size=" << _output_rowset->total_disk_size() |
162 | 0 | << ", output_row_num=" << _output_rowset->num_rows() |
163 | 0 | << ", output_segments_num=" << _output_rowset->num_segments(); |
164 | 0 | return Status::OK(); |
165 | 0 | } |
166 | | |
167 | | Status SingleReplicaCompaction::_get_rowset_verisons_from_peer( |
168 | 0 | const TReplicaInfo& addr, std::vector<Version>* peer_versions) { |
169 | 0 | DBUG_EXECUTE_IF("single_compaction_failed_get_peer_versions", |
170 | 0 | { return Status::Aborted("tablet failed get peer versions"); }); |
171 | 0 | PGetTabletVersionsRequest request; |
172 | 0 | request.set_tablet_id(_tablet->tablet_id()); |
173 | 0 | PGetTabletVersionsResponse response; |
174 | 0 | std::shared_ptr<PBackendService_Stub> stub = |
175 | 0 | ExecEnv::GetInstance()->brpc_internal_client_cache()->get_client(addr.host, |
176 | 0 | addr.brpc_port); |
177 | 0 | if (stub == nullptr) { |
178 | 0 | return Status::Aborted("get rpc stub failed, host={}, port={}", addr.host, addr.brpc_port); |
179 | 0 | } |
180 | | |
181 | 0 | brpc::Controller cntl; |
182 | 0 | stub->get_tablet_rowset_versions(&cntl, &request, &response, nullptr); |
183 | 0 | if (cntl.Failed()) { |
184 | 0 | return Status::Aborted("open brpc connection failed"); |
185 | 0 | } |
186 | 0 | if (response.status().status_code() != 0) { |
187 | 0 | return Status::Aborted("peer don't have tablet"); |
188 | 0 | } |
189 | 0 | if (response.versions_size() == 0) { |
190 | 0 | return Status::Aborted("no peer version"); |
191 | 0 | } |
192 | 0 | for (int i = 0; i < response.versions_size(); ++i) { |
193 | 0 | (*peer_versions).emplace_back(response.versions(i).first(), response.versions(i).second()); |
194 | 0 | } |
195 | 0 | return Status::OK(); |
196 | 0 | } |
197 | | |
// Decide which peer version range is worth fetching.
// A peer range qualifies when it starts exactly where local and peer versions
// diverge AND is fully covered by a consecutive run of local rowsets ending on
// the same version — i.e. the peer has already compacted a range the local
// replica still holds as several smaller rowsets.
// On success fills *proper_version and _output_version, and rebuilds
// _input_rowsets (plus the _input_* statistics) with the local rowsets the
// fetched rowset will replace. Returns false when there is nothing to fetch.
bool SingleReplicaCompaction::_find_rowset_to_fetch(const std::vector<Version>& peer_versions,
                                                    Version* proper_version) {
    // already sorted
    std::vector<Version> local_versions = tablet()->get_all_local_versions();
    for (const auto& v : local_versions) {
        VLOG_CRITICAL << _tablet->tablet_id() << " tablet local version: " << v.first << " - "
                      << v.second;
    }
    for (const auto& v : peer_versions) {
        VLOG_CRITICAL << _tablet->tablet_id() << " tablet peer version: " << v.first << " - "
                      << v.second;
    }

    bool find = false;
    int index_peer = 0;
    int index_local = 0;
    // peer_versions [0-0] [1-1] [2-2] [3-5] [6-7]
    // local_versions [0-0] [1-1] [2-2] [3-3] [4-4] [5-5] [6-7]
    // return output_version [3-5]
    // 1: skip same versions
    while (index_local < local_versions.size() && index_peer < peer_versions.size()) {
        if (peer_versions[index_peer].first == local_versions[index_local].first &&
            peer_versions[index_peer].second == local_versions[index_local].second) {
            ++index_peer;
            ++index_local;
            continue;
        }
        break;
    }
    // One side is a prefix of the other: the replicas are in sync as far as we
    // can see, nothing to fetch.
    if (index_peer >= peer_versions.size() || index_local >= local_versions.size()) {
        return false;
    }
    // 2: first match — the diverging ranges must begin at the same version,
    // otherwise the peer's compacted range cannot replace local rowsets cleanly.
    if (peer_versions[index_peer].first != local_versions[index_local].first) {
        return false;
    }
    // 3: second match — walk over every local rowset contained in the peer
    // range; the local run must end exactly on the peer range's end version.
    if (peer_versions[index_peer].contains(local_versions[index_local])) {
        ++index_local;
        while (index_local < local_versions.size()) {
            if (peer_versions[index_peer].contains(local_versions[index_local])) {
                ++index_local;
                continue;
            }
            break;
        }
        --index_local; // step back onto the last contained local rowset
        if (local_versions[index_local].second == peer_versions[index_peer].second) {
            *proper_version = peer_versions[index_peer];
            find = true;
        }
    }
    if (find) {
        // 4. reset input rowsets
        _input_rowsets.clear();
        tablet()->traverse_rowsets([this, &proper_version](const auto& rs) {
            // only need rowset in proper_version
            if (rs->is_local() && proper_version->contains(rs->version())) {
                this->_input_rowsets.emplace_back(rs);
            }
        });
        std::sort(_input_rowsets.begin(), _input_rowsets.end(), Rowset::comparator);
        DCHECK_EQ(_input_rowsets.front()->start_version(), proper_version->first);
        DCHECK_EQ(_input_rowsets.back()->end_version(), proper_version->second);
        // Release-build guard mirroring the DCHECKs above: bail out rather than
        // fetch a rowset that does not line up with the local input set.
        if (_input_rowsets.front()->start_version() != proper_version->first ||
            _input_rowsets.back()->end_version() != proper_version->second) {
            LOG(WARNING) << fmt::format(
                    "single compaction input rowsets error, tablet_id={}, input rowset = [{}-{}], "
                    "remote rowset = {}",
                    tablet()->tablet_id(), _input_rowsets.front()->start_version(),
                    _input_rowsets.back()->end_version(), proper_version->to_string());
            return false;
        }
        // Accumulate input statistics for the final success log line.
        for (auto& rowset : _input_rowsets) {
            _input_rowsets_data_size += rowset->data_disk_size();
            _input_row_num += rowset->num_rows();
            _input_num_segments += rowset->num_segments();
            _input_rowsets_index_size += rowset->index_disk_size();
            _input_rowsets_total_size += rowset->data_disk_size() + rowset->index_disk_size();
        }
        _output_version = *proper_version;
    }
    return find;
}
282 | | |
// Fetch the compacted rowset covering `rowset_version` from peer `addr`:
//   1. ask the peer (thrift) to snapshot that version range
//   2. download the snapshot files over HTTP into <tablet_path>.clone/
//   3. rewrite the downloaded rowset ids to fresh local ids
//   4. link the files into the tablet dir and build _output_rowset
// The remote snapshot is released via Defer on every exit path.
Status SingleReplicaCompaction::_fetch_rowset(const TReplicaInfo& addr, const std::string& token,
                                              const Version& rowset_version) {
    LOG(INFO) << "begin to fetch compaction result, tablet_id=" << _tablet->tablet_id()
              << ", addr=" << addr.host << ", version=" << rowset_version;
    // Try-lock only: if the tablet is being migrated, abort rather than wait.
    std::shared_lock migration_rlock(tablet()->get_migration_lock(), std::try_to_lock);
    if (!migration_rlock.owns_lock()) {
        return Status::Error<TRY_LOCK_FAILED, false>("got migration_rlock failed. tablet={}",
                                                     _tablet->tablet_id());
    }

    std::string local_data_path = tablet()->tablet_path() + CLONE_PREFIX;
    std::string local_path = local_data_path + "/";
    std::string snapshot_path;
    int timeout_s = 0; // 0: do not override the remote BE's snapshot timeout
    // 1: make snapshot
    RETURN_IF_ERROR(_make_snapshot(addr.host, addr.be_port, _tablet->tablet_id(),
                                   _tablet->schema_hash(), timeout_s, rowset_version,
                                   &snapshot_path));
    // Release the remote snapshot no matter how this function exits; a failure
    // to release is only logged (the remote side will eventually expire it).
    Defer defer {[&, this] {
        // TODO(plat1ko): Async release snapshot
        auto st = _release_snapshot(addr.host, addr.be_port, snapshot_path);
        if (!st.ok()) [[unlikely]] {
            LOG_WARNING("failed to release snapshot in remote BE")
                    .tag("host", addr.host)
                    .tag("port", addr.be_port)
                    .tag("snapshot_path", snapshot_path)
                    .error(st);
        }
    }};
    // 2: download snapshot
    std::string remote_url_prefix;
    {
        std::stringstream ss;
        ss << "http://" << addr.host << ":" << addr.http_port << HTTP_REQUEST_PREFIX
           << HTTP_REQUEST_TOKEN_PARAM << token << HTTP_REQUEST_FILE_PARAM << snapshot_path << "/"
           << _tablet->tablet_id() << "/" << _tablet->schema_hash() << "/";
        remote_url_prefix = ss.str();
    }
    RETURN_IF_ERROR(_download_files(tablet()->data_dir(), remote_url_prefix, local_path));
    // 3: convert the downloaded rowset ids so they do not clash with local
    // ones; the returned guards keep the new ids pending until committed.
    _pending_rs_guards = DORIS_TRY(_engine.snapshot_mgr()->convert_rowset_ids(
            local_path, _tablet->tablet_id(), tablet()->replica_id(), _tablet->table_id(),
            _tablet->partition_id(), _tablet->schema_hash()));
    // 4: finish_clone: create output_rowset and link file
    return _finish_clone(local_data_path, rowset_version);
}
328 | | |
329 | | Status SingleReplicaCompaction::_make_snapshot(const std::string& ip, int port, TTableId tablet_id, |
330 | | TSchemaHash schema_hash, int timeout_s, |
331 | 0 | const Version& version, std::string* snapshot_path) { |
332 | 0 | VLOG_NOTICE << "single replica compaction make snapshot ip=" << ip |
333 | 0 | << ", tablet_id=" << tablet_id; |
334 | 0 | TSnapshotRequest request; |
335 | 0 | request.__set_tablet_id(tablet_id); |
336 | 0 | request.__set_schema_hash(schema_hash); |
337 | 0 | request.__set_preferred_snapshot_version(g_Types_constants.TPREFER_SNAPSHOT_REQ_VERSION); |
338 | 0 | request.__set_start_version(version.first); |
339 | 0 | request.__set_end_version(version.second); |
340 | |
|
341 | 0 | if (timeout_s > 0) { |
342 | 0 | request.__set_timeout(timeout_s); |
343 | 0 | } |
344 | |
|
345 | 0 | TAgentResult result; |
346 | 0 | RETURN_IF_ERROR(ThriftRpcHelper::rpc<BackendServiceClient>( |
347 | 0 | ip, port, [&request, &result](BackendServiceConnection& client) { |
348 | 0 | client->make_snapshot(result, request); |
349 | 0 | })); |
350 | 0 | if (result.status.status_code != TStatusCode::OK) { |
351 | 0 | return Status::create(result.status); |
352 | 0 | } |
353 | | |
354 | 0 | if (result.__isset.snapshot_path) { |
355 | 0 | *snapshot_path = result.snapshot_path; |
356 | 0 | if (snapshot_path->at(snapshot_path->length() - 1) != '/') { |
357 | 0 | snapshot_path->append("/"); |
358 | 0 | } |
359 | 0 | DBUG_EXECUTE_IF("single_compaction_failed_make_snapshot", |
360 | 0 | { return Status::InternalError("failed snapshot"); }); |
361 | 0 | } else { |
362 | 0 | return Status::InternalError("success snapshot without snapshot path"); |
363 | 0 | } |
364 | 0 | return Status::OK(); |
365 | 0 | } |
366 | | |
367 | | Status SingleReplicaCompaction::_download_files(DataDir* data_dir, |
368 | | const std::string& remote_url_prefix, |
369 | 0 | const std::string& local_path) { |
370 | | // Check local path exist, if exist, remove it, then create the dir |
371 | | // local_file_full_path = tabletid/clone, for a specific tablet, there should be only one folder |
372 | | // if this folder exists, then should remove it |
373 | | // for example, BE clone from BE 1 to download file 1 with version (2,2), but clone from BE 1 failed |
374 | | // then it will try to clone from BE 2, but it will find the file 1 already exist, but file 1 with same |
375 | | // name may have different versions. |
376 | 0 | VLOG_DEBUG << "single replica compaction begin to download files, remote path=" |
377 | 0 | << mask_token(remote_url_prefix) << " local_path=" << local_path; |
378 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(local_path)); |
379 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(local_path)); |
380 | | |
381 | | // Get remote dir file list |
382 | 0 | std::string file_list_str; |
383 | 0 | auto list_files_cb = [&remote_url_prefix, &file_list_str](HttpClient* client) { |
384 | 0 | RETURN_IF_ERROR(client->init(remote_url_prefix)); |
385 | 0 | client->set_timeout_ms(LIST_REMOTE_FILE_TIMEOUT * 1000); |
386 | 0 | RETURN_IF_ERROR(client->execute(&file_list_str)); |
387 | 0 | return Status::OK(); |
388 | 0 | }; |
389 | 0 | RETURN_IF_ERROR(HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, list_files_cb)); |
390 | 0 | std::vector<std::string> file_name_list = |
391 | 0 | absl::StrSplit(file_list_str, "\n", absl::SkipWhitespace()); |
392 | | |
393 | | // If the header file is not exist, the table couldn't loaded by olap engine. |
394 | | // Avoid of data is not complete, we copy the header file at last. |
395 | | // The header file's name is end of .hdr. |
396 | 0 | for (int i = 0; i < file_name_list.size() - 1; ++i) { |
397 | 0 | if (file_name_list[i].ends_with(".hdr")) { |
398 | 0 | std::swap(file_name_list[i], file_name_list[file_name_list.size() - 1]); |
399 | 0 | break; |
400 | 0 | } |
401 | 0 | } |
402 | | |
403 | | // Get copy from remote |
404 | 0 | uint64_t total_file_size = 0; |
405 | 0 | MonotonicStopWatch watch; |
406 | 0 | watch.start(); |
407 | 0 | auto curl = std::unique_ptr<CURL, decltype(&curl_easy_cleanup)>(curl_easy_init(), |
408 | 0 | &curl_easy_cleanup); |
409 | 0 | if (!curl) { |
410 | 0 | return Status::InternalError("single compaction init curl failed"); |
411 | 0 | } |
412 | 0 | for (auto& file_name : file_name_list) { |
413 | 0 | auto remote_file_url = remote_url_prefix + file_name; |
414 | | |
415 | | // get file length |
416 | 0 | uint64_t file_size = 0; |
417 | 0 | auto get_file_size_cb = [&remote_file_url, &file_size](HttpClient* client) { |
418 | 0 | RETURN_IF_ERROR(client->init(remote_file_url)); |
419 | 0 | client->set_timeout_ms(GET_LENGTH_TIMEOUT * 1000); |
420 | 0 | RETURN_IF_ERROR(client->head()); |
421 | 0 | RETURN_IF_ERROR(client->get_content_length(&file_size)); |
422 | 0 | return Status::OK(); |
423 | 0 | }; |
424 | 0 | RETURN_IF_ERROR( |
425 | 0 | HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, get_file_size_cb)); |
426 | | // check disk capacity |
427 | 0 | if (data_dir->reach_capacity_limit(file_size)) { |
428 | 0 | return Status::Error<EXCEEDED_LIMIT>( |
429 | 0 | "reach the capacity limit of path {}, file_size={}", data_dir->path(), |
430 | 0 | file_size); |
431 | 0 | } |
432 | | |
433 | 0 | total_file_size += file_size; |
434 | 0 | uint64_t estimate_timeout = file_size / config::download_low_speed_limit_kbps / 1024; |
435 | 0 | if (estimate_timeout < config::download_low_speed_time) { |
436 | 0 | estimate_timeout = config::download_low_speed_time; |
437 | 0 | } |
438 | |
|
439 | 0 | std::string local_file_path = local_path + file_name; |
440 | |
|
441 | 0 | LOG(INFO) << "single replica compaction begin to download file from: " |
442 | 0 | << mask_token(remote_file_url) << " to: " << local_file_path |
443 | 0 | << ". size(B): " << file_size << ", timeout(s): " << estimate_timeout; |
444 | |
|
445 | 0 | auto download_cb = [&remote_file_url, estimate_timeout, &local_file_path, |
446 | 0 | file_size](HttpClient* client) { |
447 | 0 | RETURN_IF_ERROR(client->init(remote_file_url)); |
448 | 0 | client->set_timeout_ms(estimate_timeout * 1000); |
449 | 0 | RETURN_IF_ERROR(client->download(local_file_path)); |
450 | | |
451 | 0 | DBUG_EXECUTE_IF("single_compaction_failed_download_file", |
452 | 0 | { return Status::InternalError("failed to download file"); }); |
453 | | // Check file length |
454 | 0 | uint64_t local_file_size = std::filesystem::file_size(local_file_path); |
455 | 0 | if (local_file_size != file_size) { |
456 | 0 | LOG(WARNING) << "download file length error" |
457 | 0 | << ", remote_path=" << mask_token(remote_file_url) |
458 | 0 | << ", file_size=" << file_size |
459 | 0 | << ", local_file_size=" << local_file_size; |
460 | 0 | return Status::InternalError("downloaded file size is not equal"); |
461 | 0 | } |
462 | 0 | return io::global_local_filesystem()->permission(local_file_path, |
463 | 0 | io::LocalFileSystem::PERMS_OWNER_RW); |
464 | 0 | }; |
465 | 0 | RETURN_IF_ERROR(HttpClient::execute_with_retry(DOWNLOAD_FILE_MAX_RETRY, 1, download_cb)); |
466 | 0 | } // Clone files from remote backend |
467 | | |
468 | 0 | uint64_t total_time_ms = watch.elapsed_time() / 1000 / 1000; |
469 | 0 | total_time_ms = total_time_ms > 0 ? total_time_ms : 0; |
470 | 0 | double copy_rate = 0.0; |
471 | 0 | if (total_time_ms > 0) { |
472 | 0 | copy_rate = total_file_size / ((double)total_time_ms) / 1000; |
473 | 0 | } |
474 | 0 | LOG(INFO) << "succeed to single replica compaction copy tablet " << _tablet->tablet_id() |
475 | 0 | << ", total file size: " << total_file_size << " B" |
476 | 0 | << ", cost: " << total_time_ms << " ms" |
477 | 0 | << ", rate: " << copy_rate << " MB/s"; |
478 | 0 | return Status::OK(); |
479 | 0 | } |
480 | | |
481 | | Status SingleReplicaCompaction::_release_snapshot(const std::string& ip, int port, |
482 | 0 | const std::string& snapshot_path) { |
483 | 0 | TAgentResult result; |
484 | 0 | RETURN_IF_ERROR(ThriftRpcHelper::rpc<BackendServiceClient>( |
485 | 0 | ip, port, [&snapshot_path, &result](BackendServiceConnection& client) { |
486 | 0 | client->release_snapshot(result, snapshot_path); |
487 | 0 | })); |
488 | 0 | return Status::create(result.status); |
489 | 0 | } |
490 | | |
// Install the downloaded snapshot as this compaction's output:
//  - load the peer's tablet meta (.hdr) from clone_dir
//  - build _output_rowset from the rs meta matching output_version
//  - hard-link every downloaded file not already present into the tablet dir
//  - always remove clone_dir at the end, success or failure
// NOTE(review): the RETURN_IF_ERROR calls inside the do-while return from the
// whole function directly, so the "clear linked files" cleanup below only runs
// when `res` itself holds a failure — e.g. a link_file failure leaves earlier
// links in place and skips the clone-dir removal. Confirm whether intended.
Status SingleReplicaCompaction::_finish_clone(const std::string& clone_dir,
                                              const Version& output_version) {
    Status res = Status::OK();
    std::vector<std::string> linked_success_files;
    {
        do {
            // check clone dir existed
            bool exists = true;
            RETURN_IF_ERROR(io::global_local_filesystem()->exists(clone_dir, &exists));
            if (!exists) {
                return Status::InternalError("clone dir not existed. clone_dir={}", clone_dir);
            }

            // Load src header.
            // The tablet meta info is downloaded from source BE as .hdr file.
            // So we load it and generate cloned_tablet_meta.
            auto cloned_tablet_meta_file =
                    fmt::format("{}/{}.hdr", clone_dir, _tablet->tablet_id());
            auto cloned_tablet_meta = std::make_shared<TabletMeta>();
            RETURN_IF_ERROR(cloned_tablet_meta->create_from_file(cloned_tablet_meta_file));

            // remove the cloned meta file
            RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(cloned_tablet_meta_file));

            // The version we asked the peer to snapshot must exist in its meta.
            RowsetMetaSharedPtr output_rs_meta =
                    cloned_tablet_meta->acquire_rs_meta_by_version(output_version);
            if (output_rs_meta == nullptr) {
                LOG(WARNING) << "version not found in cloned tablet meta when do single compaction";
                return Status::InternalError("version not found in cloned tablet meta");
            }
            res = tablet()->create_rowset(output_rs_meta, &_output_rowset);
            if (!res.ok()) {
                LOG(WARNING) << "fail to init rowset. version=" << output_version;
                return res;
            }

            // check all files in /clone and /tablet
            std::vector<io::FileInfo> clone_files;
            RETURN_IF_ERROR(
                    io::global_local_filesystem()->list(clone_dir, true, &clone_files, &exists));
            std::unordered_set<std::string> clone_file_names;
            for (auto& file : clone_files) {
                clone_file_names.insert(file.file_name);
            }

            std::vector<io::FileInfo> local_files;
            const auto& tablet_dir = tablet()->tablet_path();
            RETURN_IF_ERROR(
                    io::global_local_filesystem()->list(tablet_dir, true, &local_files, &exists));
            std::unordered_set<std::string> local_file_names;
            for (auto& file : local_files) {
                local_file_names.insert(file.file_name);
            }

            /// Traverse all downloaded clone files in CLONE dir.
            /// If it does not exist in local tablet dir, link the file to local tablet dir
            /// And save all linked files in linked_success_files.
            for (const std::string& clone_file : clone_file_names) {
                if (local_file_names.find(clone_file) != local_file_names.end()) {
                    VLOG_NOTICE << "find same file when clone, skip it. "
                                << "tablet=" << _tablet->tablet_id()
                                << ", clone_file=" << clone_file;
                    continue;
                }

                auto from = fmt::format("{}/{}", clone_dir, clone_file);
                auto to = fmt::format("{}/{}", tablet_dir, clone_file);
                RETURN_IF_ERROR(io::global_local_filesystem()->link_file(from, to));
                linked_success_files.emplace_back(std::move(to));
            }

            if (!res.ok()) {
                break;
            }
        } while (false);

        // clear linked files if errors happen
        if (!res.ok()) {
            std::vector<io::Path> paths;
            for (auto& file : linked_success_files) {
                paths.emplace_back(file);
            }
            RETURN_IF_ERROR(io::global_local_filesystem()->batch_delete(paths));
        }
    }
    // clear clone dir
    std::filesystem::path clone_dir_path(clone_dir);
    std::error_code ec;
    std::filesystem::remove_all(clone_dir_path, ec);
    if (ec) {
        LOG(WARNING) << "failed to remove=" << clone_dir_path << " msg=" << ec.message();
        return Status::IOError("failed to remove {}, due to {}", clone_dir, ec.message());
    }
    LOG(INFO) << "finish to clone data, clear downloaded data. res=" << res
              << ", tablet=" << _tablet->tablet_id() << ", clone_dir=" << clone_dir;
    return res;
}
588 | | |
589 | | } // namespace doris |