/root/doris/be/src/olap/data_dir.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/data_dir.h" |
19 | | |
20 | | #include <fmt/core.h> |
21 | | #include <fmt/format.h> |
22 | | #include <gen_cpp/FrontendService_types.h> |
23 | | #include <gen_cpp/Types_types.h> |
24 | | #include <gen_cpp/olap_file.pb.h> |
25 | | |
26 | | #include <atomic> |
27 | | #include <cstdio> |
28 | | // IWYU pragma: no_include <bits/chrono.h> |
29 | | #include <chrono> // IWYU pragma: keep |
30 | | #include <cstddef> |
31 | | #include <filesystem> |
32 | | #include <memory> |
33 | | #include <new> |
34 | | #include <roaring/roaring.hh> |
35 | | #include <set> |
36 | | #include <sstream> |
37 | | #include <string> |
38 | | #include <thread> |
39 | | #include <utility> |
40 | | |
41 | | #include "common/cast_set.h" |
42 | | #include "common/config.h" |
43 | | #include "common/logging.h" |
44 | | #include "io/fs/file_reader.h" |
45 | | #include "io/fs/file_writer.h" |
46 | | #include "io/fs/local_file_system.h" |
47 | | #include "io/fs/path.h" |
48 | | #include "olap/delete_handler.h" |
49 | | #include "olap/olap_common.h" |
50 | | #include "olap/olap_define.h" |
51 | | #include "olap/olap_meta.h" |
52 | | #include "olap/rowset/beta_rowset.h" |
53 | | #include "olap/rowset/pending_rowset_helper.h" |
54 | | #include "olap/rowset/rowset.h" |
55 | | #include "olap/rowset/rowset_id_generator.h" |
56 | | #include "olap/rowset/rowset_meta.h" |
57 | | #include "olap/rowset/rowset_meta_manager.h" |
58 | | #include "olap/storage_engine.h" |
59 | | #include "olap/storage_policy.h" |
60 | | #include "olap/tablet.h" |
61 | | #include "olap/tablet_manager.h" |
62 | | #include "olap/tablet_meta_manager.h" |
63 | | #include "olap/txn_manager.h" |
64 | | #include "olap/utils.h" // for check_dir_existed |
65 | | #include "service/backend_options.h" |
66 | | #include "util/doris_metrics.h" |
67 | | #include "util/string_util.h" |
68 | | #include "util/uid_util.h" |
69 | | |
70 | | namespace doris { |
71 | | #include "common/compile_check_begin.h" |
72 | | using namespace ErrorCode; |
73 | | |
74 | | namespace { |
75 | | |
76 | 72 | Status read_cluster_id(const std::string& cluster_id_path, int32_t* cluster_id) { |
77 | 72 | bool exists = false; |
78 | 72 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(cluster_id_path, &exists)); |
79 | 72 | *cluster_id = -1; |
80 | 72 | if (exists) { |
81 | 0 | io::FileReaderSPtr reader; |
82 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->open_file(cluster_id_path, &reader)); |
83 | 0 | size_t fsize = reader->size(); |
84 | 0 | if (fsize > 0) { |
85 | 0 | std::string content; |
86 | 0 | content.resize(fsize, '\0'); |
87 | 0 | size_t bytes_read = 0; |
88 | 0 | RETURN_IF_ERROR(reader->read_at(0, {content.data(), fsize}, &bytes_read)); |
89 | 0 | DCHECK_EQ(fsize, bytes_read); |
90 | 0 | *cluster_id = std::stoi(content); |
91 | 0 | } |
92 | 0 | } |
93 | 72 | return Status::OK(); |
94 | 72 | } |
95 | | |
96 | 0 | Status _write_cluster_id_to_path(const std::string& path, int32_t cluster_id) { |
97 | 0 | bool exists = false; |
98 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(path, &exists)); |
99 | 0 | if (!exists) { |
100 | 0 | io::FileWriterPtr file_writer; |
101 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_file(path, &file_writer)); |
102 | 0 | RETURN_IF_ERROR(file_writer->append(std::to_string(cluster_id))); |
103 | 0 | RETURN_IF_ERROR(file_writer->close()); |
104 | 0 | } |
105 | 0 | return Status::OK(); |
106 | 0 | } |
107 | | |
108 | | } // namespace |
109 | | |
110 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_total_capacity, MetricUnit::BYTES); |
111 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_avail_capacity, MetricUnit::BYTES); |
112 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_local_used_capacity, MetricUnit::BYTES); |
113 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_remote_used_capacity, MetricUnit::BYTES); |
114 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_trash_used_capacity, MetricUnit::BYTES); |
115 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_state, MetricUnit::BYTES); |
116 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_compaction_score, MetricUnit::NOUNIT); |
117 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_compaction_num, MetricUnit::NOUNIT); |
118 | | |
119 | | DataDir::DataDir(StorageEngine& engine, const std::string& path, int64_t capacity_bytes, |
120 | | TStorageMedium::type storage_medium) |
121 | 192 | : _engine(engine), |
122 | 192 | _path(path), |
123 | 192 | _available_bytes(0), |
124 | 192 | _disk_capacity_bytes(0), |
125 | 192 | _trash_used_bytes(0), |
126 | 192 | _storage_medium(storage_medium), |
127 | 192 | _is_used(false), |
128 | 192 | _cluster_id(-1), |
129 | 192 | _to_be_deleted(false) { |
130 | 192 | _data_dir_metric_entity = DorisMetrics::instance()->metric_registry()->register_entity( |
131 | 192 | std::string("data_dir.") + path, {{"path", path}}); |
132 | 192 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_total_capacity); |
133 | 192 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_avail_capacity); |
134 | 192 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_local_used_capacity); |
135 | 192 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_remote_used_capacity); |
136 | 192 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_trash_used_capacity); |
137 | 192 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_state); |
138 | 192 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_compaction_score); |
139 | 192 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_compaction_num); |
140 | 192 | } |
141 | | |
142 | 192 | DataDir::~DataDir() { |
143 | 192 | DorisMetrics::instance()->metric_registry()->deregister_entity(_data_dir_metric_entity); |
144 | 192 | delete _meta; |
145 | 192 | } |
146 | | |
147 | 72 | Status DataDir::init(bool init_meta) { |
148 | 72 | bool exists = false; |
149 | 72 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(_path, &exists)); |
150 | 72 | if (!exists) { |
151 | 0 | RETURN_NOT_OK_STATUS_WITH_WARN(Status::IOError("opendir failed, path={}", _path), |
152 | 0 | "check file exist failed"); |
153 | 0 | } |
154 | | |
155 | 72 | RETURN_NOT_OK_STATUS_WITH_WARN(update_capacity(), "update_capacity failed"); |
156 | 72 | RETURN_NOT_OK_STATUS_WITH_WARN(_init_cluster_id(), "_init_cluster_id failed"); |
157 | 72 | RETURN_NOT_OK_STATUS_WITH_WARN(_init_capacity_and_create_shards(), |
158 | 72 | "_init_capacity_and_create_shards failed"); |
159 | 72 | if (init_meta) { |
160 | 72 | RETURN_NOT_OK_STATUS_WITH_WARN(_init_meta(), "_init_meta failed"); |
161 | 72 | } |
162 | | |
163 | 72 | _is_used = true; |
164 | 72 | return Status::OK(); |
165 | 72 | } |
166 | | |
167 | 42 | void DataDir::stop_bg_worker() { |
168 | 42 | _stop_bg_worker = true; |
169 | 42 | } |
170 | | |
171 | 72 | Status DataDir::_init_cluster_id() { |
172 | 72 | auto cluster_id_path = fmt::format("{}/{}", _path, CLUSTER_ID_PREFIX); |
173 | 72 | RETURN_IF_ERROR(read_cluster_id(cluster_id_path, &_cluster_id)); |
174 | 72 | if (_cluster_id == -1) { |
175 | 72 | _cluster_id_incomplete = true; |
176 | 72 | } |
177 | 72 | return Status::OK(); |
178 | 72 | } |
179 | | |
180 | 72 | Status DataDir::_init_capacity_and_create_shards() { |
181 | 72 | RETURN_IF_ERROR(io::global_local_filesystem()->get_space_info(_path, &_disk_capacity_bytes, |
182 | 72 | &_available_bytes)); |
183 | 72 | auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX); |
184 | 72 | bool exists = false; |
185 | 72 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(data_path, &exists)); |
186 | 72 | if (!exists) { |
187 | 72 | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(data_path)); |
188 | 72 | } |
189 | 73.7k | for (int i = 0; i < MAX_SHARD_NUM; ++i) { |
190 | 73.7k | auto shard_path = fmt::format("{}/{}", data_path, i); |
191 | 73.7k | RETURN_IF_ERROR(io::global_local_filesystem()->exists(shard_path, &exists)); |
192 | 73.7k | if (!exists) { |
193 | 73.7k | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(shard_path)); |
194 | 73.7k | } |
195 | 73.7k | } |
196 | | |
197 | 72 | return Status::OK(); |
198 | 72 | } |
199 | | |
200 | 73 | Status DataDir::_init_meta() { |
201 | | // init path hash |
202 | 73 | _path_hash = hash_of_path(BackendOptions::get_localhost(), _path); |
203 | 73 | LOG(INFO) << "path: " << _path << ", hash: " << _path_hash; |
204 | | |
205 | | // init meta |
206 | 73 | _meta = new (std::nothrow) OlapMeta(_path); |
207 | 73 | if (_meta == nullptr) { |
208 | 0 | RETURN_NOT_OK_STATUS_WITH_WARN( |
209 | 0 | Status::MemoryAllocFailed("allocate memory for OlapMeta failed"), |
210 | 0 | "new OlapMeta failed"); |
211 | 0 | } |
212 | 73 | Status res = _meta->init(); |
213 | 73 | if (!res.ok()) { |
214 | 0 | RETURN_NOT_OK_STATUS_WITH_WARN(Status::IOError("open rocksdb failed, path={}", _path), |
215 | 0 | "init OlapMeta failed"); |
216 | 0 | } |
217 | 73 | return Status::OK(); |
218 | 73 | } |
219 | | |
220 | 0 | Status DataDir::set_cluster_id(int32_t cluster_id) { |
221 | 0 | if (_cluster_id != -1 && _cluster_id != cluster_id) { |
222 | 0 | LOG(ERROR) << "going to set cluster id to already assigned store, cluster_id=" |
223 | 0 | << _cluster_id << ", new_cluster_id=" << cluster_id; |
224 | 0 | return Status::InternalError("going to set cluster id to already assigned store"); |
225 | 0 | } |
226 | 0 | if (!_cluster_id_incomplete) { |
227 | 0 | return Status::OK(); |
228 | 0 | } |
229 | 0 | auto cluster_id_path = fmt::format("{}/{}", _path, CLUSTER_ID_PREFIX); |
230 | 0 | return _write_cluster_id_to_path(cluster_id_path, cluster_id); |
231 | 0 | } |
232 | | |
233 | 0 | void DataDir::health_check() { |
234 | | // check disk |
235 | 0 | if (_is_used) { |
236 | 0 | Status res = _read_and_write_test_file(); |
237 | 0 | if (!res && res.is<IO_ERROR>()) { |
238 | 0 | LOG(WARNING) << "store read/write test file occur IO Error. path=" << _path |
239 | 0 | << ", err: " << res; |
240 | 0 | _engine.add_broken_path(_path); |
241 | 0 | _is_used = !res.is<IO_ERROR>(); |
242 | 0 | } |
243 | 0 | } |
244 | 0 | disks_state->set_value(_is_used ? 1 : 0); |
245 | 0 | } |
246 | | |
247 | 0 | Status DataDir::_read_and_write_test_file() { |
248 | 0 | auto test_file = fmt::format("{}/{}", _path, kTestFilePath); |
249 | 0 | return read_write_test_file(test_file); |
250 | 0 | } |
251 | | |
252 | 304 | void DataDir::register_tablet(Tablet* tablet) { |
253 | 304 | TabletInfo tablet_info(tablet->tablet_id(), tablet->tablet_uid()); |
254 | | |
255 | 304 | std::lock_guard<std::mutex> l(_mutex); |
256 | 304 | _tablet_set.emplace(std::move(tablet_info)); |
257 | 304 | } |
258 | | |
259 | 255 | void DataDir::deregister_tablet(Tablet* tablet) { |
260 | 255 | TabletInfo tablet_info(tablet->tablet_id(), tablet->tablet_uid()); |
261 | | |
262 | 255 | std::lock_guard<std::mutex> l(_mutex); |
263 | 255 | _tablet_set.erase(tablet_info); |
264 | 255 | } |
265 | | |
266 | 0 | void DataDir::clear_tablets(std::vector<TabletInfo>* tablet_infos) { |
267 | 0 | std::lock_guard<std::mutex> l(_mutex); |
268 | |
|
269 | 0 | tablet_infos->insert(tablet_infos->end(), _tablet_set.begin(), _tablet_set.end()); |
270 | 0 | _tablet_set.clear(); |
271 | 0 | } |
272 | | |
273 | 0 | std::string DataDir::get_absolute_shard_path(int64_t shard_id) { |
274 | 0 | return fmt::format("{}/{}/{}", _path, DATA_PREFIX, shard_id); |
275 | 0 | } |
276 | | |
277 | | std::string DataDir::get_absolute_tablet_path(int64_t shard_id, int64_t tablet_id, |
278 | 0 | int32_t schema_hash) { |
279 | 0 | return fmt::format("{}/{}/{}", get_absolute_shard_path(shard_id), tablet_id, schema_hash); |
280 | 0 | } |
281 | | |
282 | 0 | void DataDir::find_tablet_in_trash(int64_t tablet_id, std::vector<std::string>* paths) { |
283 | | // path: /root_path/trash/time_label/tablet_id/schema_hash |
284 | 0 | auto trash_path = fmt::format("{}/{}", _path, TRASH_PREFIX); |
285 | 0 | bool exists = true; |
286 | 0 | std::vector<io::FileInfo> sub_dirs; |
287 | 0 | Status st = io::global_local_filesystem()->list(trash_path, false, &sub_dirs, &exists); |
288 | 0 | if (!st) { |
289 | 0 | return; |
290 | 0 | } |
291 | | |
292 | 0 | for (auto& sub_dir : sub_dirs) { |
293 | | // sub dir is time_label |
294 | 0 | if (sub_dir.is_file) { |
295 | 0 | continue; |
296 | 0 | } |
297 | 0 | auto sub_path = fmt::format("{}/{}", trash_path, sub_dir.file_name); |
298 | 0 | auto tablet_path = fmt::format("{}/{}", sub_path, tablet_id); |
299 | 0 | st = io::global_local_filesystem()->exists(tablet_path, &exists); |
300 | 0 | if (st && exists) { |
301 | 0 | paths->emplace_back(std::move(tablet_path)); |
302 | 0 | } |
303 | 0 | } |
304 | 0 | } |
305 | | |
306 | | std::string DataDir::get_root_path_from_schema_hash_path_in_trash( |
307 | 0 | const std::string& schema_hash_dir_in_trash) { |
308 | 0 | return io::Path(schema_hash_dir_in_trash) |
309 | 0 | .parent_path() |
310 | 0 | .parent_path() |
311 | 0 | .parent_path() |
312 | 0 | .parent_path() |
313 | 0 | .string(); |
314 | 0 | } |
315 | | |
316 | 42 | Status DataDir::_check_incompatible_old_format_tablet() { |
317 | 42 | auto check_incompatible_old_func = [](int64_t tablet_id, int32_t schema_hash, |
318 | 42 | std::string_view value) -> bool { |
319 | | // if strict check incompatible old format, then log fatal |
320 | 0 | if (config::storage_strict_check_incompatible_old_format) { |
321 | 0 | throw Exception(Status::FatalError( |
322 | 0 | "There are incompatible old format metas, current version does not support and " |
323 | 0 | "it may lead to data missing!!! tablet_id = {} schema_hash = {}", |
324 | 0 | tablet_id, schema_hash)); |
325 | 0 | } else { |
326 | 0 | LOG(WARNING) |
327 | 0 | << "There are incompatible old format metas, current version does not support " |
328 | 0 | << "and it may lead to data missing!!! " |
329 | 0 | << "tablet_id = " << tablet_id << " schema_hash = " << schema_hash; |
330 | 0 | } |
331 | 0 | return false; |
332 | 0 | }; |
333 | | |
334 | | // seek old header prefix. when check_incompatible_old_func is called, it has old format in olap_meta |
335 | 42 | Status check_incompatible_old_status = TabletMetaManager::traverse_headers( |
336 | 42 | _meta, check_incompatible_old_func, OLD_HEADER_PREFIX); |
337 | 42 | if (!check_incompatible_old_status) { |
338 | 0 | LOG(WARNING) << "check incompatible old format meta fails, it may lead to data missing!!! " |
339 | 0 | << _path; |
340 | 42 | } else { |
341 | 42 | LOG(INFO) << "successfully check incompatible old format meta " << _path; |
342 | 42 | } |
343 | 42 | return check_incompatible_old_status; |
344 | 42 | } |
345 | | |
346 | | // TODO(ygl): deal with rowsets and tablets when load failed |
347 | 42 | Status DataDir::load() { |
348 | 42 | LOG(INFO) << "start to load tablets from " << _path; |
349 | | |
350 | | // load rowset meta from meta env and create rowset |
351 | | // COMMITTED: add to txn manager |
352 | | // VISIBLE: add to tablet |
353 | | // if one rowset load failed, then the total data dir will not be loaded |
354 | | |
355 | | // necessarily check incompatible old format. when there are old metas, it may load to data missing |
356 | 42 | RETURN_IF_ERROR(_check_incompatible_old_format_tablet()); |
357 | | |
358 | 42 | std::vector<RowsetMetaSharedPtr> dir_rowset_metas; |
359 | 42 | LOG(INFO) << "begin loading rowset from meta"; |
360 | 42 | auto load_rowset_func = [&dir_rowset_metas, this](TabletUid tablet_uid, RowsetId rowset_id, |
361 | 42 | std::string_view meta_str) -> bool { |
362 | 0 | RowsetMetaSharedPtr rowset_meta(new RowsetMeta()); |
363 | 0 | bool parsed = rowset_meta->init(meta_str); |
364 | 0 | if (!parsed) { |
365 | 0 | LOG(WARNING) << "parse rowset meta string failed for rowset_id:" << rowset_id; |
366 | | // return false will break meta iterator, return true to skip this error |
367 | 0 | return true; |
368 | 0 | } |
369 | | |
370 | 0 | if (rowset_meta->has_delete_predicate()) { |
371 | | // copy the delete sub pred v1 to check then |
372 | 0 | auto orig_delete_sub_pred = rowset_meta->delete_predicate().sub_predicates(); |
373 | 0 | auto* delete_pred = rowset_meta->mutable_delete_pred_pb(); |
374 | |
|
375 | 0 | if ((!delete_pred->sub_predicates().empty() && |
376 | 0 | delete_pred->sub_predicates_v2().empty()) || |
377 | 0 | (!delete_pred->in_predicates().empty() && |
378 | 0 | delete_pred->in_predicates()[0].has_column_unique_id())) { |
379 | | // convert pred and write only when delete sub pred v2 is not set or there is in list pred to be set column uid |
380 | 0 | RETURN_IF_ERROR(DeleteHandler::convert_to_sub_pred_v2( |
381 | 0 | delete_pred, rowset_meta->tablet_schema())); |
382 | 0 | LOG(INFO) << fmt::format( |
383 | 0 | "convert rowset with old delete pred: rowset_id={}, tablet_id={}", |
384 | 0 | rowset_id.to_string(), tablet_uid.to_string()); |
385 | 0 | CHECK_EQ(orig_delete_sub_pred.size(), delete_pred->sub_predicates().size()) |
386 | 0 | << "inconsistent sub predicate v1 after conversion"; |
387 | 0 | for (int i = 0; i < orig_delete_sub_pred.size(); ++i) { |
388 | 0 | CHECK_STREQ(orig_delete_sub_pred.Get(i).c_str(), |
389 | 0 | delete_pred->sub_predicates().Get(i).c_str()) |
390 | 0 | << "inconsistent sub predicate v1 after conversion"; |
391 | 0 | } |
392 | 0 | std::string result; |
393 | 0 | rowset_meta->serialize(&result); |
394 | 0 | std::string key = |
395 | 0 | ROWSET_PREFIX + tablet_uid.to_string() + "_" + rowset_id.to_string(); |
396 | 0 | RETURN_IF_ERROR(_meta->put(META_COLUMN_FAMILY_INDEX, key, result)); |
397 | 0 | } |
398 | 0 | } |
399 | | |
400 | 0 | if (rowset_meta->partition_id() == 0) { |
401 | 0 | LOG(WARNING) << "rs tablet=" << rowset_meta->tablet_id() << " rowset_id=" << rowset_id |
402 | 0 | << " load from meta but partition id eq 0"; |
403 | 0 | } |
404 | |
|
405 | 0 | dir_rowset_metas.push_back(rowset_meta); |
406 | 0 | return true; |
407 | 0 | }; |
408 | 42 | MonotonicStopWatch rs_timer; |
409 | 42 | rs_timer.start(); |
410 | 42 | Status load_rowset_status = RowsetMetaManager::traverse_rowset_metas(_meta, load_rowset_func); |
411 | 42 | rs_timer.stop(); |
412 | 42 | if (!load_rowset_status) { |
413 | 0 | LOG(WARNING) << "errors when load rowset meta from meta env, skip this data dir:" << _path; |
414 | 42 | } else { |
415 | 42 | LOG(INFO) << "load rowset from meta finished, cost: " |
416 | 42 | << rs_timer.elapsed_time_milliseconds() << " ms, data dir: " << _path; |
417 | 42 | } |
418 | | |
419 | | // load tablet |
420 | | // create tablet from tablet meta and add it to tablet mgr |
421 | 42 | LOG(INFO) << "begin loading tablet from meta"; |
422 | 42 | std::set<int64_t> tablet_ids; |
423 | 42 | std::set<int64_t> failed_tablet_ids; |
424 | 42 | auto load_tablet_func = [this, &tablet_ids, &failed_tablet_ids]( |
425 | 42 | int64_t tablet_id, int32_t schema_hash, |
426 | 42 | std::string_view value) -> bool { |
427 | 0 | Status status = _engine.tablet_manager()->load_tablet_from_meta( |
428 | 0 | this, tablet_id, schema_hash, value, false, false, false, false); |
429 | 0 | if (!status.ok() && !status.is<TABLE_ALREADY_DELETED_ERROR>() && |
430 | 0 | !status.is<ENGINE_INSERT_OLD_TABLET>()) { |
431 | | // load_tablet_from_meta() may return Status::Error<TABLE_ALREADY_DELETED_ERROR>() |
432 | | // which means the tablet status is DELETED |
433 | | // This may happen when the tablet was just deleted before the BE restarted, |
434 | | // but it has not been cleared from rocksdb. At this time, restarting the BE |
435 | | // will read the tablet in the DELETE state from rocksdb. These tablets have been |
436 | | // added to the garbage collection queue and will be automatically deleted afterwards. |
437 | | // Therefore, we believe that this situation is not a failure. |
438 | | |
439 | | // Besides, load_tablet_from_meta() may return Status::Error<ENGINE_INSERT_OLD_TABLET>() |
440 | | // when BE is restarting and the older tablet have been added to the |
441 | | // garbage collection queue but not deleted yet. |
442 | | // In this case, since the data_dirs are parallel loaded, a later loaded tablet |
443 | | // may be older than previously loaded one, which should not be acknowledged as a |
444 | | // failure. |
445 | 0 | LOG(WARNING) << "load tablet from header failed. status:" << status |
446 | 0 | << ", tablet=" << tablet_id << "." << schema_hash; |
447 | 0 | failed_tablet_ids.insert(tablet_id); |
448 | 0 | } else { |
449 | 0 | tablet_ids.insert(tablet_id); |
450 | 0 | } |
451 | 0 | return true; |
452 | 0 | }; |
453 | 42 | MonotonicStopWatch tablet_timer; |
454 | 42 | tablet_timer.start(); |
455 | 42 | Status load_tablet_status = TabletMetaManager::traverse_headers(_meta, load_tablet_func); |
456 | 42 | tablet_timer.stop(); |
457 | 42 | if (!failed_tablet_ids.empty()) { |
458 | 0 | LOG(WARNING) << "load tablets from header failed" |
459 | 0 | << ", loaded tablet: " << tablet_ids.size() |
460 | 0 | << ", error tablet: " << failed_tablet_ids.size() << ", path: " << _path; |
461 | 0 | if (!config::ignore_load_tablet_failure) { |
462 | 0 | throw Exception(Status::FatalError( |
463 | 0 | "load tablets encounter failure. stop BE process. path: {}", _path)); |
464 | 0 | } |
465 | 0 | } |
466 | 42 | if (!load_tablet_status) { |
467 | 0 | LOG(WARNING) << "there is failure when loading tablet headers" |
468 | 0 | << ", loaded tablet: " << tablet_ids.size() |
469 | 0 | << ", error tablet: " << failed_tablet_ids.size() << ", path: " << _path; |
470 | 42 | } else { |
471 | 42 | LOG(INFO) << "load tablet from meta finished" |
472 | 42 | << ", loaded tablet: " << tablet_ids.size() |
473 | 42 | << ", error tablet: " << failed_tablet_ids.size() |
474 | 42 | << ", cost: " << tablet_timer.elapsed_time_milliseconds() |
475 | 42 | << " ms, path: " << _path; |
476 | 42 | } |
477 | | |
478 | 42 | for (int64_t tablet_id : tablet_ids) { |
479 | 0 | TabletSharedPtr tablet = _engine.tablet_manager()->get_tablet(tablet_id); |
480 | 0 | if (tablet && tablet->set_tablet_schema_into_rowset_meta()) { |
481 | 0 | RETURN_IF_ERROR(TabletMetaManager::save(this, tablet->tablet_id(), |
482 | 0 | tablet->schema_hash(), tablet->tablet_meta())); |
483 | 0 | } |
484 | 0 | } |
485 | | |
486 | 42 | auto load_pending_publish_info_func = |
487 | 42 | [&engine = _engine](int64_t tablet_id, int64_t publish_version, std::string_view info) { |
488 | 0 | PendingPublishInfoPB pending_publish_info_pb; |
489 | 0 | bool parsed = pending_publish_info_pb.ParseFromArray(info.data(), |
490 | 0 | cast_set<int>(info.size())); |
491 | 0 | if (!parsed) { |
492 | 0 | LOG(WARNING) << "parse pending publish info failed, tablet_id: " << tablet_id |
493 | 0 | << " publish_version: " << publish_version; |
494 | 0 | } |
495 | 0 | engine.add_async_publish_task(pending_publish_info_pb.partition_id(), tablet_id, |
496 | 0 | publish_version, |
497 | 0 | pending_publish_info_pb.transaction_id(), true); |
498 | 0 | return true; |
499 | 0 | }; |
500 | 42 | MonotonicStopWatch pending_publish_timer; |
501 | 42 | pending_publish_timer.start(); |
502 | 42 | RETURN_IF_ERROR( |
503 | 42 | TabletMetaManager::traverse_pending_publish(_meta, load_pending_publish_info_func)); |
504 | 42 | pending_publish_timer.stop(); |
505 | 42 | LOG(INFO) << "load pending publish task from meta finished, cost: " |
506 | 42 | << pending_publish_timer.elapsed_time_milliseconds() << " ms, data dir: " << _path; |
507 | | |
508 | 42 | int64_t rowset_partition_id_eq_0_num = 0; |
509 | 42 | for (auto rowset_meta : dir_rowset_metas) { |
510 | 0 | if (rowset_meta->partition_id() == 0) { |
511 | 0 | ++rowset_partition_id_eq_0_num; |
512 | 0 | } |
513 | 0 | } |
514 | 42 | if (rowset_partition_id_eq_0_num > config::ignore_invalid_partition_id_rowset_num) { |
515 | 0 | throw Exception(Status::FatalError( |
516 | 0 | "rowset partition id eq 0 is {} bigger than config {}, be exit, plz check be.INFO", |
517 | 0 | rowset_partition_id_eq_0_num, config::ignore_invalid_partition_id_rowset_num)); |
518 | 0 | } |
519 | | |
520 | | // traverse rowset |
521 | | // 1. add committed rowset to txn map |
522 | | // 2. add visible rowset to tablet |
523 | | // ignore any errors when load tablet or rowset, because fe will repair them after report |
524 | 42 | int64_t invalid_rowset_counter = 0; |
525 | 42 | for (auto&& rowset_meta : dir_rowset_metas) { |
526 | 0 | TabletSharedPtr tablet = _engine.tablet_manager()->get_tablet(rowset_meta->tablet_id()); |
527 | | // tablet maybe dropped, but not drop related rowset meta |
528 | 0 | if (tablet == nullptr) { |
529 | 0 | VLOG_NOTICE << "could not find tablet id: " << rowset_meta->tablet_id() |
530 | 0 | << ", schema hash: " << rowset_meta->tablet_schema_hash() |
531 | 0 | << ", for rowset: " << rowset_meta->rowset_id() << ", skip this rowset"; |
532 | 0 | ++invalid_rowset_counter; |
533 | 0 | continue; |
534 | 0 | } |
535 | | |
536 | 0 | if (rowset_meta->partition_id() == 0) { |
537 | 0 | LOG(WARNING) << "skip tablet_id=" << tablet->tablet_id() |
538 | 0 | << " rowset: " << rowset_meta->rowset_id() |
539 | 0 | << " txn: " << rowset_meta->txn_id(); |
540 | 0 | continue; |
541 | 0 | } |
542 | | |
543 | 0 | RowsetSharedPtr rowset; |
544 | 0 | Status create_status = tablet->create_rowset(rowset_meta, &rowset); |
545 | 0 | if (!create_status) { |
546 | 0 | LOG(WARNING) << "could not create rowset from rowsetmeta: " |
547 | 0 | << " rowset_id: " << rowset_meta->rowset_id() |
548 | 0 | << " rowset_type: " << rowset_meta->rowset_type() |
549 | 0 | << " rowset_state: " << rowset_meta->rowset_state(); |
550 | 0 | continue; |
551 | 0 | } |
552 | 0 | if (rowset_meta->rowset_state() == RowsetStatePB::COMMITTED && |
553 | 0 | rowset_meta->tablet_uid() == tablet->tablet_uid()) { |
554 | 0 | if (!rowset_meta->tablet_schema()) { |
555 | 0 | rowset_meta->set_tablet_schema(tablet->tablet_schema()); |
556 | 0 | RETURN_IF_ERROR(RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), |
557 | 0 | rowset_meta->rowset_id(), |
558 | 0 | rowset_meta->get_rowset_pb(), false)); |
559 | 0 | } |
560 | 0 | Status commit_txn_status = _engine.txn_manager()->commit_txn( |
561 | 0 | _meta, rowset_meta->partition_id(), rowset_meta->txn_id(), |
562 | 0 | rowset_meta->tablet_id(), rowset_meta->tablet_uid(), rowset_meta->load_id(), |
563 | 0 | rowset, _engine.pending_local_rowsets().add(rowset_meta->rowset_id()), true); |
564 | 0 | if (commit_txn_status || commit_txn_status.is<PUSH_TRANSACTION_ALREADY_EXIST>()) { |
565 | 0 | LOG(INFO) << "successfully to add committed rowset: " << rowset_meta->rowset_id() |
566 | 0 | << " to tablet: " << rowset_meta->tablet_id() |
567 | 0 | << " schema hash: " << rowset_meta->tablet_schema_hash() |
568 | 0 | << " for txn: " << rowset_meta->txn_id(); |
569 | |
|
570 | 0 | } else if (commit_txn_status.is<ErrorCode::INTERNAL_ERROR>()) { |
571 | 0 | LOG(WARNING) << "failed to add committed rowset: " << rowset_meta->rowset_id() |
572 | 0 | << " to tablet: " << rowset_meta->tablet_id() |
573 | 0 | << " for txn: " << rowset_meta->txn_id() |
574 | 0 | << " error: " << commit_txn_status; |
575 | 0 | return commit_txn_status; |
576 | 0 | } else { |
577 | 0 | LOG(WARNING) << "failed to add committed rowset: " << rowset_meta->rowset_id() |
578 | 0 | << " to tablet: " << rowset_meta->tablet_id() |
579 | 0 | << " for txn: " << rowset_meta->txn_id() |
580 | 0 | << " error: " << commit_txn_status; |
581 | 0 | } |
582 | 0 | } else if (rowset_meta->rowset_state() == RowsetStatePB::VISIBLE && |
583 | 0 | rowset_meta->tablet_uid() == tablet->tablet_uid()) { |
584 | 0 | if (!rowset_meta->tablet_schema()) { |
585 | 0 | rowset_meta->set_tablet_schema(tablet->tablet_schema()); |
586 | 0 | RETURN_IF_ERROR(RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), |
587 | 0 | rowset_meta->rowset_id(), |
588 | 0 | rowset_meta->get_rowset_pb(), false)); |
589 | 0 | } |
590 | 0 | Status publish_status = tablet->add_rowset(rowset); |
591 | 0 | if (!publish_status && !publish_status.is<PUSH_VERSION_ALREADY_EXIST>()) { |
592 | 0 | LOG(WARNING) << "add visible rowset to tablet failed rowset_id:" |
593 | 0 | << rowset->rowset_id() << " tablet id: " << rowset_meta->tablet_id() |
594 | 0 | << " txn id:" << rowset_meta->txn_id() |
595 | 0 | << " start_version: " << rowset_meta->version().first |
596 | 0 | << " end_version: " << rowset_meta->version().second; |
597 | 0 | } |
598 | 0 | } else { |
599 | 0 | LOG(WARNING) << "find invalid rowset: " << rowset_meta->rowset_id() |
600 | 0 | << " with tablet id: " << rowset_meta->tablet_id() |
601 | 0 | << " tablet uid: " << rowset_meta->tablet_uid() |
602 | 0 | << " schema hash: " << rowset_meta->tablet_schema_hash() |
603 | 0 | << " txn: " << rowset_meta->txn_id() |
604 | 0 | << " current valid tablet uid: " << tablet->tablet_uid(); |
605 | 0 | ++invalid_rowset_counter; |
606 | 0 | } |
607 | 0 | } |
608 | | |
609 | 42 | int64_t dbm_cnt {0}; |
610 | 42 | int64_t unknown_dbm_cnt {0}; |
611 | 42 | auto load_delete_bitmap_func = [this, &dbm_cnt, &unknown_dbm_cnt](int64_t tablet_id, |
612 | 42 | int64_t version, |
613 | 42 | std::string_view val) { |
614 | 0 | TabletSharedPtr tablet = _engine.tablet_manager()->get_tablet(tablet_id); |
615 | 0 | if (!tablet) { |
616 | 0 | return true; |
617 | 0 | } |
618 | 0 | const std::vector<RowsetMetaSharedPtr>& all_rowsets = tablet->tablet_meta()->all_rs_metas(); |
619 | 0 | RowsetIdUnorderedSet rowset_ids; |
620 | 0 | for (auto& rowset_meta : all_rowsets) { |
621 | 0 | rowset_ids.insert(rowset_meta->rowset_id()); |
622 | 0 | } |
623 | |
|
624 | 0 | DeleteBitmapPB delete_bitmap_pb; |
625 | 0 | delete_bitmap_pb.ParseFromArray(val.data(), cast_set<int>(val.size())); |
626 | 0 | int rst_ids_size = delete_bitmap_pb.rowset_ids_size(); |
627 | 0 | int seg_ids_size = delete_bitmap_pb.segment_ids_size(); |
628 | 0 | int seg_maps_size = delete_bitmap_pb.segment_delete_bitmaps_size(); |
629 | 0 | CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size); |
630 | |
|
631 | 0 | for (int i = 0; i < rst_ids_size; ++i) { |
632 | 0 | RowsetId rst_id; |
633 | 0 | rst_id.init(delete_bitmap_pb.rowset_ids(i)); |
634 | | // only process the rowset in _rs_metas |
635 | 0 | if (rowset_ids.find(rst_id) == rowset_ids.end()) { |
636 | 0 | ++unknown_dbm_cnt; |
637 | 0 | continue; |
638 | 0 | } |
639 | 0 | ++dbm_cnt; |
640 | 0 | auto seg_id = delete_bitmap_pb.segment_ids(i); |
641 | 0 | auto iter = tablet->tablet_meta()->delete_bitmap().delete_bitmap.find( |
642 | 0 | {rst_id, seg_id, version}); |
643 | | // This version of delete bitmap already exists |
644 | 0 | if (iter != tablet->tablet_meta()->delete_bitmap().delete_bitmap.end()) { |
645 | 0 | continue; |
646 | 0 | } |
647 | 0 | auto bitmap = delete_bitmap_pb.segment_delete_bitmaps(i).data(); |
648 | 0 | tablet->tablet_meta()->delete_bitmap().delete_bitmap[{rst_id, seg_id, version}] = |
649 | 0 | roaring::Roaring::read(bitmap); |
650 | 0 | } |
651 | 0 | return true; |
652 | 0 | }; |
653 | 42 | MonotonicStopWatch dbm_timer; |
654 | 42 | dbm_timer.start(); |
655 | 42 | RETURN_IF_ERROR(TabletMetaManager::traverse_delete_bitmap(_meta, load_delete_bitmap_func)); |
656 | 42 | dbm_timer.stop(); |
657 | | |
658 | 42 | LOG(INFO) << "load delete bitmap from meta finished, cost: " |
659 | 42 | << dbm_timer.elapsed_time_milliseconds() << " ms, data dir: " << _path; |
660 | | |
661 | | // At startup, we only count these invalid rowsets, but do not actually delete them. |
662 | | // The actual delete operation is in StorageEngine::_clean_unused_rowset_metas, |
663 | | // which is cleaned up uniformly by the background cleanup thread. |
664 | 42 | LOG(INFO) << "finish to load tablets from " << _path |
665 | 42 | << ", total rowset meta: " << dir_rowset_metas.size() |
666 | 42 | << ", invalid rowset num: " << invalid_rowset_counter |
667 | 42 | << ", visible/stale rowsets' delete bitmap count: " << dbm_cnt |
668 | 42 | << ", invalid rowsets' delete bitmap count: " << unknown_dbm_cnt; |
669 | | |
670 | 42 | return Status::OK(); |
671 | 42 | } |
672 | | // Path GC for one <tablet_id>/<schema_hash> directory: if no tablet registered under the parsed id lives in this |
673 | | // DataDir, asks the tablet manager to try deleting the path; otherwise GCs the unused rowset files inside it. |
674 | 20 | void DataDir::_perform_tablet_gc(const std::string& tablet_schema_hash_path, int16_t shard_id) { |
675 | 20 | if (_stop_bg_worker) { |
676 | 0 | return; |
677 | 0 | } |
678 | | // -1 means "not parsed"; ids below 1 are rejected together with a failed parse. |
679 | 20 | TTabletId tablet_id = -1; |
680 | 20 | TSchemaHash schema_hash = -1; |
681 | 20 | bool is_valid = TabletManager::get_tablet_id_and_schema_hash_from_path( |
682 | 20 | tablet_schema_hash_path, &tablet_id, &schema_hash); |
683 | 20 | if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { |
684 | 0 | LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path; |
685 | 0 | return; |
686 | 0 | } |
687 | | // Either no tablet has this id, or the registered tablet belongs to a different DataDir. |
688 | 20 | auto tablet = _engine.tablet_manager()->get_tablet(tablet_id); |
689 | 20 | if (!tablet || tablet->data_dir() != this) { |
690 | 10 | if (tablet) { |
691 | 0 | LOG(INFO) << "The tablet in path " << tablet_schema_hash_path |
692 | 0 | << " is not same with the running one: " << tablet->tablet_path() |
693 | 0 | << ", might be the old tablet after migration, try to move it to trash"; |
694 | 0 | } |
695 | 10 | _engine.tablet_manager()->try_delete_unused_tablet_path(this, tablet_id, schema_hash, |
696 | 10 | tablet_schema_hash_path, shard_id); |
697 | 10 | return; |
698 | 10 | } |
699 | | // The tablet is registered and belongs to this DataDir: continue with rowset-level GC of its directory. |
700 | 10 | _perform_rowset_gc(tablet_schema_hash_path); |
701 | 10 | } |
702 | | // Deletes rowset files under `tablet_schema_hash_path` whose rowset id is not pending, not reported by the tablet's |
703 | | // traverse_rowsets(), not in the engine's unused rowsets, and whose rowset meta lookup yields META_KEY_NOT_FOUND. |
704 | 10 | void DataDir::_perform_rowset_gc(const std::string& tablet_schema_hash_path) { |
705 | 10 | if (_stop_bg_worker) { |
706 | 0 | return; |
707 | 0 | } |
708 | | // Re-parse and re-validate the path; an unparsable path or an id below 1 is logged and skipped. |
709 | 10 | TTabletId tablet_id = -1; |
710 | 10 | TSchemaHash schema_hash = -1; |
711 | 10 | bool is_valid = doris::TabletManager::get_tablet_id_and_schema_hash_from_path( |
712 | 10 | tablet_schema_hash_path, &tablet_id, &schema_hash); |
713 | 10 | if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { |
714 | 0 | LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path; |
715 | 0 | return; |
716 | 0 | } |
717 | | |
718 | 10 | auto tablet = _engine.tablet_manager()->get_tablet(tablet_id); |
719 | 10 | if (!tablet) { |
720 | | // Could not find the tablet; it may have been dropped. Nothing is deleted here: the directory is left to be |
721 | | // reclaimed by a later `_perform_path_gc_by_tablet` (NOTE(review): not visible here; likely _perform_tablet_gc). |
722 | 0 | return; |
723 | 0 | } |
724 | | |
725 | 10 | if (tablet->data_dir() != this) { |
726 | | // The running tablet is not in this data dir; it may be a tablet after migration. Nothing is deleted here: |
727 | | // the directory is left to be reclaimed by a later `_perform_path_gc_by_tablet` (see NOTE(review) above). |
728 | 0 | return; |
729 | 0 | } |
730 | | // List the tablet directory; a listing failure is logged and ends GC for this tablet. |
731 | 10 | bool exists; |
732 | 10 | std::vector<io::FileInfo> files; |
733 | 10 | auto st = io::global_local_filesystem()->list(tablet_schema_hash_path, true, &files, &exists); |
734 | 10 | if (!st.ok()) [[unlikely]] { |
735 | 0 | LOG(WARNING) << "[path gc] fail to list tablet path " << tablet_schema_hash_path << " : " |
736 | 0 | << st; |
737 | 0 | return; |
738 | 0 | } |
739 | | |
740 | | // Rowset files excluding pending rowsets; files whose extracted rowset id has hi == 0 are skipped as well. |
741 | 10 | std::vector<std::pair<RowsetId, std::string /* filename */>> rowsets_not_pending; |
742 | 480 | for (auto&& file : files) { |
743 | 480 | auto rowset_id = extract_rowset_id(file.file_name); |
744 | 480 | if (rowset_id.hi == 0) { |
745 | 0 | continue; // Not a rowset |
746 | 0 | } |
747 | | |
748 | 480 | if (_engine.pending_local_rowsets().contains(rowset_id)) { |
749 | 80 | continue; // Pending rowset file |
750 | 80 | } |
751 | | |
752 | 400 | rowsets_not_pending.emplace_back(rowset_id, std::move(file.file_name)); |
753 | 400 | } |
754 | | // Collect the rowset ids the tablet reports. NOTE(review): the `true` flag presumably includes stale rowsets; confirm. |
755 | 10 | RowsetIdUnorderedSet rowsets_in_version_map; |
756 | 10 | tablet->traverse_rowsets( |
757 | 30 | [&rowsets_in_version_map](auto& rs) { rowsets_in_version_map.insert(rs->rowset_id()); }, |
758 | 10 | true); |
759 | | // Debug point: when its `tablet_id` parameter equals this tablet id, log and DBUG_BLOCK here. |
760 | 10 | DBUG_EXECUTE_IF("DataDir::_perform_rowset_gc.simulation.slow", { |
761 | 10 | auto target_tablet_id = dp->param<int64_t>("tablet_id", -1); |
762 | 10 | if (target_tablet_id == tablet_id) { |
763 | 10 | LOG(INFO) << "debug point wait tablet to remove rsmgr tabletId=" << tablet_id; |
764 | 10 | DBUG_BLOCK; |
765 | 10 | } |
766 | 10 | }); |
767 | | // Deletes a single file; a failure is only logged, a success is logged as the audit record. |
768 | 80 | auto reclaim_rowset_file = [](const std::string& path) { |
769 | 80 | auto st = io::global_local_filesystem()->delete_file(path); |
770 | 80 | if (!st.ok()) [[unlikely]] { |
771 | 0 | LOG(WARNING) << "[path gc] failed to delete garbage rowset file: " << st; |
772 | 0 | return; |
773 | 0 | } |
774 | 80 | LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log |
775 | 80 | }; |
776 | | // Garbage only if all three checks agree; && short-circuits, so the meta lookup runs only when the first two pass. |
777 | 100 | auto should_reclaim = [&, this](const RowsetId& rowset_id) { |
778 | 100 | return !rowsets_in_version_map.contains(rowset_id) && |
779 | 100 | !_engine.check_rowset_id_in_unused_rowsets(rowset_id) && |
780 | 100 | RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(), rowset_id) |
781 | 40 | .is<META_KEY_NOT_FOUND>(); |
782 | 100 | }; |
783 | | |
784 | | // rowset_id -> is_garbage; caches the verdict so a rowset id shared by several files is evaluated only once |
785 | 10 | std::unordered_map<RowsetId, bool> checked_rowsets; |
786 | 400 | for (auto&& [rowset_id, filename] : rowsets_not_pending) { |
787 | 400 | if (_stop_bg_worker) { |
788 | 0 | return; |
789 | 0 | } |
790 | | // Verdict already cached for this rowset id: delete the file if it was judged garbage, otherwise keep it. |
791 | 400 | if (auto it = checked_rowsets.find(rowset_id); it != checked_rowsets.end()) { |
792 | 300 | if (it->second) { // Is checked garbage rowset |
793 | 60 | reclaim_rowset_file(tablet_schema_hash_path + '/' + filename); |
794 | 60 | } |
795 | 300 | continue; |
796 | 300 | } |
797 | | // First file of this rowset id: evaluate it; before deleting, sleep on every path_gc_check_step-th step. |
798 | 100 | if (should_reclaim(rowset_id)) { |
799 | 20 | if (config::path_gc_check_step > 0 && |
800 | 20 | ++_path_gc_step % config::path_gc_check_step == 0) { |
801 | 0 | std::this_thread::sleep_for( |
802 | 0 | std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); |
803 | 0 | } |
804 | 20 | reclaim_rowset_file(tablet_schema_hash_path + '/' + filename); |
805 | 20 | checked_rowsets.emplace(rowset_id, true); |
806 | 80 | } else { |
807 | 80 | checked_rowsets.emplace(rowset_id, false); |
808 | 80 | } |
809 | 100 | } |
810 | 10 | } |
811 | | // Scans <_path>/<DATA_PREFIX>/<shard>/<tablet_id>/<schema_hash> and calls _perform_tablet_gc on every schema-hash dir. |
812 | 1 | void DataDir::perform_path_gc() { |
813 | 1 | if (_stop_bg_worker) { |
814 | 0 | return; |
815 | 0 | } |
816 | | // A failure to list the data root is logged and ends the pass; deeper listing failures only skip that subtree. |
817 | 1 | LOG(INFO) << "start to gc data dir " << _path; |
818 | 1 | auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX); |
819 | 1 | std::vector<io::FileInfo> shards; |
820 | 1 | bool exists = true; |
821 | 1 | const auto& fs = io::global_local_filesystem(); |
822 | 1 | auto st = fs->list(data_path, false, &shards, &exists); |
823 | 1 | if (!st.ok()) [[unlikely]] { |
824 | 0 | LOG(WARNING) << "failed to scan data dir: " << st; |
825 | 0 | return; |
826 | 0 | } |
827 | | // Level 1: shard directories (plain files are skipped); the loop stops once _stop_bg_worker is set. |
828 | 4 | for (const auto& shard : shards) { |
829 | 4 | if (_stop_bg_worker) { |
830 | 0 | break; |
831 | 0 | } |
832 | | |
833 | 4 | if (shard.is_file) { |
834 | 0 | continue; |
835 | 0 | } |
836 | | |
837 | 4 | auto shard_path = fmt::format("{}/{}", data_path, shard.file_name); |
838 | 4 | std::vector<io::FileInfo> tablet_ids; |
839 | 4 | st = io::global_local_filesystem()->list(shard_path, false, &tablet_ids, &exists); |
840 | 4 | if (!st.ok()) [[unlikely]] { |
841 | 0 | LOG(WARNING) << "fail to walk dir, shard_path=" << shard_path << " : " << st; |
842 | 0 | continue; |
843 | 0 | } |
844 | | // Level 2: tablet id directories under the shard; the loop stops once _stop_bg_worker is set. |
845 | 20 | for (const auto& tablet_id : tablet_ids) { |
846 | 20 | if (_stop_bg_worker) { |
847 | 0 | break; |
848 | 0 | } |
849 | | |
850 | 20 | if (tablet_id.is_file) { |
851 | 0 | continue; |
852 | 0 | } |
853 | | |
854 | 20 | auto tablet_id_path = fmt::format("{}/{}", shard_path, tablet_id.file_name); |
855 | 20 | std::vector<io::FileInfo> schema_hashes; |
856 | 20 | st = fs->list(tablet_id_path, false, &schema_hashes, &exists); |
857 | 20 | if (!st.ok()) [[unlikely]] { |
858 | 0 | LOG(WARNING) << "fail to walk dir, tablet_id_path=" << tablet_id_path << " : " |
859 | 0 | << st; |
860 | 0 | continue; |
861 | 0 | } |
862 | | // Level 3: schema hash directories; each one is passed to _perform_tablet_gc with the shard id parsed from the shard dir name. |
863 | 20 | for (auto&& schema_hash : schema_hashes) { |
864 | 20 | if (schema_hash.is_file) { |
865 | 0 | continue; |
866 | 0 | } |
867 | | // Throttle: sleep path_gc_check_step_interval_ms each time _path_gc_step reaches a multiple of path_gc_check_step. |
868 | 20 | if (config::path_gc_check_step > 0 && |
869 | 20 | ++_path_gc_step % config::path_gc_check_step == 0) { |
870 | 0 | std::this_thread::sleep_for( |
871 | 0 | std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); |
872 | 0 | } |
873 | 20 | int16_t shard_id = -1; |
874 | 20 | try { |
875 | 20 | shard_id = cast_set<int16_t>(std::stoi(shard.file_name)); |
876 | 20 | } catch (const std::exception&) { |
877 | 0 | LOG(WARNING) << "failed to stoi shard_id, shard name=" << shard.file_name; |
878 | 0 | continue; |
879 | 0 | } |
880 | 20 | _perform_tablet_gc(tablet_id_path + '/' + schema_hash.file_name, shard_id); |
881 | 20 | } |
882 | 20 | } |
883 | 4 | } |
884 | | |
885 | 1 | LOG(INFO) << "gc data dir path: " << _path << " finished"; |
886 | 1 | } |
887 | | // Re-reads total and available bytes for _path, publishes both metrics and logs them; a get_space_info error is returned. |
888 | 193 | Status DataDir::update_capacity() { |
889 | 193 | RETURN_IF_ERROR(io::global_local_filesystem()->get_space_info(_path, &_disk_capacity_bytes, |
890 | 193 | &_available_bytes)); |
891 | 182 | disks_total_capacity->set_value(_disk_capacity_bytes); |
892 | 182 | disks_avail_capacity->set_value(_available_bytes); |
893 | 182 | LOG(INFO) << "path: " << _path << " total capacity: " << _disk_capacity_bytes |
894 | 182 | << ", available capacity: " << _available_bytes << ", usage: " << get_usage(0) |
895 | 182 | << ", in_use: " << is_used(); |
896 | | |
897 | 182 | return Status::OK(); |
898 | 193 | } |
899 | | // Recomputes _trash_used_bytes as the size of <_path>/<TRASH_PREFIX>; on filesystem_error it logs and returns without updating the metric. |
900 | 0 | void DataDir::update_trash_capacity() { |
901 | 0 | auto trash_path = fmt::format("{}/{}", _path, TRASH_PREFIX); |
902 | 0 | try { |
903 | 0 | _trash_used_bytes = _engine.get_file_or_directory_size(trash_path); |
904 | 0 | } catch (const std::filesystem::filesystem_error& e) { |
905 | 0 | LOG(WARNING) << "update trash capacity failed, path: " << _path << ", err: " << e.what(); |
906 | 0 | return; |
907 | 0 | } |
908 | 0 | disks_trash_used_capacity->set_value(_trash_used_bytes); |
909 | 0 | LOG(INFO) << "path: " << _path << " trash capacity: " << _trash_used_bytes; |
910 | 0 | } |
911 | | // Publishes `size` as the value of the disks_local_used_capacity metric. |
912 | 0 | void DataDir::update_local_data_size(int64_t size) { |
913 | 0 | disks_local_used_capacity->set_value(size); |
914 | 0 | } |
915 | | // Publishes `size` as the value of the disks_remote_used_capacity metric. |
916 | 0 | void DataDir::update_remote_data_size(int64_t size) { |
917 | 0 | disks_remote_used_capacity->set_value(size); |
918 | 0 | } |
919 | | // Returns the number of entries in _tablet_set, read while holding _mutex. |
920 | 0 | size_t DataDir::tablet_size() const { |
921 | 0 | std::lock_guard<std::mutex> l(_mutex); |
922 | 0 | return _tablet_set.size(); |
923 | 0 | } |
924 | | |
925 | 328 | bool DataDir::reach_capacity_limit(int64_t incoming_data_size) { |
926 | 328 | double used_pct = get_usage(incoming_data_size); |
927 | 328 | int64_t left_bytes = _available_bytes - incoming_data_size; |
928 | 328 | if (used_pct >= config::storage_flood_stage_usage_percent / 100.0 && |
929 | 328 | left_bytes <= config::storage_flood_stage_left_capacity_bytes) { |
930 | 0 | LOG(WARNING) << "reach capacity limit. used pct: " << used_pct |
931 | 0 | << ", left bytes: " << left_bytes << ", path: " << _path; |
932 | 0 | return true; |
933 | 0 | } |
934 | 328 | return false; |
935 | 328 | } |
936 | | // Adds `delta` to the disks_compaction_score metric. |
937 | 2 | void DataDir::disks_compaction_score_increment(int64_t delta) { |
938 | 2 | disks_compaction_score->increment(delta); |
939 | 2 | } |
940 | | // Adds `delta` to the disks_compaction_num metric. |
941 | 2 | void DataDir::disks_compaction_num_increment(int64_t delta) { |
942 | 2 | disks_compaction_num->increment(delta); |
943 | 2 | } |
944 | | // Moves tablet_path to <_path>/<TRASH_PREFIX>/<time>.<counter>/<tablet_id>/<schema_hash>, or deletes it outright when trash_file_expire_time_sec <= 0. |
945 | 235 | Status DataDir::move_to_trash(const std::string& tablet_path) { |
946 | 235 | if (config::trash_file_expire_time_sec <= 0) { |
947 | 235 | LOG(INFO) << "delete tablet dir " << tablet_path |
948 | 235 | << " directly due to trash_file_expire_time_sec is 0"; |
949 | 235 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_path)); |
950 | 235 | return delete_tablet_parent_path_if_empty(tablet_path); |
951 | 235 | } |
952 | | // NOTE(review): the branch above is also taken for negative values, although its log message says "is 0". |
953 | 0 | Status res = Status::OK(); |
954 | | // 1. Generate the timestamp string used in the trash directory name. |
955 | 0 | std::string time_str; |
956 | 0 | if ((res = gen_timestamp_string(&time_str)) != Status::OK()) { |
957 | 0 | LOG(WARNING) << "failed to generate time_string when move file to trash.err code=" << res; |
958 | 0 | return res; |
959 | 0 | } |
960 | | |
961 | | // 2. Build the destination path as <trash root>/<tablet_id>/<schema_hash>. |
962 | | // A function-local static atomic counter is appended to the timestamp to avoid file name duplication. |
963 | 0 | static std::atomic<uint64_t> delete_counter(0); |
964 | 0 | auto trash_root_path = |
965 | 0 | fmt::format("{}/{}/{}.{}", _path, TRASH_PREFIX, time_str, delete_counter++); |
966 | 0 | auto fs_tablet_path = io::Path(tablet_path); |
967 | 0 | auto trash_tablet_path = trash_root_path / |
968 | 0 | fs_tablet_path.parent_path().filename() /* tablet_id */ / |
969 | 0 | fs_tablet_path.filename() /* schema_hash */; |
970 | | |
971 | | // 3. Create the destination's parent directory first, or the rename() call below will fail. |
972 | 0 | auto trash_tablet_parent = trash_tablet_path.parent_path(); |
973 | | // Create the directory only if it does not exist yet. |
974 | 0 | bool exists = true; |
975 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(trash_tablet_parent, &exists)); |
976 | 0 | if (!exists) { |
977 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(trash_tablet_parent)); |
978 | 0 | } |
979 | | |
980 | | // 4. Move the tablet directory into the trash with rename(); a failure is returned as OS_ERROR. |
981 | 0 | VLOG_NOTICE << "move file to trash. " << tablet_path << " -> " << trash_tablet_path; |
982 | 0 | if (rename(tablet_path.c_str(), trash_tablet_path.c_str()) < 0) { |
983 | 0 | return Status::Error<OS_ERROR>("move file to trash failed. file={}, target={}, err={}", |
984 | 0 | tablet_path, trash_tablet_path.native(), Errno::str()); |
985 | 0 | } |
986 | | |
987 | | // 5. Check the parent dir of the source path and delete it when it is empty. |
988 | 0 | RETURN_IF_ERROR(delete_tablet_parent_path_if_empty(tablet_path)); |
989 | | |
990 | 0 | return Status::OK(); |
991 | 0 | } |
992 | | // Deletes the parent (tablet_id level) directory of tablet_path when listing it yields no entries; list and delete errors are returned. |
993 | 235 | Status DataDir::delete_tablet_parent_path_if_empty(const std::string& tablet_path) { |
994 | 235 | auto fs_tablet_path = io::Path(tablet_path); |
995 | 235 | std::string source_parent_dir = fs_tablet_path.parent_path(); // tablet_id level |
996 | 235 | std::vector<io::FileInfo> sub_files; |
997 | 235 | bool exists = true; |
998 | 235 | RETURN_IF_ERROR( |
999 | 235 | io::global_local_filesystem()->list(source_parent_dir, false, &sub_files, &exists)); |
1000 | 235 | if (sub_files.empty()) { |
1001 | 235 | LOG(INFO) << "remove empty dir " << source_parent_dir; |
1002 | | // NOTE: an earlier remark here said the status need not be examined, but RETURN_IF_ERROR does propagate a failure. |
1003 | 235 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(source_parent_dir)); |
1004 | 235 | } |
1005 | 235 | return Status::OK(); |
1006 | 235 | } |
1007 | | // Collects the REMOTE_ROWSET_GC_PREFIX records from _meta, batch-deletes each rowset's remote segment paths, then removes the records that were deleted or malformed. |
1008 | 0 | void DataDir::perform_remote_rowset_gc() { |
1009 | 0 | std::vector<std::pair<std::string, std::string>> gc_kvs; |
1010 | 0 | auto traverse_remote_rowset_func = [&gc_kvs](std::string_view key, |
1011 | 0 | std::string_view value) -> bool { |
1012 | 0 | gc_kvs.emplace_back(key, value); |
1013 | 0 | return true; |
1014 | 0 | }; |
1015 | 0 | static_cast<void>(_meta->iterate(META_COLUMN_FAMILY_INDEX, REMOTE_ROWSET_GC_PREFIX, |
1016 | 0 | traverse_remote_rowset_func)); |
1017 | 0 | std::vector<std::string> deleted_keys; |
1018 | 0 | for (auto& [key, val] : gc_kvs) { |
1019 | 0 | auto rowset_id = key.substr(REMOTE_ROWSET_GC_PREFIX.size()); |
1020 | 0 | RemoteRowsetGcPB gc_pb; |
1021 | 0 | if (!gc_pb.ParseFromString(val)) { |
1022 | 0 | LOG(WARNING) << "malformed RemoteRowsetGcPB. rowset_id=" << rowset_id; |
1023 | 0 | deleted_keys.push_back(std::move(key)); |
1024 | 0 | continue; |
1025 | 0 | } |
1026 | | // Look up the storage resource; when it is missing the record is kept (its key is not queued in deleted_keys). |
1027 | 0 | auto storage_resource = get_storage_resource(gc_pb.resource_id()); |
1028 | 0 | if (!storage_resource) { |
1029 | 0 | LOG(WARNING) << "Cannot get file system: " << gc_pb.resource_id(); |
1030 | 0 | continue; |
1031 | 0 | } |
1032 | | // One remote segment path per segment index in [0, num_segments). |
1033 | 0 | std::vector<io::Path> seg_paths; |
1034 | 0 | seg_paths.reserve(gc_pb.num_segments()); |
1035 | 0 | for (int i = 0; i < gc_pb.num_segments(); ++i) { |
1036 | 0 | seg_paths.emplace_back( |
1037 | 0 | storage_resource->first.remote_segment_path(gc_pb.tablet_id(), rowset_id, i)); |
1038 | 0 | } |
1039 | |
|
1040 | 0 | auto& fs = storage_resource->first.fs; |
1041 | 0 | LOG(INFO) << "delete remote rowset. root_path=" << fs->root_path() |
1042 | 0 | << ", rowset_id=" << rowset_id; |
1043 | 0 | auto st = fs->batch_delete(seg_paths); |
1044 | 0 | if (st.ok()) { |
1045 | 0 | deleted_keys.push_back(std::move(key)); |
1046 | 0 | unused_remote_rowset_num << -1; |
1047 | 0 | } else { |
1048 | 0 | LOG(WARNING) << "failed to delete remote rowset. err=" << st; |
1049 | 0 | } |
1050 | 0 | } |
1051 | 0 | for (const auto& key : deleted_keys) { |
1052 | 0 | static_cast<void>(_meta->remove(META_COLUMN_FAMILY_INDEX, key)); |
1053 | 0 | } |
1054 | 0 | } |
1055 | | // Collects the REMOTE_TABLET_GC_PREFIX records from _meta and deletes <DATA_PREFIX>/<tablet_id> on each listed resource's filesystem; a record is removed only if every deletion succeeded or it was malformed. |
1056 | 0 | void DataDir::perform_remote_tablet_gc() { |
1057 | 0 | std::vector<std::pair<std::string, std::string>> tablet_gc_kvs; |
1058 | 0 | auto traverse_remote_tablet_func = [&tablet_gc_kvs](std::string_view key, |
1059 | 0 | std::string_view value) -> bool { |
1060 | 0 | tablet_gc_kvs.emplace_back(key, value); |
1061 | 0 | return true; |
1062 | 0 | }; |
1063 | 0 | static_cast<void>(_meta->iterate(META_COLUMN_FAMILY_INDEX, REMOTE_TABLET_GC_PREFIX, |
1064 | 0 | traverse_remote_tablet_func)); |
1065 | 0 | std::vector<std::string> deleted_keys; |
1066 | 0 | for (auto& [key, val] : tablet_gc_kvs) { |
1067 | 0 | auto tablet_id = key.substr(REMOTE_TABLET_GC_PREFIX.size()); |
1068 | 0 | RemoteTabletGcPB gc_pb; |
1069 | 0 | if (!gc_pb.ParseFromString(val)) { |
1070 | 0 | LOG(WARNING) << "malformed RemoteTabletGcPB. tablet_id=" << tablet_id; |
1071 | 0 | deleted_keys.push_back(std::move(key)); |
1072 | 0 | continue; |
1073 | 0 | } |
1074 | 0 | bool success = true; |
1075 | 0 | for (auto& resource_id : gc_pb.resource_ids()) { |
1076 | 0 | auto fs = get_filesystem(resource_id); |
1077 | 0 | if (!fs) { |
1078 | 0 | LOG(WARNING) << "could not get file system. resource_id=" << resource_id; |
1079 | 0 | success = false; |
1080 | 0 | continue; |
1081 | 0 | } |
1082 | 0 | LOG(INFO) << "delete remote rowsets of tablet. root_path=" << fs->root_path() |
1083 | 0 | << ", tablet_id=" << tablet_id; |
1084 | 0 | auto st = fs->delete_directory(DATA_PREFIX + '/' + tablet_id); |
1085 | 0 | if (!st.ok()) { |
1086 | 0 | LOG(WARNING) << "failed to delete all remote rowset in tablet. err=" << st; |
1087 | 0 | success = false; |
1088 | 0 | } |
1089 | 0 | } |
1090 | 0 | if (success) { |
1091 | 0 | deleted_keys.push_back(std::move(key)); |
1092 | 0 | } |
1093 | 0 | } |
1094 | 0 | for (const auto& key : deleted_keys) { |
1095 | 0 | static_cast<void>(_meta->remove(META_COLUMN_FAMILY_INDEX, key)); |
1096 | 0 | } |
1097 | 0 | } |
1098 | | #include "common/compile_check_end.h" |
1099 | | } // namespace doris |