/root/doris/be/src/olap/data_dir.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/data_dir.h" |
19 | | |
20 | | #include <fmt/core.h> |
21 | | #include <fmt/format.h> |
22 | | #include <gen_cpp/FrontendService_types.h> |
23 | | #include <gen_cpp/Types_types.h> |
24 | | #include <gen_cpp/olap_file.pb.h> |
25 | | |
26 | | #include <atomic> |
27 | | #include <cstdio> |
28 | | // IWYU pragma: no_include <bits/chrono.h> |
29 | | #include <chrono> // IWYU pragma: keep |
30 | | #include <cstddef> |
31 | | #include <filesystem> |
32 | | #include <memory> |
33 | | #include <new> |
34 | | #include <roaring/roaring.hh> |
35 | | #include <set> |
36 | | #include <sstream> |
37 | | #include <string> |
38 | | #include <thread> |
39 | | #include <utility> |
40 | | |
41 | | #include "common/config.h" |
42 | | #include "common/logging.h" |
43 | | #include "io/fs/file_reader.h" |
44 | | #include "io/fs/file_writer.h" |
45 | | #include "io/fs/local_file_system.h" |
46 | | #include "io/fs/path.h" |
47 | | #include "olap/delete_handler.h" |
48 | | #include "olap/olap_common.h" |
49 | | #include "olap/olap_define.h" |
50 | | #include "olap/olap_meta.h" |
51 | | #include "olap/rowset/beta_rowset.h" |
52 | | #include "olap/rowset/pending_rowset_helper.h" |
53 | | #include "olap/rowset/rowset.h" |
54 | | #include "olap/rowset/rowset_id_generator.h" |
55 | | #include "olap/rowset/rowset_meta.h" |
56 | | #include "olap/rowset/rowset_meta_manager.h" |
57 | | #include "olap/storage_engine.h" |
58 | | #include "olap/storage_policy.h" |
59 | | #include "olap/tablet.h" |
60 | | #include "olap/tablet_manager.h" |
61 | | #include "olap/tablet_meta_manager.h" |
62 | | #include "olap/txn_manager.h" |
63 | | #include "olap/utils.h" // for check_dir_existed |
64 | | #include "service/backend_options.h" |
65 | | #include "util/doris_metrics.h" |
66 | | #include "util/string_util.h" |
67 | | #include "util/uid_util.h" |
68 | | |
69 | | namespace doris { |
70 | | using namespace ErrorCode; |
71 | | |
72 | | namespace { |
73 | | |
74 | 63 | Status read_cluster_id(const std::string& cluster_id_path, int32_t* cluster_id) { |
75 | 63 | bool exists = false; |
76 | 63 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(cluster_id_path, &exists)); |
77 | 63 | *cluster_id = -1; |
78 | 63 | if (exists) { |
79 | 0 | io::FileReaderSPtr reader; |
80 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->open_file(cluster_id_path, &reader)); |
81 | 0 | size_t fsize = reader->size(); |
82 | 0 | if (fsize > 0) { |
83 | 0 | std::string content; |
84 | 0 | content.resize(fsize, '\0'); |
85 | 0 | size_t bytes_read = 0; |
86 | 0 | RETURN_IF_ERROR(reader->read_at(0, {content.data(), fsize}, &bytes_read)); |
87 | 0 | DCHECK_EQ(fsize, bytes_read); |
88 | 0 | *cluster_id = std::stoi(content); |
89 | 0 | } |
90 | 0 | } |
91 | 63 | return Status::OK(); |
92 | 63 | } |
93 | | |
94 | 0 | Status _write_cluster_id_to_path(const std::string& path, int32_t cluster_id) { |
95 | 0 | bool exists = false; |
96 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(path, &exists)); |
97 | 0 | if (!exists) { |
98 | 0 | io::FileWriterPtr file_writer; |
99 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_file(path, &file_writer)); |
100 | 0 | RETURN_IF_ERROR(file_writer->append(std::to_string(cluster_id))); |
101 | 0 | RETURN_IF_ERROR(file_writer->close()); |
102 | 0 | } |
103 | 0 | return Status::OK(); |
104 | 0 | } |
105 | | |
106 | | } // namespace |
107 | | |
108 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_total_capacity, MetricUnit::BYTES); |
109 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_avail_capacity, MetricUnit::BYTES); |
110 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_local_used_capacity, MetricUnit::BYTES); |
111 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_remote_used_capacity, MetricUnit::BYTES); |
112 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_trash_used_capacity, MetricUnit::BYTES); |
113 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_state, MetricUnit::BYTES); |
114 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_compaction_score, MetricUnit::NOUNIT); |
115 | | DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(disks_compaction_num, MetricUnit::NOUNIT); |
116 | | |
117 | | DataDir::DataDir(StorageEngine& engine, const std::string& path, int64_t capacity_bytes, |
118 | | TStorageMedium::type storage_medium) |
119 | | : _engine(engine), |
120 | | _path(path), |
121 | | _available_bytes(0), |
122 | | _disk_capacity_bytes(0), |
123 | | _trash_used_bytes(0), |
124 | | _storage_medium(storage_medium), |
125 | | _is_used(false), |
126 | | _cluster_id(-1), |
127 | 96 | _to_be_deleted(false) { |
128 | 96 | _data_dir_metric_entity = DorisMetrics::instance()->metric_registry()->register_entity( |
129 | 96 | std::string("data_dir.") + path, {{"path", path}}); |
130 | 96 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_total_capacity); |
131 | 96 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_avail_capacity); |
132 | 96 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_local_used_capacity); |
133 | 96 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_remote_used_capacity); |
134 | 96 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_trash_used_capacity); |
135 | 96 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_state); |
136 | 96 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_compaction_score); |
137 | 96 | INT_GAUGE_METRIC_REGISTER(_data_dir_metric_entity, disks_compaction_num); |
138 | 96 | } |
139 | | |
140 | 96 | DataDir::~DataDir() { |
141 | 96 | DorisMetrics::instance()->metric_registry()->deregister_entity(_data_dir_metric_entity); |
142 | 96 | delete _meta; |
143 | 96 | } |
144 | | |
145 | 63 | Status DataDir::init(bool init_meta) { |
146 | 63 | bool exists = false; |
147 | 63 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(_path, &exists)); |
148 | 63 | if (!exists) { |
149 | 0 | RETURN_NOT_OK_STATUS_WITH_WARN(Status::IOError("opendir failed, path={}", _path), |
150 | 0 | "check file exist failed"); |
151 | 0 | } |
152 | | |
153 | 63 | RETURN_NOT_OK_STATUS_WITH_WARN(update_capacity(), "update_capacity failed"); |
154 | 63 | RETURN_NOT_OK_STATUS_WITH_WARN(_init_cluster_id(), "_init_cluster_id failed"); |
155 | 63 | RETURN_NOT_OK_STATUS_WITH_WARN(_init_capacity_and_create_shards(), |
156 | 63 | "_init_capacity_and_create_shards failed"); |
157 | 63 | if (init_meta) { |
158 | 63 | RETURN_NOT_OK_STATUS_WITH_WARN(_init_meta(), "_init_meta failed"); |
159 | 63 | } |
160 | | |
161 | 63 | _is_used = true; |
162 | 63 | return Status::OK(); |
163 | 63 | } |
164 | | |
165 | 41 | void DataDir::stop_bg_worker() { |
166 | 41 | _stop_bg_worker = true; |
167 | 41 | } |
168 | | |
169 | 63 | Status DataDir::_init_cluster_id() { |
170 | 63 | auto cluster_id_path = fmt::format("{}/{}", _path, CLUSTER_ID_PREFIX); |
171 | 63 | RETURN_IF_ERROR(read_cluster_id(cluster_id_path, &_cluster_id)); |
172 | 63 | if (_cluster_id == -1) { |
173 | 63 | _cluster_id_incomplete = true; |
174 | 63 | } |
175 | 63 | return Status::OK(); |
176 | 63 | } |
177 | | |
178 | 63 | Status DataDir::_init_capacity_and_create_shards() { |
179 | 63 | RETURN_IF_ERROR(io::global_local_filesystem()->get_space_info(_path, &_disk_capacity_bytes, |
180 | 63 | &_available_bytes)); |
181 | 63 | auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX); |
182 | 63 | bool exists = false; |
183 | 63 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(data_path, &exists)); |
184 | 63 | if (!exists) { |
185 | 63 | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(data_path)); |
186 | 63 | } |
187 | 64.5k | for (int i = 0; i < MAX_SHARD_NUM; ++i) { |
188 | 64.5k | auto shard_path = fmt::format("{}/{}", data_path, i); |
189 | 64.5k | RETURN_IF_ERROR(io::global_local_filesystem()->exists(shard_path, &exists)); |
190 | 64.5k | if (!exists) { |
191 | 64.5k | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(shard_path)); |
192 | 64.5k | } |
193 | 64.5k | } |
194 | | |
195 | 63 | return Status::OK(); |
196 | 63 | } |
197 | | |
198 | 64 | Status DataDir::_init_meta() { |
199 | | // init path hash |
200 | 64 | _path_hash = hash_of_path(BackendOptions::get_localhost(), _path); |
201 | 64 | LOG(INFO) << "path: " << _path << ", hash: " << _path_hash; |
202 | | |
203 | | // init meta |
204 | 64 | _meta = new (std::nothrow) OlapMeta(_path); |
205 | 64 | if (_meta == nullptr) { |
206 | 0 | RETURN_NOT_OK_STATUS_WITH_WARN( |
207 | 0 | Status::MemoryAllocFailed("allocate memory for OlapMeta failed"), |
208 | 0 | "new OlapMeta failed"); |
209 | 0 | } |
210 | 64 | Status res = _meta->init(); |
211 | 64 | if (!res.ok()) { |
212 | 0 | RETURN_NOT_OK_STATUS_WITH_WARN(Status::IOError("open rocksdb failed, path={}", _path), |
213 | 0 | "init OlapMeta failed"); |
214 | 0 | } |
215 | 64 | return Status::OK(); |
216 | 64 | } |
217 | | |
218 | 0 | Status DataDir::set_cluster_id(int32_t cluster_id) { |
219 | 0 | if (_cluster_id != -1 && _cluster_id != cluster_id) { |
220 | 0 | LOG(ERROR) << "going to set cluster id to already assigned store, cluster_id=" |
221 | 0 | << _cluster_id << ", new_cluster_id=" << cluster_id; |
222 | 0 | return Status::InternalError("going to set cluster id to already assigned store"); |
223 | 0 | } |
224 | 0 | if (!_cluster_id_incomplete) { |
225 | 0 | return Status::OK(); |
226 | 0 | } |
227 | 0 | auto cluster_id_path = fmt::format("{}/{}", _path, CLUSTER_ID_PREFIX); |
228 | 0 | return _write_cluster_id_to_path(cluster_id_path, cluster_id); |
229 | 0 | } |
230 | | |
231 | 0 | void DataDir::health_check() { |
232 | | // check disk |
233 | 0 | if (_is_used) { |
234 | 0 | Status res = _read_and_write_test_file(); |
235 | 0 | if (!res && res.is<IO_ERROR>()) { |
236 | 0 | LOG(WARNING) << "store read/write test file occur IO Error. path=" << _path |
237 | 0 | << ", err: " << res; |
238 | 0 | _engine.add_broken_path(_path); |
239 | 0 | _is_used = !res.is<IO_ERROR>(); |
240 | 0 | } |
241 | 0 | } |
242 | 0 | disks_state->set_value(_is_used ? 1 : 0); |
243 | 0 | } |
244 | | |
245 | 0 | Status DataDir::_read_and_write_test_file() { |
246 | 0 | auto test_file = fmt::format("{}/{}", _path, kTestFilePath); |
247 | 0 | return read_write_test_file(test_file); |
248 | 0 | } |
249 | | |
250 | 93 | void DataDir::register_tablet(Tablet* tablet) { |
251 | 93 | TabletInfo tablet_info(tablet->tablet_id(), tablet->tablet_uid()); |
252 | | |
253 | 93 | std::lock_guard<std::mutex> l(_mutex); |
254 | 93 | _tablet_set.emplace(std::move(tablet_info)); |
255 | 93 | } |
256 | | |
257 | 46 | void DataDir::deregister_tablet(Tablet* tablet) { |
258 | 46 | TabletInfo tablet_info(tablet->tablet_id(), tablet->tablet_uid()); |
259 | | |
260 | 46 | std::lock_guard<std::mutex> l(_mutex); |
261 | 46 | _tablet_set.erase(tablet_info); |
262 | 46 | } |
263 | | |
264 | 0 | void DataDir::clear_tablets(std::vector<TabletInfo>* tablet_infos) { |
265 | 0 | std::lock_guard<std::mutex> l(_mutex); |
266 | |
|
267 | 0 | tablet_infos->insert(tablet_infos->end(), _tablet_set.begin(), _tablet_set.end()); |
268 | 0 | _tablet_set.clear(); |
269 | 0 | } |
270 | | |
271 | 0 | std::string DataDir::get_absolute_shard_path(int64_t shard_id) { |
272 | 0 | return fmt::format("{}/{}/{}", _path, DATA_PREFIX, shard_id); |
273 | 0 | } |
274 | | |
275 | | std::string DataDir::get_absolute_tablet_path(int64_t shard_id, int64_t tablet_id, |
276 | 0 | int32_t schema_hash) { |
277 | 0 | return fmt::format("{}/{}/{}", get_absolute_shard_path(shard_id), tablet_id, schema_hash); |
278 | 0 | } |
279 | | |
280 | 0 | void DataDir::find_tablet_in_trash(int64_t tablet_id, std::vector<std::string>* paths) { |
281 | | // path: /root_path/trash/time_label/tablet_id/schema_hash |
282 | 0 | auto trash_path = fmt::format("{}/{}", _path, TRASH_PREFIX); |
283 | 0 | bool exists = true; |
284 | 0 | std::vector<io::FileInfo> sub_dirs; |
285 | 0 | Status st = io::global_local_filesystem()->list(trash_path, false, &sub_dirs, &exists); |
286 | 0 | if (!st) { |
287 | 0 | return; |
288 | 0 | } |
289 | | |
290 | 0 | for (auto& sub_dir : sub_dirs) { |
291 | | // sub dir is time_label |
292 | 0 | if (sub_dir.is_file) { |
293 | 0 | continue; |
294 | 0 | } |
295 | 0 | auto sub_path = fmt::format("{}/{}", trash_path, sub_dir.file_name); |
296 | 0 | auto tablet_path = fmt::format("{}/{}", sub_path, tablet_id); |
297 | 0 | st = io::global_local_filesystem()->exists(tablet_path, &exists); |
298 | 0 | if (st && exists) { |
299 | 0 | paths->emplace_back(std::move(tablet_path)); |
300 | 0 | } |
301 | 0 | } |
302 | 0 | } |
303 | | |
304 | | std::string DataDir::get_root_path_from_schema_hash_path_in_trash( |
305 | 0 | const std::string& schema_hash_dir_in_trash) { |
306 | 0 | return io::Path(schema_hash_dir_in_trash) |
307 | 0 | .parent_path() |
308 | 0 | .parent_path() |
309 | 0 | .parent_path() |
310 | 0 | .parent_path() |
311 | 0 | .string(); |
312 | 0 | } |
313 | | |
314 | 41 | Status DataDir::_check_incompatible_old_format_tablet() { |
315 | 41 | auto check_incompatible_old_func = [](int64_t tablet_id, int32_t schema_hash, |
316 | 41 | std::string_view value) -> bool { |
317 | | // if strict check incompatible old format, then log fatal |
318 | 0 | if (config::storage_strict_check_incompatible_old_format) { |
319 | 0 | LOG(FATAL) |
320 | 0 | << "There are incompatible old format metas, current version does not support " |
321 | 0 | << "and it may lead to data missing!!! " |
322 | 0 | << "tablet_id = " << tablet_id << " schema_hash = " << schema_hash; |
323 | 0 | } else { |
324 | 0 | LOG(WARNING) |
325 | 0 | << "There are incompatible old format metas, current version does not support " |
326 | 0 | << "and it may lead to data missing!!! " |
327 | 0 | << "tablet_id = " << tablet_id << " schema_hash = " << schema_hash; |
328 | 0 | } |
329 | 0 | return false; |
330 | 0 | }; |
331 | | |
332 | | // seek old header prefix. when check_incompatible_old_func is called, it has old format in olap_meta |
333 | 41 | Status check_incompatible_old_status = TabletMetaManager::traverse_headers( |
334 | 41 | _meta, check_incompatible_old_func, OLD_HEADER_PREFIX); |
335 | 41 | if (!check_incompatible_old_status) { |
336 | 0 | LOG(WARNING) << "check incompatible old format meta fails, it may lead to data missing!!! " |
337 | 0 | << _path; |
338 | 41 | } else { |
339 | 41 | LOG(INFO) << "successfully check incompatible old format meta " << _path; |
340 | 41 | } |
341 | 41 | return check_incompatible_old_status; |
342 | 41 | } |
343 | | |
344 | | // TODO(ygl): deal with rowsets and tablets when load failed |
345 | 41 | Status DataDir::load() { |
346 | 41 | LOG(INFO) << "start to load tablets from " << _path; |
347 | | |
348 | | // load rowset meta from meta env and create rowset |
349 | | // COMMITTED: add to txn manager |
350 | | // VISIBLE: add to tablet |
351 | | // if one rowset load failed, then the total data dir will not be loaded |
352 | | |
353 | | // necessarily check incompatible old format. when there are old metas, it may load to data missing |
354 | 41 | RETURN_IF_ERROR(_check_incompatible_old_format_tablet()); |
355 | | |
356 | 41 | std::vector<RowsetMetaSharedPtr> dir_rowset_metas; |
357 | 41 | LOG(INFO) << "begin loading rowset from meta"; |
358 | 41 | auto load_rowset_func = [&dir_rowset_metas, this](TabletUid tablet_uid, RowsetId rowset_id, |
359 | 41 | std::string_view meta_str) -> bool { |
360 | 0 | RowsetMetaSharedPtr rowset_meta(new RowsetMeta()); |
361 | 0 | bool parsed = rowset_meta->init(meta_str); |
362 | 0 | if (!parsed) { |
363 | 0 | LOG(WARNING) << "parse rowset meta string failed for rowset_id:" << rowset_id; |
364 | | // return false will break meta iterator, return true to skip this error |
365 | 0 | return true; |
366 | 0 | } |
367 | | |
368 | 0 | if (rowset_meta->has_delete_predicate()) { |
369 | | // copy the delete sub pred v1 to check then |
370 | 0 | auto orig_delete_sub_pred = rowset_meta->delete_predicate().sub_predicates(); |
371 | 0 | auto* delete_pred = rowset_meta->mutable_delete_pred_pb(); |
372 | |
|
373 | 0 | if ((!delete_pred->sub_predicates().empty() && |
374 | 0 | delete_pred->sub_predicates_v2().empty()) || |
375 | 0 | (!delete_pred->in_predicates().empty() && |
376 | 0 | delete_pred->in_predicates()[0].has_column_unique_id())) { |
377 | | // convert pred and write only when delete sub pred v2 is not set or there is in list pred to be set column uid |
378 | 0 | RETURN_IF_ERROR(DeleteHandler::convert_to_sub_pred_v2( |
379 | 0 | delete_pred, rowset_meta->tablet_schema())); |
380 | 0 | LOG(INFO) << fmt::format( |
381 | 0 | "convert rowset with old delete pred: rowset_id={}, tablet_id={}", |
382 | 0 | rowset_id.to_string(), tablet_uid.to_string()); |
383 | 0 | CHECK_EQ(orig_delete_sub_pred.size(), delete_pred->sub_predicates().size()) |
384 | 0 | << "inconsistent sub predicate v1 after conversion"; |
385 | 0 | for (size_t i = 0; i < orig_delete_sub_pred.size(); ++i) { |
386 | 0 | CHECK_STREQ(orig_delete_sub_pred.Get(i).c_str(), |
387 | 0 | delete_pred->sub_predicates().Get(i).c_str()) |
388 | 0 | << "inconsistent sub predicate v1 after conversion"; |
389 | 0 | } |
390 | 0 | std::string result; |
391 | 0 | rowset_meta->serialize(&result); |
392 | 0 | std::string key = |
393 | 0 | ROWSET_PREFIX + tablet_uid.to_string() + "_" + rowset_id.to_string(); |
394 | 0 | RETURN_IF_ERROR(_meta->put(META_COLUMN_FAMILY_INDEX, key, result)); |
395 | 0 | } |
396 | 0 | } |
397 | | |
398 | 0 | if (rowset_meta->partition_id() == 0) { |
399 | 0 | LOG(WARNING) << "rs tablet=" << rowset_meta->tablet_id() << " rowset_id=" << rowset_id |
400 | 0 | << " load from meta but partition id eq 0"; |
401 | 0 | } |
402 | |
|
403 | 0 | dir_rowset_metas.push_back(rowset_meta); |
404 | 0 | return true; |
405 | 0 | }; |
406 | 41 | Status load_rowset_status = RowsetMetaManager::traverse_rowset_metas(_meta, load_rowset_func); |
407 | | |
408 | 41 | if (!load_rowset_status) { |
409 | 0 | LOG(WARNING) << "errors when load rowset meta from meta env, skip this data dir:" << _path; |
410 | 41 | } else { |
411 | 41 | LOG(INFO) << "load rowset from meta finished, data dir: " << _path; |
412 | 41 | } |
413 | | |
414 | | // load tablet |
415 | | // create tablet from tablet meta and add it to tablet mgr |
416 | 41 | LOG(INFO) << "begin loading tablet from meta"; |
417 | 41 | std::set<int64_t> tablet_ids; |
418 | 41 | std::set<int64_t> failed_tablet_ids; |
419 | 41 | auto load_tablet_func = [this, &tablet_ids, &failed_tablet_ids]( |
420 | 41 | int64_t tablet_id, int32_t schema_hash, |
421 | 41 | std::string_view value) -> bool { |
422 | 0 | Status status = _engine.tablet_manager()->load_tablet_from_meta( |
423 | 0 | this, tablet_id, schema_hash, value, false, false, false, false); |
424 | 0 | if (!status.ok() && !status.is<TABLE_ALREADY_DELETED_ERROR>() && |
425 | 0 | !status.is<ENGINE_INSERT_OLD_TABLET>()) { |
426 | | // load_tablet_from_meta() may return Status::Error<TABLE_ALREADY_DELETED_ERROR>() |
427 | | // which means the tablet status is DELETED |
428 | | // This may happen when the tablet was just deleted before the BE restarted, |
429 | | // but it has not been cleared from rocksdb. At this time, restarting the BE |
430 | | // will read the tablet in the DELETE state from rocksdb. These tablets have been |
431 | | // added to the garbage collection queue and will be automatically deleted afterwards. |
432 | | // Therefore, we believe that this situation is not a failure. |
433 | | |
434 | | // Besides, load_tablet_from_meta() may return Status::Error<ENGINE_INSERT_OLD_TABLET>() |
435 | | // when BE is restarting and the older tablet have been added to the |
436 | | // garbage collection queue but not deleted yet. |
437 | | // In this case, since the data_dirs are parallel loaded, a later loaded tablet |
438 | | // may be older than previously loaded one, which should not be acknowledged as a |
439 | | // failure. |
440 | 0 | LOG(WARNING) << "load tablet from header failed. status:" << status |
441 | 0 | << ", tablet=" << tablet_id << "." << schema_hash; |
442 | 0 | failed_tablet_ids.insert(tablet_id); |
443 | 0 | } else { |
444 | 0 | tablet_ids.insert(tablet_id); |
445 | 0 | } |
446 | 0 | return true; |
447 | 0 | }; |
448 | 41 | Status load_tablet_status = TabletMetaManager::traverse_headers(_meta, load_tablet_func); |
449 | 41 | if (!failed_tablet_ids.empty()) { |
450 | 0 | LOG(WARNING) << "load tablets from header failed" |
451 | 0 | << ", loaded tablet: " << tablet_ids.size() |
452 | 0 | << ", error tablet: " << failed_tablet_ids.size() << ", path: " << _path; |
453 | 0 | if (!config::ignore_load_tablet_failure) { |
454 | 0 | LOG(FATAL) << "load tablets encounter failure. stop BE process. path: " << _path; |
455 | 0 | } |
456 | 0 | } |
457 | 41 | if (!load_tablet_status) { |
458 | 0 | LOG(WARNING) << "there is failure when loading tablet headers" |
459 | 0 | << ", loaded tablet: " << tablet_ids.size() |
460 | 0 | << ", error tablet: " << failed_tablet_ids.size() << ", path: " << _path; |
461 | 41 | } else { |
462 | 41 | LOG(INFO) << "load tablet from meta finished" |
463 | 41 | << ", loaded tablet: " << tablet_ids.size() |
464 | 41 | << ", error tablet: " << failed_tablet_ids.size() << ", path: " << _path; |
465 | 41 | } |
466 | | |
467 | 41 | for (int64_t tablet_id : tablet_ids) { |
468 | 0 | TabletSharedPtr tablet = _engine.tablet_manager()->get_tablet(tablet_id); |
469 | 0 | if (tablet && tablet->set_tablet_schema_into_rowset_meta()) { |
470 | 0 | RETURN_IF_ERROR(TabletMetaManager::save(this, tablet->tablet_id(), |
471 | 0 | tablet->schema_hash(), tablet->tablet_meta())); |
472 | 0 | } |
473 | 0 | } |
474 | | |
475 | 41 | auto load_pending_publish_info_func = |
476 | 41 | [&engine = _engine](int64_t tablet_id, int64_t publish_version, std::string_view info) { |
477 | 0 | PendingPublishInfoPB pending_publish_info_pb; |
478 | 0 | bool parsed = pending_publish_info_pb.ParseFromArray(info.data(), info.size()); |
479 | 0 | if (!parsed) { |
480 | 0 | LOG(WARNING) << "parse pending publish info failed, tablet_id: " << tablet_id |
481 | 0 | << " publish_version: " << publish_version; |
482 | 0 | } |
483 | 0 | engine.add_async_publish_task(pending_publish_info_pb.partition_id(), tablet_id, |
484 | 0 | publish_version, |
485 | 0 | pending_publish_info_pb.transaction_id(), true); |
486 | 0 | return true; |
487 | 0 | }; |
488 | 41 | RETURN_IF_ERROR( |
489 | 41 | TabletMetaManager::traverse_pending_publish(_meta, load_pending_publish_info_func)); |
490 | | |
491 | 41 | int64_t rowset_partition_id_eq_0_num = 0; |
492 | 41 | for (auto rowset_meta : dir_rowset_metas) { |
493 | 0 | if (rowset_meta->partition_id() == 0) { |
494 | 0 | ++rowset_partition_id_eq_0_num; |
495 | 0 | } |
496 | 0 | } |
497 | 41 | if (rowset_partition_id_eq_0_num > config::ignore_invalid_partition_id_rowset_num) { |
498 | 0 | LOG(FATAL) << fmt::format( |
499 | 0 | "roswet partition id eq 0 is {} bigger than config {}, be exit, plz check be.INFO", |
500 | 0 | rowset_partition_id_eq_0_num, config::ignore_invalid_partition_id_rowset_num); |
501 | 0 | exit(-1); |
502 | 0 | } |
503 | | |
504 | | // traverse rowset |
505 | | // 1. add committed rowset to txn map |
506 | | // 2. add visible rowset to tablet |
507 | | // ignore any errors when load tablet or rowset, because fe will repair them after report |
508 | 41 | int64_t invalid_rowset_counter = 0; |
509 | 41 | for (auto&& rowset_meta : dir_rowset_metas) { |
510 | 0 | TabletSharedPtr tablet = _engine.tablet_manager()->get_tablet(rowset_meta->tablet_id()); |
511 | | // tablet maybe dropped, but not drop related rowset meta |
512 | 0 | if (tablet == nullptr) { |
513 | 0 | VLOG_NOTICE << "could not find tablet id: " << rowset_meta->tablet_id() |
514 | 0 | << ", schema hash: " << rowset_meta->tablet_schema_hash() |
515 | 0 | << ", for rowset: " << rowset_meta->rowset_id() << ", skip this rowset"; |
516 | 0 | ++invalid_rowset_counter; |
517 | 0 | continue; |
518 | 0 | } |
519 | | |
520 | 0 | if (rowset_meta->partition_id() == 0) { |
521 | 0 | LOG(WARNING) << "skip tablet_id=" << tablet->tablet_id() |
522 | 0 | << " rowset: " << rowset_meta->rowset_id() |
523 | 0 | << " txn: " << rowset_meta->txn_id(); |
524 | 0 | continue; |
525 | 0 | } |
526 | | |
527 | 0 | RowsetSharedPtr rowset; |
528 | 0 | Status create_status = tablet->create_rowset(rowset_meta, &rowset); |
529 | 0 | if (!create_status) { |
530 | 0 | LOG(WARNING) << "could not create rowset from rowsetmeta: " |
531 | 0 | << " rowset_id: " << rowset_meta->rowset_id() |
532 | 0 | << " rowset_type: " << rowset_meta->rowset_type() |
533 | 0 | << " rowset_state: " << rowset_meta->rowset_state(); |
534 | 0 | continue; |
535 | 0 | } |
536 | 0 | if (rowset_meta->rowset_state() == RowsetStatePB::COMMITTED && |
537 | 0 | rowset_meta->tablet_uid() == tablet->tablet_uid()) { |
538 | 0 | if (!rowset_meta->tablet_schema()) { |
539 | 0 | rowset_meta->set_tablet_schema(tablet->tablet_schema()); |
540 | 0 | RETURN_IF_ERROR(RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), |
541 | 0 | rowset_meta->rowset_id(), |
542 | 0 | rowset_meta->get_rowset_pb(), false)); |
543 | 0 | } |
544 | 0 | Status commit_txn_status = _engine.txn_manager()->commit_txn( |
545 | 0 | _meta, rowset_meta->partition_id(), rowset_meta->txn_id(), |
546 | 0 | rowset_meta->tablet_id(), rowset_meta->tablet_uid(), rowset_meta->load_id(), |
547 | 0 | rowset, _engine.pending_local_rowsets().add(rowset_meta->rowset_id()), true); |
548 | 0 | if (commit_txn_status || commit_txn_status.is<PUSH_TRANSACTION_ALREADY_EXIST>()) { |
549 | 0 | LOG(INFO) << "successfully to add committed rowset: " << rowset_meta->rowset_id() |
550 | 0 | << " to tablet: " << rowset_meta->tablet_id() |
551 | 0 | << " schema hash: " << rowset_meta->tablet_schema_hash() |
552 | 0 | << " for txn: " << rowset_meta->txn_id(); |
553 | |
|
554 | 0 | } else if (commit_txn_status.is<ErrorCode::INTERNAL_ERROR>()) { |
555 | 0 | LOG(WARNING) << "failed to add committed rowset: " << rowset_meta->rowset_id() |
556 | 0 | << " to tablet: " << rowset_meta->tablet_id() |
557 | 0 | << " for txn: " << rowset_meta->txn_id() |
558 | 0 | << " error: " << commit_txn_status; |
559 | 0 | return commit_txn_status; |
560 | 0 | } else { |
561 | 0 | LOG(WARNING) << "failed to add committed rowset: " << rowset_meta->rowset_id() |
562 | 0 | << " to tablet: " << rowset_meta->tablet_id() |
563 | 0 | << " for txn: " << rowset_meta->txn_id() |
564 | 0 | << " error: " << commit_txn_status; |
565 | 0 | } |
566 | 0 | } else if (rowset_meta->rowset_state() == RowsetStatePB::VISIBLE && |
567 | 0 | rowset_meta->tablet_uid() == tablet->tablet_uid()) { |
568 | 0 | if (!rowset_meta->tablet_schema()) { |
569 | 0 | rowset_meta->set_tablet_schema(tablet->tablet_schema()); |
570 | 0 | RETURN_IF_ERROR(RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), |
571 | 0 | rowset_meta->rowset_id(), |
572 | 0 | rowset_meta->get_rowset_pb(), false)); |
573 | 0 | } |
574 | 0 | Status publish_status = tablet->add_rowset(rowset); |
575 | 0 | if (!publish_status && !publish_status.is<PUSH_VERSION_ALREADY_EXIST>()) { |
576 | 0 | LOG(WARNING) << "add visible rowset to tablet failed rowset_id:" |
577 | 0 | << rowset->rowset_id() << " tablet id: " << rowset_meta->tablet_id() |
578 | 0 | << " txn id:" << rowset_meta->txn_id() |
579 | 0 | << " start_version: " << rowset_meta->version().first |
580 | 0 | << " end_version: " << rowset_meta->version().second; |
581 | 0 | } |
582 | 0 | } else { |
583 | 0 | LOG(WARNING) << "find invalid rowset: " << rowset_meta->rowset_id() |
584 | 0 | << " with tablet id: " << rowset_meta->tablet_id() |
585 | 0 | << " tablet uid: " << rowset_meta->tablet_uid() |
586 | 0 | << " schema hash: " << rowset_meta->tablet_schema_hash() |
587 | 0 | << " txn: " << rowset_meta->txn_id() |
588 | 0 | << " current valid tablet uid: " << tablet->tablet_uid(); |
589 | 0 | ++invalid_rowset_counter; |
590 | 0 | } |
591 | 0 | } |
592 | | |
593 | 41 | auto load_delete_bitmap_func = [this](int64_t tablet_id, int64_t version, |
594 | 41 | std::string_view val) { |
595 | 0 | TabletSharedPtr tablet = _engine.tablet_manager()->get_tablet(tablet_id); |
596 | 0 | if (!tablet) { |
597 | 0 | return true; |
598 | 0 | } |
599 | 0 | const std::vector<RowsetMetaSharedPtr>& all_rowsets = tablet->tablet_meta()->all_rs_metas(); |
600 | 0 | RowsetIdUnorderedSet rowset_ids; |
601 | 0 | for (auto& rowset_meta : all_rowsets) { |
602 | 0 | rowset_ids.insert(rowset_meta->rowset_id()); |
603 | 0 | } |
604 | |
|
605 | 0 | DeleteBitmapPB delete_bitmap_pb; |
606 | 0 | delete_bitmap_pb.ParseFromArray(val.data(), val.size()); |
607 | 0 | int rst_ids_size = delete_bitmap_pb.rowset_ids_size(); |
608 | 0 | int seg_ids_size = delete_bitmap_pb.segment_ids_size(); |
609 | 0 | int seg_maps_size = delete_bitmap_pb.segment_delete_bitmaps_size(); |
610 | 0 | CHECK(rst_ids_size == seg_ids_size && seg_ids_size == seg_maps_size); |
611 | |
|
612 | 0 | for (size_t i = 0; i < rst_ids_size; ++i) { |
613 | 0 | RowsetId rst_id; |
614 | 0 | rst_id.init(delete_bitmap_pb.rowset_ids(i)); |
615 | | // only process the rowset in _rs_metas |
616 | 0 | if (rowset_ids.find(rst_id) == rowset_ids.end()) { |
617 | 0 | continue; |
618 | 0 | } |
619 | 0 | auto seg_id = delete_bitmap_pb.segment_ids(i); |
620 | 0 | auto iter = tablet->tablet_meta()->delete_bitmap().delete_bitmap.find( |
621 | 0 | {rst_id, seg_id, version}); |
622 | | // This version of delete bitmap already exists |
623 | 0 | if (iter != tablet->tablet_meta()->delete_bitmap().delete_bitmap.end()) { |
624 | 0 | continue; |
625 | 0 | } |
626 | 0 | auto bitmap = delete_bitmap_pb.segment_delete_bitmaps(i).data(); |
627 | 0 | tablet->tablet_meta()->delete_bitmap().delete_bitmap[{rst_id, seg_id, version}] = |
628 | 0 | roaring::Roaring::read(bitmap); |
629 | 0 | } |
630 | 0 | return true; |
631 | 0 | }; |
632 | 41 | RETURN_IF_ERROR(TabletMetaManager::traverse_delete_bitmap(_meta, load_delete_bitmap_func)); |
633 | | |
634 | | // At startup, we only count these invalid rowset, but do not actually delete it. |
635 | | // The actual delete operation is in StorageEngine::_clean_unused_rowset_metas, |
636 | | // which is cleaned up uniformly by the background cleanup thread. |
637 | 41 | LOG(INFO) << "finish to load tablets from " << _path |
638 | 41 | << ", total rowset meta: " << dir_rowset_metas.size() |
639 | 41 | << ", invalid rowset num: " << invalid_rowset_counter; |
640 | | |
641 | 41 | return Status::OK(); |
642 | 41 | } |
643 | | |
644 | | // gc unused local tablet dir |
645 | 20 | void DataDir::_perform_tablet_gc(const std::string& tablet_schema_hash_path, int16_t shard_id) { |
646 | 20 | if (_stop_bg_worker) { |
647 | 0 | return; |
648 | 0 | } |
649 | | |
650 | 20 | TTabletId tablet_id = -1; |
651 | 20 | TSchemaHash schema_hash = -1; |
652 | 20 | bool is_valid = TabletManager::get_tablet_id_and_schema_hash_from_path( |
653 | 20 | tablet_schema_hash_path, &tablet_id, &schema_hash); |
654 | 20 | if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { |
655 | 0 | LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path; |
656 | 0 | return; |
657 | 0 | } |
658 | | |
659 | 20 | auto tablet = _engine.tablet_manager()->get_tablet(tablet_id); |
660 | 20 | if (!tablet || tablet->data_dir() != this) { |
661 | 10 | if (tablet) { |
662 | 0 | LOG(INFO) << "The tablet in path " << tablet_schema_hash_path |
663 | 0 | << " is not same with the running one: " << tablet->tablet_path() |
664 | 0 | << ", might be the old tablet after migration, try to move it to trash"; |
665 | 0 | } |
666 | 10 | _engine.tablet_manager()->try_delete_unused_tablet_path(this, tablet_id, schema_hash, |
667 | 10 | tablet_schema_hash_path, shard_id); |
668 | 10 | return; |
669 | 10 | } |
670 | | |
671 | 10 | _perform_rowset_gc(tablet_schema_hash_path); |
672 | 10 | } |
673 | | |
674 | | // gc unused local rowsets under tablet dir |
675 | 10 | void DataDir::_perform_rowset_gc(const std::string& tablet_schema_hash_path) { |
676 | 10 | if (_stop_bg_worker) { |
677 | 0 | return; |
678 | 0 | } |
679 | | |
680 | 10 | TTabletId tablet_id = -1; |
681 | 10 | TSchemaHash schema_hash = -1; |
682 | 10 | bool is_valid = doris::TabletManager::get_tablet_id_and_schema_hash_from_path( |
683 | 10 | tablet_schema_hash_path, &tablet_id, &schema_hash); |
684 | 10 | if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { |
685 | 0 | LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path; |
686 | 0 | return; |
687 | 0 | } |
688 | | |
689 | 10 | auto tablet = _engine.tablet_manager()->get_tablet(tablet_id); |
690 | 10 | if (!tablet) { |
691 | | // Could not found the tablet, maybe it's a dropped tablet, will be reclaimed |
692 | | // in the next time `_perform_path_gc_by_tablet` |
693 | 0 | return; |
694 | 0 | } |
695 | | |
696 | 10 | if (tablet->data_dir() != this) { |
697 | | // Current running tablet is not in same data_dir, maybe it's a tablet after migration, |
698 | | // will be reclaimed in the next time `_perform_path_gc_by_tablet` |
699 | 0 | return; |
700 | 0 | } |
701 | | |
702 | 10 | bool exists; |
703 | 10 | std::vector<io::FileInfo> files; |
704 | 10 | auto st = io::global_local_filesystem()->list(tablet_schema_hash_path, true, &files, &exists); |
705 | 10 | if (!st.ok()) [[unlikely]] { |
706 | 0 | LOG(WARNING) << "[path gc] fail to list tablet path " << tablet_schema_hash_path << " : " |
707 | 0 | << st; |
708 | 0 | return; |
709 | 0 | } |
710 | | |
711 | | // Rowset files excluding pending rowsets |
712 | 10 | std::vector<std::pair<RowsetId, std::string /* filename */>> rowsets_not_pending; |
713 | 480 | for (auto&& file : files) { |
714 | 480 | auto rowset_id = extract_rowset_id(file.file_name); |
715 | 480 | if (rowset_id.hi == 0) { |
716 | 0 | continue; // Not a rowset |
717 | 0 | } |
718 | | |
719 | 480 | if (_engine.pending_local_rowsets().contains(rowset_id)) { |
720 | 80 | continue; // Pending rowset file |
721 | 80 | } |
722 | | |
723 | 400 | rowsets_not_pending.emplace_back(rowset_id, std::move(file.file_name)); |
724 | 400 | } |
725 | | |
726 | 10 | RowsetIdUnorderedSet rowsets_in_version_map; |
727 | 10 | tablet->traverse_rowsets( |
728 | 30 | [&rowsets_in_version_map](auto& rs) { rowsets_in_version_map.insert(rs->rowset_id()); }, |
729 | 10 | true); |
730 | | |
731 | 80 | auto reclaim_rowset_file = [](const std::string& path) { |
732 | 80 | auto st = io::global_local_filesystem()->delete_file(path); |
733 | 80 | if (!st.ok()) [[unlikely]] { |
734 | 0 | LOG(WARNING) << "[path gc] failed to delete garbage rowset file: " << st; |
735 | 0 | return; |
736 | 0 | } |
737 | 80 | LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log |
738 | 80 | }; |
739 | | |
740 | 100 | auto should_reclaim = [&, this](const RowsetId& rowset_id) { |
741 | 100 | return !rowsets_in_version_map.contains(rowset_id) && |
742 | 100 | !_engine.check_rowset_id_in_unused_rowsets(rowset_id) && |
743 | 100 | RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(), rowset_id) |
744 | 40 | .is<META_KEY_NOT_FOUND>(); |
745 | 100 | }; |
746 | | |
747 | | // rowset_id -> is_garbage |
748 | 10 | std::unordered_map<RowsetId, bool> checked_rowsets; |
749 | 400 | for (auto&& [rowset_id, filename] : rowsets_not_pending) { |
750 | 400 | if (_stop_bg_worker) { |
751 | 0 | return; |
752 | 0 | } |
753 | | |
754 | 400 | if (auto it = checked_rowsets.find(rowset_id); it != checked_rowsets.end()) { |
755 | 300 | if (it->second) { // Is checked garbage rowset |
756 | 60 | reclaim_rowset_file(tablet_schema_hash_path + '/' + filename); |
757 | 60 | } |
758 | 300 | continue; |
759 | 300 | } |
760 | | |
761 | 100 | if (should_reclaim(rowset_id)) { |
762 | 20 | if (config::path_gc_check_step > 0 && |
763 | 20 | ++_path_gc_step % config::path_gc_check_step == 0) { |
764 | 0 | std::this_thread::sleep_for( |
765 | 0 | std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); |
766 | 0 | } |
767 | 20 | reclaim_rowset_file(tablet_schema_hash_path + '/' + filename); |
768 | 20 | checked_rowsets.emplace(rowset_id, true); |
769 | 80 | } else { |
770 | 80 | checked_rowsets.emplace(rowset_id, false); |
771 | 80 | } |
772 | 100 | } |
773 | 10 | } |
774 | | |
775 | 1 | void DataDir::perform_path_gc() { |
776 | 1 | if (_stop_bg_worker) { |
777 | 0 | return; |
778 | 0 | } |
779 | | |
780 | 1 | LOG(INFO) << "start to gc data dir " << _path; |
781 | 1 | auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX); |
782 | 1 | std::vector<io::FileInfo> shards; |
783 | 1 | bool exists = true; |
784 | 1 | const auto& fs = io::global_local_filesystem(); |
785 | 1 | auto st = fs->list(data_path, false, &shards, &exists); |
786 | 1 | if (!st.ok()) [[unlikely]] { |
787 | 0 | LOG(WARNING) << "failed to scan data dir: " << st; |
788 | 0 | return; |
789 | 0 | } |
790 | | |
791 | 4 | for (const auto& shard : shards) { |
792 | 4 | if (_stop_bg_worker) { |
793 | 0 | break; |
794 | 0 | } |
795 | | |
796 | 4 | if (shard.is_file) { |
797 | 0 | continue; |
798 | 0 | } |
799 | | |
800 | 4 | auto shard_path = fmt::format("{}/{}", data_path, shard.file_name); |
801 | 4 | std::vector<io::FileInfo> tablet_ids; |
802 | 4 | st = io::global_local_filesystem()->list(shard_path, false, &tablet_ids, &exists); |
803 | 4 | if (!st.ok()) [[unlikely]] { |
804 | 0 | LOG(WARNING) << "fail to walk dir, shard_path=" << shard_path << " : " << st; |
805 | 0 | continue; |
806 | 0 | } |
807 | | |
808 | 20 | for (const auto& tablet_id : tablet_ids) { |
809 | 20 | if (_stop_bg_worker) { |
810 | 0 | break; |
811 | 0 | } |
812 | | |
813 | 20 | if (tablet_id.is_file) { |
814 | 0 | continue; |
815 | 0 | } |
816 | | |
817 | 20 | auto tablet_id_path = fmt::format("{}/{}", shard_path, tablet_id.file_name); |
818 | 20 | std::vector<io::FileInfo> schema_hashes; |
819 | 20 | st = fs->list(tablet_id_path, false, &schema_hashes, &exists); |
820 | 20 | if (!st.ok()) [[unlikely]] { |
821 | 0 | LOG(WARNING) << "fail to walk dir, tablet_id_path=" << tablet_id_path << " : " |
822 | 0 | << st; |
823 | 0 | continue; |
824 | 0 | } |
825 | | |
826 | 20 | for (auto&& schema_hash : schema_hashes) { |
827 | 20 | if (schema_hash.is_file) { |
828 | 0 | continue; |
829 | 0 | } |
830 | | |
831 | 20 | if (config::path_gc_check_step > 0 && |
832 | 20 | ++_path_gc_step % config::path_gc_check_step == 0) { |
833 | 0 | std::this_thread::sleep_for( |
834 | 0 | std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); |
835 | 0 | } |
836 | 20 | int16_t shard_id = -1; |
837 | 20 | try { |
838 | 20 | shard_id = std::stoi(shard.file_name); |
839 | 20 | } catch (const std::exception&) { |
840 | 0 | LOG(WARNING) << "failed to stoi shard_id, shard name=" << shard.file_name; |
841 | 0 | continue; |
842 | 0 | } |
843 | 20 | _perform_tablet_gc(tablet_id_path + '/' + schema_hash.file_name, shard_id); |
844 | 20 | } |
845 | 20 | } |
846 | 4 | } |
847 | | |
848 | 1 | LOG(INFO) << "gc data dir path: " << _path << " finished"; |
849 | 1 | } |
850 | | |
851 | 95 | Status DataDir::update_capacity() { |
852 | 95 | RETURN_IF_ERROR(io::global_local_filesystem()->get_space_info(_path, &_disk_capacity_bytes, |
853 | 95 | &_available_bytes)); |
854 | 84 | disks_total_capacity->set_value(_disk_capacity_bytes); |
855 | 84 | disks_avail_capacity->set_value(_available_bytes); |
856 | 84 | LOG(INFO) << "path: " << _path << " total capacity: " << _disk_capacity_bytes |
857 | 84 | << ", available capacity: " << _available_bytes << ", usage: " << get_usage(0) |
858 | 84 | << ", in_use: " << is_used(); |
859 | | |
860 | 84 | return Status::OK(); |
861 | 95 | } |
862 | | |
863 | 0 | void DataDir::update_trash_capacity() { |
864 | 0 | auto trash_path = fmt::format("{}/{}", _path, TRASH_PREFIX); |
865 | 0 | try { |
866 | 0 | _trash_used_bytes = _engine.get_file_or_directory_size(trash_path); |
867 | 0 | } catch (const std::filesystem::filesystem_error& e) { |
868 | 0 | LOG(WARNING) << "update trash capacity failed, path: " << _path << ", err: " << e.what(); |
869 | 0 | return; |
870 | 0 | } |
871 | 0 | disks_trash_used_capacity->set_value(_trash_used_bytes); |
872 | 0 | LOG(INFO) << "path: " << _path << " trash capacity: " << _trash_used_bytes; |
873 | 0 | } |
874 | | |
875 | 0 | void DataDir::update_local_data_size(int64_t size) { |
876 | 0 | disks_local_used_capacity->set_value(size); |
877 | 0 | } |
878 | | |
879 | 0 | void DataDir::update_remote_data_size(int64_t size) { |
880 | 0 | disks_remote_used_capacity->set_value(size); |
881 | 0 | } |
882 | | |
883 | 0 | size_t DataDir::tablet_size() const { |
884 | 0 | std::lock_guard<std::mutex> l(_mutex); |
885 | 0 | return _tablet_set.size(); |
886 | 0 | } |
887 | | |
888 | 167 | bool DataDir::reach_capacity_limit(int64_t incoming_data_size) { |
889 | 167 | double used_pct = get_usage(incoming_data_size); |
890 | 167 | int64_t left_bytes = _available_bytes - incoming_data_size; |
891 | 167 | if (used_pct >= config::storage_flood_stage_usage_percent / 100.0 && |
892 | 167 | left_bytes <= config::storage_flood_stage_left_capacity_bytes) { |
893 | 0 | LOG(WARNING) << "reach capacity limit. used pct: " << used_pct |
894 | 0 | << ", left bytes: " << left_bytes << ", path: " << _path; |
895 | 0 | return true; |
896 | 0 | } |
897 | 167 | return false; |
898 | 167 | } |
899 | | |
900 | 0 | void DataDir::disks_compaction_score_increment(int64_t delta) { |
901 | 0 | disks_compaction_score->increment(delta); |
902 | 0 | } |
903 | | |
904 | 0 | void DataDir::disks_compaction_num_increment(int64_t delta) { |
905 | 0 | disks_compaction_num->increment(delta); |
906 | 0 | } |
907 | | |
908 | 33 | Status DataDir::move_to_trash(const std::string& tablet_path) { |
909 | 33 | if (config::trash_file_expire_time_sec <= 0) { |
910 | 33 | LOG(INFO) << "delete tablet dir " << tablet_path |
911 | 33 | << " directly due to trash_file_expire_time_sec is 0"; |
912 | 33 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_path)); |
913 | 33 | return delete_tablet_parent_path_if_empty(tablet_path); |
914 | 33 | } |
915 | | |
916 | 0 | Status res = Status::OK(); |
917 | | // 1. get timestamp string |
918 | 0 | string time_str; |
919 | 0 | if ((res = gen_timestamp_string(&time_str)) != Status::OK()) { |
920 | 0 | LOG(WARNING) << "failed to generate time_string when move file to trash.err code=" << res; |
921 | 0 | return res; |
922 | 0 | } |
923 | | |
924 | | // 2. generate new file path |
925 | | // a global counter to avoid file name duplication. |
926 | 0 | static std::atomic<uint64_t> delete_counter(0); |
927 | 0 | auto trash_root_path = |
928 | 0 | fmt::format("{}/{}/{}.{}", _path, TRASH_PREFIX, time_str, delete_counter++); |
929 | 0 | auto fs_tablet_path = io::Path(tablet_path); |
930 | 0 | auto trash_tablet_path = trash_root_path / |
931 | 0 | fs_tablet_path.parent_path().filename() /* tablet_id */ / |
932 | 0 | fs_tablet_path.filename() /* schema_hash */; |
933 | | |
934 | | // 3. create target dir, or the rename() function will fail. |
935 | 0 | auto trash_tablet_parent = trash_tablet_path.parent_path(); |
936 | | // create dir if not exists |
937 | 0 | bool exists = true; |
938 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(trash_tablet_parent, &exists)); |
939 | 0 | if (!exists) { |
940 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(trash_tablet_parent)); |
941 | 0 | } |
942 | | |
943 | | // 4. move tablet to trash |
944 | 0 | VLOG_NOTICE << "move file to trash. " << tablet_path << " -> " << trash_tablet_path; |
945 | 0 | if (rename(tablet_path.c_str(), trash_tablet_path.c_str()) < 0) { |
946 | 0 | return Status::Error<OS_ERROR>("move file to trash failed. file={}, target={}, err={}", |
947 | 0 | tablet_path, trash_tablet_path.native(), Errno::str()); |
948 | 0 | } |
949 | | |
950 | | // 5. check parent dir of source file, delete it when empty |
951 | 0 | RETURN_IF_ERROR(delete_tablet_parent_path_if_empty(tablet_path)); |
952 | | |
953 | 0 | return Status::OK(); |
954 | 0 | } |
955 | | |
956 | 33 | Status DataDir::delete_tablet_parent_path_if_empty(const std::string& tablet_path) { |
957 | 33 | auto fs_tablet_path = io::Path(tablet_path); |
958 | 33 | std::string source_parent_dir = fs_tablet_path.parent_path(); // tablet_id level |
959 | 33 | std::vector<io::FileInfo> sub_files; |
960 | 33 | bool exists = true; |
961 | 33 | RETURN_IF_ERROR( |
962 | 33 | io::global_local_filesystem()->list(source_parent_dir, false, &sub_files, &exists)); |
963 | 33 | if (sub_files.empty()) { |
964 | 33 | LOG(INFO) << "remove empty dir " << source_parent_dir; |
965 | | // no need to exam return status |
966 | 33 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(source_parent_dir)); |
967 | 33 | } |
968 | 33 | return Status::OK(); |
969 | 33 | } |
970 | | |
971 | 0 | void DataDir::perform_remote_rowset_gc() { |
972 | 0 | std::vector<std::pair<std::string, std::string>> gc_kvs; |
973 | 0 | auto traverse_remote_rowset_func = [&gc_kvs](std::string_view key, |
974 | 0 | std::string_view value) -> bool { |
975 | 0 | gc_kvs.emplace_back(key, value); |
976 | 0 | return true; |
977 | 0 | }; |
978 | 0 | static_cast<void>(_meta->iterate(META_COLUMN_FAMILY_INDEX, REMOTE_ROWSET_GC_PREFIX, |
979 | 0 | traverse_remote_rowset_func)); |
980 | 0 | std::vector<std::string> deleted_keys; |
981 | 0 | for (auto& [key, val] : gc_kvs) { |
982 | 0 | auto rowset_id = key.substr(REMOTE_ROWSET_GC_PREFIX.size()); |
983 | 0 | RemoteRowsetGcPB gc_pb; |
984 | 0 | if (!gc_pb.ParseFromString(val)) { |
985 | 0 | LOG(WARNING) << "malformed RemoteRowsetGcPB. rowset_id=" << rowset_id; |
986 | 0 | deleted_keys.push_back(std::move(key)); |
987 | 0 | continue; |
988 | 0 | } |
989 | | |
990 | 0 | auto storage_resource = get_storage_resource(gc_pb.resource_id()); |
991 | 0 | if (!storage_resource) { |
992 | 0 | LOG(WARNING) << "Cannot get file system: " << gc_pb.resource_id(); |
993 | 0 | continue; |
994 | 0 | } |
995 | | |
996 | 0 | std::vector<io::Path> seg_paths; |
997 | 0 | seg_paths.reserve(gc_pb.num_segments()); |
998 | 0 | for (int i = 0; i < gc_pb.num_segments(); ++i) { |
999 | 0 | seg_paths.emplace_back( |
1000 | 0 | storage_resource->first.remote_segment_path(gc_pb.tablet_id(), rowset_id, i)); |
1001 | 0 | } |
1002 | |
|
1003 | 0 | auto& fs = storage_resource->first.fs; |
1004 | 0 | LOG(INFO) << "delete remote rowset. root_path=" << fs->root_path() |
1005 | 0 | << ", rowset_id=" << rowset_id; |
1006 | 0 | auto st = fs->batch_delete(seg_paths); |
1007 | 0 | if (st.ok()) { |
1008 | 0 | deleted_keys.push_back(std::move(key)); |
1009 | 0 | unused_remote_rowset_num << -1; |
1010 | 0 | } else { |
1011 | 0 | LOG(WARNING) << "failed to delete remote rowset. err=" << st; |
1012 | 0 | } |
1013 | 0 | } |
1014 | 0 | for (const auto& key : deleted_keys) { |
1015 | 0 | static_cast<void>(_meta->remove(META_COLUMN_FAMILY_INDEX, key)); |
1016 | 0 | } |
1017 | 0 | } |
1018 | | |
1019 | 0 | void DataDir::perform_remote_tablet_gc() { |
1020 | 0 | std::vector<std::pair<std::string, std::string>> tablet_gc_kvs; |
1021 | 0 | auto traverse_remote_tablet_func = [&tablet_gc_kvs](std::string_view key, |
1022 | 0 | std::string_view value) -> bool { |
1023 | 0 | tablet_gc_kvs.emplace_back(key, value); |
1024 | 0 | return true; |
1025 | 0 | }; |
1026 | 0 | static_cast<void>(_meta->iterate(META_COLUMN_FAMILY_INDEX, REMOTE_TABLET_GC_PREFIX, |
1027 | 0 | traverse_remote_tablet_func)); |
1028 | 0 | std::vector<std::string> deleted_keys; |
1029 | 0 | for (auto& [key, val] : tablet_gc_kvs) { |
1030 | 0 | auto tablet_id = key.substr(REMOTE_TABLET_GC_PREFIX.size()); |
1031 | 0 | RemoteTabletGcPB gc_pb; |
1032 | 0 | if (!gc_pb.ParseFromString(val)) { |
1033 | 0 | LOG(WARNING) << "malformed RemoteTabletGcPB. tablet_id=" << tablet_id; |
1034 | 0 | deleted_keys.push_back(std::move(key)); |
1035 | 0 | continue; |
1036 | 0 | } |
1037 | 0 | bool success = true; |
1038 | 0 | for (auto& resource_id : gc_pb.resource_ids()) { |
1039 | 0 | auto fs = get_filesystem(resource_id); |
1040 | 0 | if (!fs) { |
1041 | 0 | LOG(WARNING) << "could not get file system. resource_id=" << resource_id; |
1042 | 0 | success = false; |
1043 | 0 | continue; |
1044 | 0 | } |
1045 | 0 | LOG(INFO) << "delete remote rowsets of tablet. root_path=" << fs->root_path() |
1046 | 0 | << ", tablet_id=" << tablet_id; |
1047 | 0 | auto st = fs->delete_directory(DATA_PREFIX + '/' + tablet_id); |
1048 | 0 | if (!st.ok()) { |
1049 | 0 | LOG(WARNING) << "failed to delete all remote rowset in tablet. err=" << st; |
1050 | 0 | success = false; |
1051 | 0 | } |
1052 | 0 | } |
1053 | 0 | if (success) { |
1054 | 0 | deleted_keys.push_back(std::move(key)); |
1055 | 0 | } |
1056 | 0 | } |
1057 | 0 | for (const auto& key : deleted_keys) { |
1058 | 0 | static_cast<void>(_meta->remove(META_COLUMN_FAMILY_INDEX, key)); |
1059 | 0 | } |
1060 | 0 | } |
1061 | | |
1062 | | } // namespace doris |