/root/doris/be/src/olap/tablet_manager.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/tablet_manager.h" |
19 | | |
20 | | #include <fmt/format.h> |
21 | | #include <gen_cpp/AgentService_types.h> |
22 | | #include <gen_cpp/BackendService_types.h> |
23 | | #include <gen_cpp/Descriptors_types.h> |
24 | | #include <gen_cpp/MasterService_types.h> |
25 | | #include <gen_cpp/Types_types.h> |
26 | | #include <gen_cpp/olap_file.pb.h> |
27 | | #include <re2/re2.h> |
28 | | #include <unistd.h> |
29 | | |
30 | | #include <algorithm> |
31 | | #include <list> |
32 | | #include <mutex> |
33 | | #include <ostream> |
34 | | |
35 | | #include "bvar/bvar.h" |
36 | | #include "common/compiler_util.h" // IWYU pragma: keep |
37 | | #include "common/config.h" |
38 | | #include "common/logging.h" |
39 | | #include "gutil/integral_types.h" |
40 | | #include "gutil/strings/strcat.h" |
41 | | #include "gutil/strings/substitute.h" |
42 | | #include "io/fs/local_file_system.h" |
43 | | #include "olap/cumulative_compaction_time_series_policy.h" |
44 | | #include "olap/data_dir.h" |
45 | | #include "olap/olap_common.h" |
46 | | #include "olap/olap_define.h" |
47 | | #include "olap/olap_meta.h" |
48 | | #include "olap/pb_helper.h" |
49 | | #include "olap/rowset/beta_rowset.h" |
50 | | #include "olap/rowset/rowset.h" |
51 | | #include "olap/rowset/rowset_meta_manager.h" |
52 | | #include "olap/storage_engine.h" |
53 | | #include "olap/tablet.h" |
54 | | #include "olap/tablet_meta.h" |
55 | | #include "olap/tablet_meta_manager.h" |
56 | | #include "olap/tablet_schema.h" |
57 | | #include "olap/txn_manager.h" |
58 | | #include "runtime/exec_env.h" |
59 | | #include "runtime/memory/mem_tracker.h" |
60 | | #include "runtime/thread_context.h" |
61 | | #include "service/backend_options.h" |
62 | | #include "util/defer_op.h" |
63 | | #include "util/doris_metrics.h" |
64 | | #include "util/histogram.h" |
65 | | #include "util/metrics.h" |
66 | | #include "util/path_util.h" |
67 | | #include "util/scoped_cleanup.h" |
68 | | #include "util/stopwatch.hpp" |
69 | | #include "util/time.h" |
70 | | #include "util/trace.h" |
71 | | #include "util/uid_util.h" |
72 | | |
73 | | namespace doris { |
74 | | class CumulativeCompactionPolicy; |
75 | | } // namespace doris |
76 | | |
77 | | using std::map; |
78 | | using std::set; |
79 | | using std::string; |
80 | | using std::vector; |
81 | | |
82 | | namespace doris { |
83 | | using namespace ErrorCode; |
84 | | |
85 | | DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(tablet_meta_mem_consumption, MetricUnit::BYTES, "", |
86 | | mem_consumption, Labels({{"type", "tablet_meta"}})); |
87 | | |
88 | | bvar::Adder<int64_t> g_tablet_meta_schema_columns_count("tablet_meta_schema_columns_count"); |
89 | | |
90 | | TabletManager::TabletManager(StorageEngine& engine, int32_t tablet_map_lock_shard_size) |
91 | | : _engine(engine), |
92 | | _tablet_meta_mem_tracker(std::make_shared<MemTracker>( |
93 | | "TabletMeta(experimental)", ExecEnv::GetInstance()->details_mem_tracker_set())), |
94 | | _tablets_shards_size(tablet_map_lock_shard_size), |
95 | 118 | _tablets_shards_mask(tablet_map_lock_shard_size - 1) { |
96 | 118 | CHECK_GT(_tablets_shards_size, 0); |
97 | 118 | CHECK_EQ(_tablets_shards_size & _tablets_shards_mask, 0); |
98 | 118 | _tablets_shards.resize(_tablets_shards_size); |
99 | 118 | REGISTER_HOOK_METRIC(tablet_meta_mem_consumption, |
100 | 118 | [this]() { return _tablet_meta_mem_tracker->consumption(); }); |
101 | 118 | } |
102 | | |
103 | 116 | TabletManager::~TabletManager() { |
104 | 116 | DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption); |
105 | 116 | } |
106 | | |
107 | | Status TabletManager::_add_tablet_unlocked(TTabletId tablet_id, const TabletSharedPtr& tablet, |
108 | 71 | bool update_meta, bool force, RuntimeProfile* profile) { |
109 | 71 | if (profile->get_counter("AddTablet") == nullptr) { |
110 | 2 | ADD_TIMER(profile, "AddTablet"); |
111 | 2 | } |
112 | 71 | Status res = Status::OK(); |
113 | 71 | VLOG_NOTICE << "begin to add tablet to TabletManager. " |
114 | 0 | << "tablet_id=" << tablet_id << ", force=" << force; |
115 | | |
116 | 71 | TabletSharedPtr existed_tablet = nullptr; |
117 | 71 | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
118 | 71 | const auto& iter = tablet_map.find(tablet_id); |
119 | 71 | if (iter != tablet_map.end()) { |
120 | 2 | existed_tablet = iter->second; |
121 | 2 | } |
122 | | |
123 | 71 | if (existed_tablet == nullptr) { |
124 | 69 | return _add_tablet_to_map_unlocked(tablet_id, tablet, update_meta, false /*keep_files*/, |
125 | 69 | false /*drop_old*/, profile); |
126 | 69 | } |
127 | | // During restore process, the tablet is exist and snapshot loader will replace the tablet's rowsets |
128 | | // and then reload the tablet, the tablet's path will the same |
129 | 2 | if (!force) { |
130 | 2 | if (existed_tablet->tablet_path() == tablet->tablet_path()) { |
131 | 0 | return Status::Error<ENGINE_INSERT_EXISTS_TABLE>( |
132 | 0 | "add the same tablet twice! tablet_id={}, tablet_path={}", tablet_id, |
133 | 0 | tablet->tablet_path()); |
134 | 0 | } |
135 | 2 | if (existed_tablet->data_dir() == tablet->data_dir()) { |
136 | 0 | return Status::Error<ENGINE_INSERT_EXISTS_TABLE>( |
137 | 0 | "add tablet with same data dir twice! tablet_id={}", tablet_id); |
138 | 0 | } |
139 | 2 | } |
140 | | |
141 | 2 | MonotonicStopWatch watch; |
142 | 2 | watch.start(); |
143 | | |
144 | | // During storage migration, the tablet is moved to another disk, have to check |
145 | | // if the new tablet's rowset version is larger than the old one to prevent losting data during |
146 | | // migration |
147 | 2 | int64_t old_time, new_time; |
148 | 2 | int32_t old_version, new_version; |
149 | 2 | { |
150 | 2 | std::shared_lock rdlock(existed_tablet->get_header_lock()); |
151 | 2 | const RowsetSharedPtr old_rowset = existed_tablet->rowset_with_max_version(); |
152 | 2 | const RowsetSharedPtr new_rowset = tablet->rowset_with_max_version(); |
153 | | // If new tablet is empty, it is a newly created schema change tablet. |
154 | | // the old tablet is dropped before add tablet. it should not exist old tablet |
155 | 2 | if (new_rowset == nullptr) { |
156 | | // it seems useless to call unlock and return here. |
157 | | // it could prevent error when log level is changed in the future. |
158 | 0 | return Status::Error<ENGINE_INSERT_EXISTS_TABLE>( |
159 | 0 | "new tablet is empty and old tablet exists. it should not happen. tablet_id={}", |
160 | 0 | tablet_id); |
161 | 0 | } |
162 | 2 | old_time = old_rowset == nullptr ? -1 : old_rowset->creation_time(); |
163 | 2 | new_time = new_rowset->creation_time(); |
164 | 2 | old_version = old_rowset == nullptr ? -1 : old_rowset->end_version(); |
165 | 2 | new_version = new_rowset->end_version(); |
166 | 2 | } |
167 | 2 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "GetExistTabletVersion", "AddTablet"), |
168 | 2 | static_cast<int64_t>(watch.reset())); |
169 | | |
170 | | // In restore process, we replace all origin files in tablet dir with |
171 | | // the downloaded snapshot files. Then we try to reload tablet header. |
172 | | // force == true means we forcibly replace the Tablet in tablet_map |
173 | | // with the new one. But if we do so, the files in the tablet dir will be |
174 | | // dropped when the origin Tablet deconstruct. |
175 | | // So we set keep_files == true to not delete files when the |
176 | | // origin Tablet deconstruct. |
177 | | // During restore process, snapshot loader |
178 | | // replaced the old tablet's rowset with new rowsets, but the tablet path is reused, if drop files |
179 | | // here, the new rowset's file will also be dropped, so use keep files here |
180 | 2 | bool keep_files = force; |
181 | 2 | if (force || |
182 | 2 | (new_version > old_version || (new_version == old_version && new_time >= old_time))) { |
183 | | // check if new tablet's meta is in store and add new tablet's meta to meta store |
184 | 2 | res = _add_tablet_to_map_unlocked(tablet_id, tablet, update_meta, keep_files, |
185 | 2 | true /*drop_old*/, profile); |
186 | 2 | } else { |
187 | 0 | RETURN_IF_ERROR(tablet->set_tablet_state(TABLET_SHUTDOWN)); |
188 | 0 | tablet->save_meta(); |
189 | 0 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "SaveMeta", "AddTablet"), |
190 | 0 | static_cast<int64_t>(watch.reset())); |
191 | 0 | { |
192 | 0 | std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock(_shutdown_tablets_lock); |
193 | 0 | _shutdown_tablets.push_back(tablet); |
194 | 0 | } |
195 | |
|
196 | 0 | res = Status::Error<ENGINE_INSERT_OLD_TABLET>( |
197 | 0 | "set tablet to shutdown state. tablet_id={}, tablet_path={}", tablet->tablet_id(), |
198 | 0 | tablet->tablet_path()); |
199 | 0 | } |
200 | 2 | LOG(WARNING) << "add duplicated tablet. force=" << force << ", res=" << res |
201 | 2 | << ", tablet_id=" << tablet_id << ", old_version=" << old_version |
202 | 2 | << ", new_version=" << new_version << ", old_time=" << old_time |
203 | 2 | << ", new_time=" << new_time |
204 | 2 | << ", old_tablet_path=" << existed_tablet->tablet_path() |
205 | 2 | << ", new_tablet_path=" << tablet->tablet_path(); |
206 | | |
207 | 2 | return res; |
208 | 2 | } |
209 | | |
210 | | Status TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id, |
211 | | const TabletSharedPtr& tablet, bool update_meta, |
212 | | bool keep_files, bool drop_old, |
213 | 71 | RuntimeProfile* profile) { |
214 | | // check if new tablet's meta is in store and add new tablet's meta to meta store |
215 | 71 | Status res = Status::OK(); |
216 | 71 | MonotonicStopWatch watch; |
217 | 71 | watch.start(); |
218 | 71 | if (update_meta) { |
219 | | // call tablet save meta in order to valid the meta |
220 | 71 | tablet->save_meta(); |
221 | 71 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "SaveMeta", "AddTablet"), |
222 | 71 | static_cast<int64_t>(watch.reset())); |
223 | 71 | } |
224 | 71 | if (drop_old) { |
225 | | // If the new tablet is fresher than the existing one, then replace |
226 | | // the existing tablet with the new one. |
227 | | // Use default replica_id to ignore whether replica_id is match when drop tablet. |
228 | 2 | Status status = _drop_tablet_unlocked(tablet_id, /* replica_id */ 0, keep_files, false); |
229 | 2 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "DropOldTablet", "AddTablet"), |
230 | 2 | static_cast<int64_t>(watch.reset())); |
231 | 2 | RETURN_NOT_OK_STATUS_WITH_WARN( |
232 | 2 | status, strings::Substitute("failed to drop old tablet when add new tablet. " |
233 | 2 | "tablet_id=$0", |
234 | 2 | tablet_id)); |
235 | 2 | } |
236 | | // Register tablet into DataDir, so that we can manage tablet from |
237 | | // the perspective of root path. |
238 | | // Example: unregister all tables when a bad disk found. |
239 | 71 | tablet->register_tablet_into_dir(); |
240 | 71 | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
241 | 71 | tablet_map[tablet_id] = tablet; |
242 | 71 | _add_tablet_to_partition(tablet); |
243 | | // TODO: remove multiply 2 of tablet meta mem size |
244 | | // Because table schema will copy in tablet, there will be double mem cost |
245 | | // so here multiply 2 |
246 | 71 | _tablet_meta_mem_tracker->consume(tablet->tablet_meta()->mem_size() * 2); |
247 | 71 | g_tablet_meta_schema_columns_count << tablet->tablet_meta()->tablet_columns_num(); |
248 | 71 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RegisterTabletInfo", "AddTablet"), |
249 | 71 | static_cast<int64_t>(watch.reset())); |
250 | | |
251 | 71 | VLOG_NOTICE << "add tablet to map successfully." |
252 | 0 | << " tablet_id=" << tablet_id; |
253 | | |
254 | 71 | return res; |
255 | 71 | } |
256 | | |
257 | 0 | bool TabletManager::check_tablet_id_exist(TTabletId tablet_id) { |
258 | 0 | std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id)); |
259 | 0 | return _check_tablet_id_exist_unlocked(tablet_id); |
260 | 0 | } |
261 | | |
262 | 0 | bool TabletManager::_check_tablet_id_exist_unlocked(TTabletId tablet_id) { |
263 | 0 | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
264 | 0 | return tablet_map.find(tablet_id) != tablet_map.end(); |
265 | 0 | } |
266 | | |
267 | | Status TabletManager::create_tablet(const TCreateTabletReq& request, std::vector<DataDir*> stores, |
268 | 72 | RuntimeProfile* profile) { |
269 | 72 | DorisMetrics::instance()->create_tablet_requests_total->increment(1); |
270 | | |
271 | 72 | int64_t tablet_id = request.tablet_id; |
272 | 72 | LOG(INFO) << "begin to create tablet. tablet_id=" << tablet_id |
273 | 72 | << ", table_id=" << request.table_id << ", partition_id=" << request.partition_id |
274 | 72 | << ", replica_id=" << request.replica_id << ", stores.size=" << stores.size() |
275 | 72 | << ", first store=" << stores[0]->path(); |
276 | | |
277 | | // when we create rollup tablet A(assume on shard-1) from tablet B(assume on shard-2) |
278 | | // we need use write lock on shard-1 and then use read lock on shard-2 |
279 | | // if there have create rollup tablet C(assume on shard-2) from tablet D(assume on shard-1) at the same time, we will meet deadlock |
280 | 72 | std::unique_lock two_tablet_lock(_two_tablet_mtx, std::defer_lock); |
281 | 72 | bool in_restore_mode = request.__isset.in_restore_mode && request.in_restore_mode; |
282 | 72 | bool is_schema_change_or_atomic_restore = |
283 | 72 | request.__isset.base_tablet_id && request.base_tablet_id > 0; |
284 | 72 | bool need_two_lock = |
285 | 72 | is_schema_change_or_atomic_restore && |
286 | 72 | ((_tablets_shards_mask & request.base_tablet_id) != (_tablets_shards_mask & tablet_id)); |
287 | 72 | if (need_two_lock) { |
288 | 0 | SCOPED_TIMER(ADD_TIMER(profile, "GetTwoTableLock")); |
289 | 0 | two_tablet_lock.lock(); |
290 | 0 | } |
291 | | |
292 | 72 | MonotonicStopWatch shard_lock_watch; |
293 | 72 | shard_lock_watch.start(); |
294 | 72 | std::lock_guard wrlock(_get_tablets_shard_lock(tablet_id)); |
295 | 72 | shard_lock_watch.stop(); |
296 | 72 | COUNTER_UPDATE(ADD_TIMER(profile, "GetShardLock"), |
297 | 72 | static_cast<int64_t>(shard_lock_watch.elapsed_time())); |
298 | | // Make create_tablet operation to be idempotent: |
299 | | // 1. Return true if tablet with same tablet_id and schema_hash exist; |
300 | | // false if tablet with same tablet_id but different schema_hash exist. |
301 | | // 2. When this is an alter task, if the tablet(both tablet_id and schema_hash are |
302 | | // same) already exist, then just return true(an duplicate request). But if |
303 | | // tablet_id exist but with different schema_hash, return an error(report task will |
304 | | // eventually trigger its deletion). |
305 | 72 | { |
306 | 72 | SCOPED_TIMER(ADD_TIMER(profile, "GetTabletUnlocked")); |
307 | 72 | if (_get_tablet_unlocked(tablet_id) != nullptr) { |
308 | 3 | LOG(INFO) << "success to create tablet. tablet already exist. tablet_id=" << tablet_id; |
309 | 3 | return Status::OK(); |
310 | 3 | } |
311 | 72 | } |
312 | | |
313 | 69 | TabletSharedPtr base_tablet = nullptr; |
314 | | // If the CreateTabletReq has base_tablet_id then it is a alter-tablet request |
315 | 69 | if (is_schema_change_or_atomic_restore) { |
316 | | // if base_tablet_id's lock diffrent with new_tablet_id, we need lock it. |
317 | 0 | if (need_two_lock) { |
318 | 0 | SCOPED_TIMER(ADD_TIMER(profile, "GetBaseTablet")); |
319 | 0 | base_tablet = get_tablet(request.base_tablet_id); |
320 | 0 | two_tablet_lock.unlock(); |
321 | 0 | } else { |
322 | 0 | SCOPED_TIMER(ADD_TIMER(profile, "GetBaseTabletUnlocked")); |
323 | 0 | base_tablet = _get_tablet_unlocked(request.base_tablet_id); |
324 | 0 | } |
325 | 0 | if (base_tablet == nullptr) { |
326 | 0 | DorisMetrics::instance()->create_tablet_requests_failed->increment(1); |
327 | 0 | return Status::Error<TABLE_CREATE_META_ERROR>( |
328 | 0 | "fail to create tablet(change schema/atomic restore), base tablet does not " |
329 | 0 | "exist. new_tablet_id={}, base_tablet_id={}", |
330 | 0 | tablet_id, request.base_tablet_id); |
331 | 0 | } |
332 | | // If we are doing schema-change or atomic-restore, we should use the same data dir |
333 | | // TODO(lingbin): A litter trick here, the directory should be determined before |
334 | | // entering this method |
335 | | // |
336 | | // ATTN: Since all restored replicas will be saved to HDD, so no storage_medium check here. |
337 | 0 | if (in_restore_mode || |
338 | 0 | request.storage_medium == base_tablet->data_dir()->storage_medium()) { |
339 | 0 | LOG(INFO) << "create tablet use the base tablet data dir. tablet_id=" << tablet_id |
340 | 0 | << ", base tablet_id=" << request.base_tablet_id |
341 | 0 | << ", data dir=" << base_tablet->data_dir()->path(); |
342 | 0 | stores.clear(); |
343 | 0 | stores.push_back(base_tablet->data_dir()); |
344 | 0 | } |
345 | 0 | } |
346 | | |
347 | | // set alter type to schema-change. it is useless |
348 | 69 | TabletSharedPtr tablet = _internal_create_tablet_unlocked( |
349 | 69 | request, is_schema_change_or_atomic_restore, base_tablet.get(), stores, profile); |
350 | 69 | if (tablet == nullptr) { |
351 | 0 | DorisMetrics::instance()->create_tablet_requests_failed->increment(1); |
352 | 0 | return Status::Error<CE_CMD_PARAMS_ERROR>("fail to create tablet. tablet_id={}", |
353 | 0 | request.tablet_id); |
354 | 0 | } |
355 | | |
356 | 69 | LOG(INFO) << "success to create tablet. tablet_id=" << tablet_id |
357 | 69 | << ", tablet_path=" << tablet->tablet_path(); |
358 | 69 | return Status::OK(); |
359 | 69 | } |
360 | | |
361 | | TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( |
362 | | const TCreateTabletReq& request, const bool is_schema_change, const Tablet* base_tablet, |
363 | 69 | const std::vector<DataDir*>& data_dirs, RuntimeProfile* profile) { |
364 | | // If in schema-change state, base_tablet must also be provided. |
365 | | // i.e., is_schema_change and base_tablet are either assigned or not assigned |
366 | 69 | DCHECK((is_schema_change && base_tablet) || (!is_schema_change && !base_tablet)); |
367 | | |
368 | | // NOTE: The existence of tablet_id and schema_hash has already been checked, |
369 | | // no need check again here. |
370 | | |
371 | 69 | const std::string parent_timer_name = "InternalCreateTablet"; |
372 | 69 | SCOPED_TIMER(ADD_TIMER(profile, parent_timer_name)); |
373 | | |
374 | 69 | MonotonicStopWatch watch; |
375 | 69 | watch.start(); |
376 | 69 | auto create_meta_timer = ADD_CHILD_TIMER(profile, "CreateMeta", parent_timer_name); |
377 | 69 | auto tablet = _create_tablet_meta_and_dir_unlocked(request, is_schema_change, base_tablet, |
378 | 69 | data_dirs, profile); |
379 | 69 | COUNTER_UPDATE(create_meta_timer, static_cast<int64_t>(watch.reset())); |
380 | 69 | if (tablet == nullptr) { |
381 | 0 | return nullptr; |
382 | 0 | } |
383 | | |
384 | 69 | int64_t new_tablet_id = request.tablet_id; |
385 | 69 | int32_t new_schema_hash = request.tablet_schema.schema_hash; |
386 | | |
387 | | // should remove the tablet's pending_id no matter create-tablet success or not |
388 | 69 | DataDir* data_dir = tablet->data_dir(); |
389 | | |
390 | | // TODO(yiguolei) |
391 | | // the following code is very difficult to understand because it mixed alter tablet v2 |
392 | | // and alter tablet v1 should remove alter tablet v1 code after v0.12 |
393 | 69 | Status res = Status::OK(); |
394 | 69 | bool is_tablet_added = false; |
395 | 69 | do { |
396 | 69 | res = tablet->init(); |
397 | 69 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "TabletInit", parent_timer_name), |
398 | 69 | static_cast<int64_t>(watch.reset())); |
399 | 69 | if (!res.ok()) { |
400 | 0 | LOG(WARNING) << "tablet init failed. tablet:" << tablet->tablet_id(); |
401 | 0 | break; |
402 | 0 | } |
403 | | |
404 | | // Create init version if this is not a restore mode replica and request.version is set |
405 | | // bool in_restore_mode = request.__isset.in_restore_mode && request.in_restore_mode; |
406 | | // if (!in_restore_mode && request.__isset.version) { |
407 | | // create initial rowset before add it to storage engine could omit many locks |
408 | 69 | res = tablet->create_initial_rowset(request.version); |
409 | 69 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "InitRowset", parent_timer_name), |
410 | 69 | static_cast<int64_t>(watch.reset())); |
411 | 69 | if (!res.ok()) { |
412 | 0 | LOG(WARNING) << "fail to create initial version for tablet. res=" << res; |
413 | 0 | break; |
414 | 0 | } |
415 | | |
416 | 69 | if (is_schema_change) { |
417 | | // if this is a new alter tablet, has to set its state to not ready |
418 | | // because schema change handler depends on it to check whether history data |
419 | | // convert finished |
420 | 0 | static_cast<void>(tablet->set_tablet_state(TabletState::TABLET_NOTREADY)); |
421 | 0 | } |
422 | | // Add tablet to StorageEngine will make it visible to user |
423 | | // Will persist tablet meta |
424 | 69 | auto add_tablet_timer = ADD_CHILD_TIMER(profile, "AddTablet", parent_timer_name); |
425 | 69 | res = _add_tablet_unlocked(new_tablet_id, tablet, /*update_meta*/ true, false, profile); |
426 | 69 | COUNTER_UPDATE(add_tablet_timer, static_cast<int64_t>(watch.reset())); |
427 | 69 | if (!res.ok()) { |
428 | 0 | LOG(WARNING) << "fail to add tablet to StorageEngine. res=" << res; |
429 | 0 | break; |
430 | 0 | } |
431 | 69 | is_tablet_added = true; |
432 | | |
433 | | // TODO(lingbin): The following logic seems useless, can be removed? |
434 | | // Because if _add_tablet_unlocked() return OK, we must can get it from map. |
435 | 69 | TabletSharedPtr tablet_ptr = _get_tablet_unlocked(new_tablet_id); |
436 | 69 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "GetTablet", parent_timer_name), |
437 | 69 | static_cast<int64_t>(watch.reset())); |
438 | 69 | if (tablet_ptr == nullptr) { |
439 | 0 | res = Status::Error<TABLE_NOT_FOUND>("fail to get tablet. res={}", res); |
440 | 0 | break; |
441 | 0 | } |
442 | 69 | } while (false); |
443 | | |
444 | 69 | if (res.ok()) { |
445 | 69 | return tablet; |
446 | 69 | } |
447 | | // something is wrong, we need clear environment |
448 | 0 | if (is_tablet_added) { |
449 | 0 | Status status = _drop_tablet_unlocked(new_tablet_id, request.replica_id, false, false); |
450 | 0 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "DropTablet", parent_timer_name), |
451 | 0 | static_cast<int64_t>(watch.reset())); |
452 | 0 | if (!status.ok()) { |
453 | 0 | LOG(WARNING) << "fail to drop tablet when create tablet failed. res=" << res; |
454 | 0 | } |
455 | 0 | } else { |
456 | 0 | tablet->delete_all_files(); |
457 | 0 | static_cast<void>(TabletMetaManager::remove(data_dir, new_tablet_id, new_schema_hash)); |
458 | 0 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RemoveTabletFiles", parent_timer_name), |
459 | 0 | static_cast<int64_t>(watch.reset())); |
460 | 0 | } |
461 | 0 | return nullptr; |
462 | 69 | } |
463 | | |
464 | 69 | static string _gen_tablet_dir(const string& dir, int16_t shard_id, int64_t tablet_id) { |
465 | 69 | string path = dir; |
466 | 69 | path = path_util::join_path_segments(path, DATA_PREFIX); |
467 | 69 | path = path_util::join_path_segments(path, std::to_string(shard_id)); |
468 | 69 | path = path_util::join_path_segments(path, std::to_string(tablet_id)); |
469 | 69 | return path; |
470 | 69 | } |
471 | | |
472 | | TabletSharedPtr TabletManager::_create_tablet_meta_and_dir_unlocked( |
473 | | const TCreateTabletReq& request, const bool is_schema_change, const Tablet* base_tablet, |
474 | 69 | const std::vector<DataDir*>& data_dirs, RuntimeProfile* profile) { |
475 | 69 | string pending_id = StrCat(TABLET_ID_PREFIX, request.tablet_id); |
476 | | // Many attempts are made here in the hope that even if a disk fails, it can still continue. |
477 | 69 | std::string parent_timer_name = "CreateMeta"; |
478 | 69 | MonotonicStopWatch watch; |
479 | 69 | watch.start(); |
480 | 69 | for (auto& data_dir : data_dirs) { |
481 | 69 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RemovePendingIds", parent_timer_name), |
482 | 69 | static_cast<int64_t>(watch.reset())); |
483 | | |
484 | 69 | TabletMetaSharedPtr tablet_meta; |
485 | | // if create meta failed, do not need to clean dir, because it is only in memory |
486 | 69 | Status res = _create_tablet_meta_unlocked(request, data_dir, is_schema_change, base_tablet, |
487 | 69 | &tablet_meta); |
488 | 69 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "CreateMetaUnlock", parent_timer_name), |
489 | 69 | static_cast<int64_t>(watch.reset())); |
490 | 69 | if (!res.ok()) { |
491 | 0 | LOG(WARNING) << "fail to create tablet meta. res=" << res |
492 | 0 | << ", root=" << data_dir->path(); |
493 | 0 | continue; |
494 | 0 | } |
495 | | |
496 | 69 | string tablet_dir = |
497 | 69 | _gen_tablet_dir(data_dir->path(), tablet_meta->shard_id(), request.tablet_id); |
498 | 69 | string schema_hash_dir = path_util::join_path_segments( |
499 | 69 | tablet_dir, std::to_string(request.tablet_schema.schema_hash)); |
500 | | |
501 | | // Because the tablet is removed asynchronously, so that the dir may still exist when BE |
502 | | // receive create-tablet request again, For example retried schema-change request |
503 | 69 | bool exists = true; |
504 | 69 | res = io::global_local_filesystem()->exists(schema_hash_dir, &exists); |
505 | 69 | if (!res.ok()) { |
506 | 0 | continue; |
507 | 0 | } |
508 | 69 | if (exists) { |
509 | 0 | LOG(WARNING) << "skip this dir because tablet path exist, path=" << schema_hash_dir; |
510 | 0 | continue; |
511 | 69 | } else { |
512 | 69 | Status st = io::global_local_filesystem()->create_directory(schema_hash_dir); |
513 | 69 | if (!st.ok()) { |
514 | 0 | continue; |
515 | 0 | } |
516 | 69 | } |
517 | | |
518 | 69 | if (tablet_meta->partition_id() <= 0) { |
519 | 5 | LOG(WARNING) << "invalid partition id " << tablet_meta->partition_id() << ", tablet " |
520 | 5 | << tablet_meta->tablet_id(); |
521 | 5 | } |
522 | 69 | TabletSharedPtr new_tablet = |
523 | 69 | std::make_shared<Tablet>(_engine, std::move(tablet_meta), data_dir); |
524 | 69 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "CreateTabletFromMeta", parent_timer_name), |
525 | 69 | static_cast<int64_t>(watch.reset())); |
526 | 69 | return new_tablet; |
527 | 69 | } |
528 | 0 | return nullptr; |
529 | 69 | } |
530 | | |
531 | | Status TabletManager::drop_tablet(TTabletId tablet_id, TReplicaId replica_id, |
532 | 24 | bool is_drop_table_or_partition) { |
533 | 24 | auto& shard = _get_tablets_shard(tablet_id); |
534 | 24 | std::lock_guard wrlock(shard.lock); |
535 | 24 | return _drop_tablet_unlocked(tablet_id, replica_id, false, is_drop_table_or_partition); |
536 | 24 | } |
537 | | |
538 | | // Drop specified tablet. |
539 | | Status TabletManager::_drop_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, |
540 | 26 | bool keep_files, bool is_drop_table_or_partition) { |
541 | 26 | LOG(INFO) << "begin drop tablet. tablet_id=" << tablet_id << ", replica_id=" << replica_id |
542 | 26 | << ", is_drop_table_or_partition=" << is_drop_table_or_partition; |
543 | 26 | DorisMetrics::instance()->drop_tablet_requests_total->increment(1); |
544 | | |
545 | 26 | RETURN_IF_ERROR(register_transition_tablet(tablet_id, "drop tablet")); |
546 | 26 | Defer defer {[&]() { unregister_transition_tablet(tablet_id, "drop tablet"); }}; |
547 | | |
548 | | // Fetch tablet which need to be dropped |
549 | 26 | TabletSharedPtr to_drop_tablet = _get_tablet_unlocked(tablet_id); |
550 | 26 | if (to_drop_tablet == nullptr) { |
551 | 1 | LOG(WARNING) << "fail to drop tablet because it does not exist. " |
552 | 1 | << "tablet_id=" << tablet_id; |
553 | 1 | return Status::OK(); |
554 | 1 | } |
555 | | |
556 | | // We should compare replica id to avoid dropping new cloned tablet. |
557 | | // Iff request replica id is 0, FE may be an older release, then we drop this tablet as before. |
558 | 25 | if (to_drop_tablet->replica_id() != replica_id && replica_id != 0) { |
559 | 0 | return Status::Aborted("replica_id not match({} vs {})", to_drop_tablet->replica_id(), |
560 | 0 | replica_id); |
561 | 0 | } |
562 | | |
563 | 25 | _remove_tablet_from_partition(to_drop_tablet); |
564 | 25 | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
565 | 25 | tablet_map.erase(tablet_id); |
566 | | |
567 | 25 | to_drop_tablet->clear_cache(); |
568 | | |
569 | 25 | if (!keep_files) { |
570 | | // drop tablet will update tablet meta, should lock |
571 | 25 | std::lock_guard<std::shared_mutex> wrlock(to_drop_tablet->get_header_lock()); |
572 | 25 | SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); |
573 | 25 | LOG(INFO) << "set tablet to shutdown state and remove it from memory. " |
574 | 25 | << "tablet_id=" << tablet_id << ", tablet_path=" << to_drop_tablet->tablet_path(); |
575 | | // NOTE: has to update tablet here, but must not update tablet meta directly. |
576 | | // because other thread may hold the tablet object, they may save meta too. |
577 | | // If update meta directly here, other thread may override the meta |
578 | | // and the tablet will be loaded at restart time. |
579 | | // To avoid this exception, we first set the state of the tablet to `SHUTDOWN`. |
580 | 25 | RETURN_IF_ERROR(to_drop_tablet->set_tablet_state(TABLET_SHUTDOWN)); |
581 | | // We must record unused remote rowsets path info to OlapMeta before tablet state is marked as TABLET_SHUTDOWN in OlapMeta, |
582 | | // otherwise if BE shutdown after saving tablet state, these remote rowsets path info will lost. |
583 | 25 | if (is_drop_table_or_partition) { |
584 | 0 | RETURN_IF_ERROR(to_drop_tablet->remove_all_remote_rowsets()); |
585 | 0 | } |
586 | 25 | to_drop_tablet->save_meta(); |
587 | 25 | { |
588 | 25 | std::lock_guard<std::shared_mutex> wrdlock(_shutdown_tablets_lock); |
589 | 25 | _shutdown_tablets.push_back(to_drop_tablet); |
590 | 25 | } |
591 | 25 | } |
592 | | |
593 | 25 | to_drop_tablet->deregister_tablet_from_dir(); |
594 | 25 | _tablet_meta_mem_tracker->release(to_drop_tablet->tablet_meta()->mem_size() * 2); |
595 | 25 | g_tablet_meta_schema_columns_count << -to_drop_tablet->tablet_meta()->tablet_columns_num(); |
596 | 25 | return Status::OK(); |
597 | 25 | } |
598 | | |
599 | 2.27k | TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, bool include_deleted, string* err) { |
600 | 2.27k | std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id)); |
601 | 2.27k | return _get_tablet_unlocked(tablet_id, include_deleted, err); |
602 | 2.27k | } |
603 | | |
604 | 35 | std::vector<TabletSharedPtr> TabletManager::get_all_tablet(std::function<bool(Tablet*)>&& filter) { |
605 | 35 | std::vector<TabletSharedPtr> res; |
606 | 35 | for_each_tablet([&](const TabletSharedPtr& tablet) { res.emplace_back(tablet); }, |
607 | 35 | std::move(filter)); |
608 | 35 | return res; |
609 | 35 | } |
610 | | |
611 | | void TabletManager::for_each_tablet(std::function<void(const TabletSharedPtr&)>&& handler, |
612 | 133 | std::function<bool(Tablet*)>&& filter) { |
613 | 133 | std::vector<TabletSharedPtr> tablets; |
614 | 133 | for (const auto& tablets_shard : _tablets_shards) { |
615 | 133 | tablets.clear(); |
616 | 133 | { |
617 | 133 | std::shared_lock rdlock(tablets_shard.lock); |
618 | 133 | for (const auto& [id, tablet] : tablets_shard.tablet_map) { |
619 | 4 | if (filter(tablet.get())) { |
620 | 4 | tablets.emplace_back(tablet); |
621 | 4 | } |
622 | 4 | } |
623 | 133 | } |
624 | 133 | for (const auto& tablet : tablets) { |
625 | 4 | handler(tablet); |
626 | 4 | } |
627 | 133 | } |
628 | 133 | } |
629 | | |
630 | | TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, bool include_deleted, |
631 | 2.27k | string* err) { |
632 | 2.27k | TabletSharedPtr tablet; |
633 | 2.27k | tablet = _get_tablet_unlocked(tablet_id); |
634 | 2.27k | if (tablet == nullptr && include_deleted) { |
635 | 4 | std::shared_lock rdlock(_shutdown_tablets_lock); |
636 | 4 | for (auto& deleted_tablet : _shutdown_tablets) { |
637 | 2 | CHECK(deleted_tablet != nullptr) << "deleted tablet is nullptr"; |
638 | 2 | if (deleted_tablet->tablet_id() == tablet_id) { |
639 | 2 | tablet = deleted_tablet; |
640 | 2 | break; |
641 | 2 | } |
642 | 2 | } |
643 | 4 | } |
644 | | |
645 | 2.27k | if (tablet == nullptr) { |
646 | 17 | if (err != nullptr) { |
647 | 1 | *err = "tablet does not exist. " + BackendOptions::get_localhost(); |
648 | 1 | } |
649 | 17 | return nullptr; |
650 | 17 | } |
651 | | #ifndef BE_TEST |
652 | | if (!tablet->is_used()) { |
653 | | LOG(WARNING) << "tablet cannot be used. tablet=" << tablet_id; |
654 | | if (err != nullptr) { |
655 | | *err = "tablet cannot be used. " + BackendOptions::get_localhost(); |
656 | | } |
657 | | return nullptr; |
658 | | } |
659 | | #endif |
660 | | |
661 | 2.25k | return tablet; |
662 | 2.27k | } |
663 | | |
664 | | TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, TabletUid tablet_uid, |
665 | 0 | bool include_deleted, string* err) { |
666 | 0 | std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id)); |
667 | 0 | TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, include_deleted, err); |
668 | 0 | if (tablet != nullptr && tablet->tablet_uid() == tablet_uid) { |
669 | 0 | return tablet; |
670 | 0 | } |
671 | 0 | return nullptr; |
672 | 0 | } |
673 | | |
674 | 0 | uint64_t TabletManager::get_rowset_nums() { |
675 | 0 | uint64_t rowset_nums = 0; |
676 | 0 | for_each_tablet([&](const TabletSharedPtr& tablet) { rowset_nums += tablet->version_count(); }, |
677 | 0 | filter_all_tablets); |
678 | 0 | return rowset_nums; |
679 | 0 | } |
680 | | |
681 | 0 | uint64_t TabletManager::get_segment_nums() { |
682 | 0 | uint64_t segment_nums = 0; |
683 | 0 | for_each_tablet([&](const TabletSharedPtr& tablet) { segment_nums += tablet->segment_count(); }, |
684 | 0 | filter_all_tablets); |
685 | 0 | return segment_nums; |
686 | 0 | } |
687 | | |
688 | | bool TabletManager::get_tablet_id_and_schema_hash_from_path(const string& path, |
689 | | TTabletId* tablet_id, |
690 | 83 | TSchemaHash* schema_hash) { |
691 | | // the path like: /data/14/10080/964828783/ |
692 | 83 | static re2::RE2 normal_re("/data/\\d+/(\\d+)/(\\d+)($|/)"); |
693 | | // match tablet schema hash data path, for example, the path is /data/1/16791/29998 |
694 | | // 1 is shard id , 16791 is tablet id, 29998 is schema hash |
695 | 83 | if (RE2::PartialMatch(path, normal_re, tablet_id, schema_hash)) { |
696 | 79 | return true; |
697 | 79 | } |
698 | | |
699 | | // If we can't match normal path pattern, this may be a path which is a empty tablet |
700 | | // directory. Use this pattern to match empty tablet directory. In this case schema_hash |
701 | | // will be set to zero. |
702 | 4 | static re2::RE2 empty_tablet_re("/data/\\d+/(\\d+)($|/$)"); |
703 | 4 | if (!RE2::PartialMatch(path, empty_tablet_re, tablet_id)) { |
704 | 2 | return false; |
705 | 2 | } |
706 | 2 | *schema_hash = 0; |
707 | 2 | return true; |
708 | 4 | } |
709 | | |
710 | 3 | bool TabletManager::get_rowset_id_from_path(const string& path, RowsetId* rowset_id) { |
711 | | // the path like: /data/14/10080/964828783/02000000000000969144d8725cb62765f9af6cd3125d5a91_0.dat |
712 | 3 | static re2::RE2 re("/data/\\d+/\\d+/\\d+/([A-Fa-f0-9]+)_.*"); |
713 | 3 | string id_str; |
714 | 3 | bool ret = RE2::PartialMatch(path, re, &id_str); |
715 | 3 | if (ret) { |
716 | 1 | rowset_id->init(id_str); |
717 | 1 | return true; |
718 | 1 | } |
719 | 2 | return false; |
720 | 3 | } |
721 | | |
722 | 0 | void TabletManager::get_tablet_stat(TTabletStatResult* result) { |
723 | 0 | std::shared_ptr<std::vector<TTabletStat>> local_cache; |
724 | 0 | { |
725 | 0 | std::lock_guard<std::mutex> guard(_tablet_stat_cache_mutex); |
726 | 0 | local_cache = _tablet_stat_list_cache; |
727 | 0 | } |
728 | 0 | result->__set_tablet_stat_list(*local_cache); |
729 | 0 | } |
730 | | |
731 | | TabletSharedPtr TabletManager::find_best_tablet_to_compaction( |
732 | | CompactionType compaction_type, DataDir* data_dir, |
733 | | const std::unordered_set<TabletSharedPtr>& tablet_submitted_compaction, uint32_t* score, |
734 | | const std::unordered_map<std::string_view, std::shared_ptr<CumulativeCompactionPolicy>>& |
735 | 41 | all_cumulative_compaction_policies) { |
736 | 41 | int64_t now_ms = UnixMillis(); |
737 | 41 | const string& compaction_type_str = |
738 | 41 | compaction_type == CompactionType::BASE_COMPACTION ? "base" : "cumulative"; |
739 | 41 | uint32_t highest_score = 0; |
740 | 41 | uint32_t compaction_score = 0; |
741 | 41 | TabletSharedPtr best_tablet; |
742 | 41 | auto handler = [&](const TabletSharedPtr& tablet_ptr) { |
743 | 4 | if (tablet_ptr->tablet_meta()->tablet_schema()->disable_auto_compaction()) { |
744 | 0 | LOG_EVERY_N(INFO, 500) << "Tablet " << tablet_ptr->tablet_id() |
745 | 0 | << " will be ignored by automatic compaction tasks since it's " |
746 | 0 | << "set to disabled automatic compaction."; |
747 | 0 | return; |
748 | 0 | } |
749 | | |
750 | 4 | if (config::enable_skip_tablet_compaction && |
751 | 4 | tablet_ptr->should_skip_compaction(compaction_type, UnixSeconds())) { |
752 | 2 | return; |
753 | 2 | } |
754 | 2 | if (!tablet_ptr->can_do_compaction(data_dir->path_hash(), compaction_type)) { |
755 | 1 | return; |
756 | 1 | } |
757 | | |
758 | 1 | auto search = tablet_submitted_compaction.find(tablet_ptr); |
759 | 1 | if (search != tablet_submitted_compaction.end()) { |
760 | 0 | return; |
761 | 0 | } |
762 | | |
763 | 1 | int64_t last_failure_ms = tablet_ptr->last_cumu_compaction_failure_time(); |
764 | 1 | if (compaction_type == CompactionType::BASE_COMPACTION) { |
765 | 0 | last_failure_ms = tablet_ptr->last_base_compaction_failure_time(); |
766 | 0 | } |
767 | 1 | if (now_ms - last_failure_ms <= 5000) { |
768 | 0 | VLOG_DEBUG << "Too often to check compaction, skip it. " |
769 | 0 | << "compaction_type=" << compaction_type_str |
770 | 0 | << ", last_failure_time_ms=" << last_failure_ms |
771 | 0 | << ", tablet_id=" << tablet_ptr->tablet_id(); |
772 | 0 | return; |
773 | 0 | } |
774 | | |
775 | 1 | if (compaction_type == CompactionType::BASE_COMPACTION) { |
776 | 0 | std::unique_lock<std::mutex> lock(tablet_ptr->get_base_compaction_lock(), |
777 | 0 | std::try_to_lock); |
778 | 0 | if (!lock.owns_lock()) { |
779 | 0 | LOG(INFO) << "can not get base lock: " << tablet_ptr->tablet_id(); |
780 | 0 | return; |
781 | 0 | } |
782 | 1 | } else { |
783 | 1 | std::unique_lock<std::mutex> lock(tablet_ptr->get_cumulative_compaction_lock(), |
784 | 1 | std::try_to_lock); |
785 | 1 | if (!lock.owns_lock()) { |
786 | 0 | LOG(INFO) << "can not get cumu lock: " << tablet_ptr->tablet_id(); |
787 | 0 | return; |
788 | 0 | } |
789 | 1 | } |
790 | 1 | auto cumulative_compaction_policy = all_cumulative_compaction_policies.at( |
791 | 1 | tablet_ptr->tablet_meta()->compaction_policy()); |
792 | 1 | uint32_t current_compaction_score = |
793 | 1 | tablet_ptr->calc_compaction_score(compaction_type, cumulative_compaction_policy); |
794 | 1 | if (current_compaction_score < 5) { |
795 | 1 | tablet_ptr->set_skip_compaction(true, compaction_type, UnixSeconds()); |
796 | 1 | } |
797 | 1 | if (current_compaction_score > highest_score) { |
798 | 1 | highest_score = current_compaction_score; |
799 | 1 | compaction_score = current_compaction_score; |
800 | 1 | best_tablet = tablet_ptr; |
801 | 1 | } |
802 | 1 | }; |
803 | | |
804 | 41 | for_each_tablet(handler, filter_all_tablets); |
805 | 41 | if (best_tablet != nullptr) { |
806 | 1 | VLOG_CRITICAL << "Found the best tablet for compaction. " |
807 | 0 | << "compaction_type=" << compaction_type_str |
808 | 0 | << ", tablet_id=" << best_tablet->tablet_id() << ", path=" << data_dir->path() |
809 | 0 | << ", compaction_score=" << compaction_score |
810 | 0 | << ", highest_score=" << highest_score; |
811 | 1 | *score = compaction_score; |
812 | 1 | } |
813 | 41 | return best_tablet; |
814 | 41 | } |
815 | | |
816 | | Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_id, |
817 | | TSchemaHash schema_hash, const string& meta_binary, |
818 | | bool update_meta, bool force, bool restore, |
819 | 2 | bool check_path) { |
820 | 2 | TabletMetaSharedPtr tablet_meta(new TabletMeta()); |
821 | 2 | Status status = tablet_meta->deserialize(meta_binary); |
822 | 2 | if (!status.ok()) { |
823 | 0 | return Status::Error<HEADER_PB_PARSE_FAILED>( |
824 | 0 | "fail to load tablet because can not parse meta_binary string. tablet_id={}, " |
825 | 0 | "schema_hash={}, path={}, status={}", |
826 | 0 | tablet_id, schema_hash, data_dir->path(), status); |
827 | 0 | } |
828 | 2 | tablet_meta->init_rs_metas_fs(data_dir->fs()); |
829 | | |
830 | | // check if tablet meta is valid |
831 | 2 | if (tablet_meta->tablet_id() != tablet_id || tablet_meta->schema_hash() != schema_hash) { |
832 | 0 | return Status::Error<HEADER_PB_PARSE_FAILED>( |
833 | 0 | "fail to load tablet because meet invalid tablet meta. trying to load " |
834 | 0 | "tablet(tablet_id={}, schema_hash={}), but meet tablet={}, path={}", |
835 | 0 | tablet_id, schema_hash, tablet_meta->tablet_id(), data_dir->path()); |
836 | 0 | } |
837 | 2 | if (tablet_meta->tablet_uid().hi == 0 && tablet_meta->tablet_uid().lo == 0) { |
838 | 0 | return Status::Error<HEADER_PB_PARSE_FAILED>( |
839 | 0 | "fail to load tablet because its uid == 0. tablet={}, path={}", |
840 | 0 | tablet_meta->tablet_id(), data_dir->path()); |
841 | 0 | } |
842 | | |
843 | 2 | if (restore) { |
844 | | // we're restoring tablet from trash, tablet state should be changed from shutdown back to running |
845 | 0 | tablet_meta->set_tablet_state(TABLET_RUNNING); |
846 | 0 | } |
847 | | |
848 | 2 | if (tablet_meta->partition_id() == 0) { |
849 | 0 | LOG(WARNING) << "tablet=" << tablet_id << " load from meta but partition id eq 0"; |
850 | 0 | } |
851 | | |
852 | 2 | TabletSharedPtr tablet = std::make_shared<Tablet>(_engine, std::move(tablet_meta), data_dir); |
853 | | |
854 | | // NOTE: method load_tablet_from_meta could be called by two cases as below |
855 | | // case 1: BE start; |
856 | | // case 2: Clone Task/Restore |
857 | | // For case 1 doesn't need path check because BE is just starting and not ready, |
858 | | // just check tablet meta status to judge whether tablet is delete is enough. |
859 | | // For case 2, If a tablet has just been copied to local BE, |
860 | | // it may be cleared by gc-thread(see perform_tablet_gc) because the tablet meta may not be loaded to memory. |
861 | | // So clone task should check path and then failed and retry in this case. |
862 | 2 | if (check_path) { |
863 | 2 | bool exists = true; |
864 | 2 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(tablet->tablet_path(), &exists)); |
865 | 2 | if (!exists) { |
866 | 0 | return Status::Error<TABLE_ALREADY_DELETED_ERROR>( |
867 | 0 | "tablet path not exists, create tablet failed, path={}", tablet->tablet_path()); |
868 | 0 | } |
869 | 2 | } |
870 | | |
871 | 2 | if (tablet->tablet_meta()->tablet_state() == TABLET_SHUTDOWN) { |
872 | 0 | { |
873 | 0 | std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock(_shutdown_tablets_lock); |
874 | 0 | _shutdown_tablets.push_back(tablet); |
875 | 0 | } |
876 | 0 | return Status::Error<TABLE_ALREADY_DELETED_ERROR>( |
877 | 0 | "fail to load tablet because it is to be deleted. tablet_id={}, schema_hash={}, " |
878 | 0 | "path={}", |
879 | 0 | tablet_id, schema_hash, data_dir->path()); |
880 | 0 | } |
881 | | // NOTE: We do not check tablet's initial version here, because if BE restarts when |
882 | | // one tablet is doing schema-change, we may meet empty tablet. |
883 | 2 | if (tablet->max_version().first == -1 && tablet->tablet_state() == TABLET_RUNNING) { |
884 | | // tablet state is invalid, drop tablet |
885 | 0 | return Status::Error<TABLE_INDEX_VALIDATE_ERROR>( |
886 | 0 | "fail to load tablet. it is in running state but without delta. tablet={}, path={}", |
887 | 0 | tablet->tablet_id(), data_dir->path()); |
888 | 0 | } |
889 | | |
890 | 2 | RETURN_NOT_OK_STATUS_WITH_WARN( |
891 | 2 | tablet->init(), |
892 | 2 | strings::Substitute("tablet init failed. tablet=$0", tablet->tablet_id())); |
893 | | |
894 | 2 | RuntimeProfile profile("CreateTablet"); |
895 | 2 | std::lock_guard<std::shared_mutex> wrlock(_get_tablets_shard_lock(tablet_id)); |
896 | 2 | RETURN_NOT_OK_STATUS_WITH_WARN( |
897 | 2 | _add_tablet_unlocked(tablet_id, tablet, update_meta, force, &profile), |
898 | 2 | strings::Substitute("fail to add tablet. tablet=$0", tablet->tablet_id())); |
899 | | |
900 | 2 | return Status::OK(); |
901 | 2 | } |
902 | | |
903 | | Status TabletManager::load_tablet_from_dir(DataDir* store, TTabletId tablet_id, |
904 | | SchemaHash schema_hash, const string& schema_hash_path, |
905 | 2 | bool force, bool restore) { |
906 | 2 | LOG(INFO) << "begin to load tablet from dir. " |
907 | 2 | << " tablet_id=" << tablet_id << " schema_hash=" << schema_hash |
908 | 2 | << " path = " << schema_hash_path << " force = " << force << " restore = " << restore; |
909 | | // not add lock here, because load_tablet_from_meta already add lock |
910 | 2 | std::string header_path = TabletMeta::construct_header_file_path(schema_hash_path, tablet_id); |
911 | | // should change shard id before load tablet |
912 | 2 | std::string shard_path = |
913 | 2 | path_util::dir_name(path_util::dir_name(path_util::dir_name(header_path))); |
914 | 2 | std::string shard_str = shard_path.substr(shard_path.find_last_of('/') + 1); |
915 | 2 | int32_t shard = stol(shard_str); |
916 | | |
917 | 2 | bool exists = false; |
918 | 2 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(header_path, &exists)); |
919 | 2 | if (!exists) { |
920 | 0 | return Status::Error<FILE_NOT_EXIST>("fail to find header file. [header_path={}]", |
921 | 0 | header_path); |
922 | 0 | } |
923 | | |
924 | 2 | TabletMetaSharedPtr tablet_meta(new TabletMeta()); |
925 | 2 | if (!tablet_meta->create_from_file(header_path).ok()) { |
926 | 0 | return Status::Error<ENGINE_LOAD_INDEX_TABLE_ERROR>( |
927 | 0 | "fail to load tablet_meta. file_path={}", header_path); |
928 | 0 | } |
929 | 2 | TabletUid tablet_uid = TabletUid::gen_uid(); |
930 | | |
931 | | // remove rowset binlog metas |
932 | 2 | auto binlog_metas_file = fmt::format("{}/rowset_binlog_metas.pb", schema_hash_path); |
933 | 2 | bool binlog_metas_file_exists = false; |
934 | 2 | auto file_exists_status = |
935 | 2 | io::global_local_filesystem()->exists(binlog_metas_file, &binlog_metas_file_exists); |
936 | 2 | if (!file_exists_status.ok()) { |
937 | 0 | return file_exists_status; |
938 | 0 | } |
939 | 2 | bool contain_binlog = false; |
940 | 2 | RowsetBinlogMetasPB rowset_binlog_metas_pb; |
941 | 2 | if (binlog_metas_file_exists) { |
942 | 0 | auto binlog_meta_filesize = std::filesystem::file_size(binlog_metas_file); |
943 | 0 | if (binlog_meta_filesize > 0) { |
944 | 0 | contain_binlog = true; |
945 | 0 | RETURN_IF_ERROR(read_pb(binlog_metas_file, &rowset_binlog_metas_pb)); |
946 | 0 | VLOG_DEBUG << "load rowset binlog metas from file. file_path=" << binlog_metas_file; |
947 | 0 | } |
948 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(binlog_metas_file)); |
949 | 0 | } |
950 | 2 | if (contain_binlog) { |
951 | 0 | auto binlog_dir = fmt::format("{}/_binlog", schema_hash_path); |
952 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(binlog_dir)); |
953 | | |
954 | 0 | std::vector<io::FileInfo> files; |
955 | 0 | RETURN_IF_ERROR( |
956 | 0 | io::global_local_filesystem()->list(schema_hash_path, true, &files, &exists)); |
957 | 0 | for (auto& file : files) { |
958 | 0 | auto& filename = file.file_name; |
959 | 0 | std::string new_suffix; |
960 | 0 | std::string old_suffix; |
961 | |
|
962 | 0 | if (filename.ends_with(".binlog")) { |
963 | 0 | old_suffix = ".binlog"; |
964 | 0 | new_suffix = ".dat"; |
965 | 0 | } else if (filename.ends_with(".binlog-index")) { |
966 | 0 | old_suffix = ".binlog-index"; |
967 | 0 | new_suffix = ".idx"; |
968 | 0 | } else { |
969 | 0 | continue; |
970 | 0 | } |
971 | | |
972 | 0 | std::string new_filename = filename; |
973 | 0 | new_filename.replace(filename.size() - old_suffix.size(), old_suffix.size(), |
974 | 0 | new_suffix); |
975 | 0 | auto from = fmt::format("{}/{}", schema_hash_path, filename); |
976 | 0 | auto to = fmt::format("{}/_binlog/{}", schema_hash_path, new_filename); |
977 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->rename(from, to)); |
978 | 0 | } |
979 | | |
980 | 0 | auto* meta = store->get_meta(); |
981 | | // if ingest binlog metas error, it will be gc in gc_unused_binlog_metas |
982 | 0 | RETURN_IF_ERROR( |
983 | 0 | RowsetMetaManager::ingest_binlog_metas(meta, tablet_uid, &rowset_binlog_metas_pb)); |
984 | 0 | } |
985 | | |
986 | | // has to change shard id here, because meta file maybe copied from other source |
987 | | // its shard is different from local shard |
988 | 2 | tablet_meta->set_shard_id(shard); |
989 | | // load dir is called by clone, restore, storage migration |
990 | | // should change tablet uid when tablet object changed |
991 | 2 | tablet_meta->set_tablet_uid(std::move(tablet_uid)); |
992 | 2 | std::string meta_binary; |
993 | 2 | tablet_meta->serialize(&meta_binary); |
994 | 2 | RETURN_NOT_OK_STATUS_WITH_WARN( |
995 | 2 | load_tablet_from_meta(store, tablet_id, schema_hash, meta_binary, true, force, restore, |
996 | 2 | true), |
997 | 2 | strings::Substitute("fail to load tablet. header_path=$0", header_path)); |
998 | | |
999 | 2 | return Status::OK(); |
1000 | 2 | } |
1001 | | |
1002 | 0 | Status TabletManager::report_tablet_info(TTabletInfo* tablet_info) { |
1003 | 0 | LOG(INFO) << "begin to process report tablet info." |
1004 | 0 | << "tablet_id=" << tablet_info->tablet_id; |
1005 | |
|
1006 | 0 | Status res = Status::OK(); |
1007 | |
|
1008 | 0 | TabletSharedPtr tablet = get_tablet(tablet_info->tablet_id); |
1009 | 0 | if (tablet == nullptr) { |
1010 | 0 | return Status::Error<TABLE_NOT_FOUND>("can't find tablet={}", tablet_info->tablet_id); |
1011 | 0 | } |
1012 | | |
1013 | 0 | tablet->build_tablet_report_info(tablet_info); |
1014 | 0 | VLOG_TRACE << "success to process report tablet info."; |
1015 | 0 | return res; |
1016 | 0 | } |
1017 | | |
1018 | 0 | void TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>* tablets_info) { |
1019 | 0 | DCHECK(tablets_info != nullptr); |
1020 | 0 | VLOG_NOTICE << "begin to build all report tablets info"; |
1021 | | |
1022 | | // build the expired txn map first, outside the tablet map lock |
1023 | 0 | std::map<TabletInfo, std::vector<int64_t>> expire_txn_map; |
1024 | 0 | _engine.txn_manager()->build_expire_txn_map(&expire_txn_map); |
1025 | 0 | LOG(INFO) << "find expired transactions for " << expire_txn_map.size() << " tablets"; |
1026 | |
|
1027 | 0 | HistogramStat tablet_version_num_hist; |
1028 | 0 | auto local_cache = std::make_shared<std::vector<TTabletStat>>(); |
1029 | 0 | auto handler = [&](const TabletSharedPtr& tablet) { |
1030 | 0 | auto& t_tablet = (*tablets_info)[tablet->tablet_id()]; |
1031 | 0 | TTabletInfo& tablet_info = t_tablet.tablet_infos.emplace_back(); |
1032 | 0 | tablet->build_tablet_report_info(&tablet_info, true, true); |
1033 | | // find expired transaction corresponding to this tablet |
1034 | 0 | TabletInfo tinfo(tablet->tablet_id(), tablet->tablet_uid()); |
1035 | 0 | auto find = expire_txn_map.find(tinfo); |
1036 | 0 | if (find != expire_txn_map.end()) { |
1037 | 0 | tablet_info.__set_transaction_ids(find->second); |
1038 | 0 | expire_txn_map.erase(find); |
1039 | 0 | } |
1040 | 0 | tablet_version_num_hist.add(tablet_info.total_version_count); |
1041 | 0 | auto& t_tablet_stat = local_cache->emplace_back(); |
1042 | 0 | t_tablet_stat.__set_tablet_id(tablet_info.tablet_id); |
1043 | 0 | t_tablet_stat.__set_data_size(tablet_info.data_size); |
1044 | 0 | t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size); |
1045 | 0 | t_tablet_stat.__set_row_count(tablet_info.row_count); |
1046 | 0 | t_tablet_stat.__set_total_version_count(tablet_info.total_version_count); |
1047 | 0 | t_tablet_stat.__set_visible_version_count(tablet_info.visible_version_count); |
1048 | 0 | t_tablet_stat.__set_visible_version(tablet_info.version); |
1049 | 0 | }; |
1050 | 0 | for_each_tablet(handler, filter_all_tablets); |
1051 | |
|
1052 | 0 | { |
1053 | 0 | std::lock_guard<std::mutex> guard(_tablet_stat_cache_mutex); |
1054 | 0 | _tablet_stat_list_cache.swap(local_cache); |
1055 | 0 | } |
1056 | 0 | DorisMetrics::instance()->tablet_version_num_distribution->set_histogram( |
1057 | 0 | tablet_version_num_hist); |
1058 | 0 | LOG(INFO) << "success to build all report tablets info. tablet_count=" << tablets_info->size(); |
1059 | 0 | } |
1060 | | |
1061 | 21 | Status TabletManager::start_trash_sweep() { |
1062 | 21 | DBUG_EXECUTE_IF("TabletManager.start_trash_sweep.sleep", DBUG_BLOCK); |
1063 | 21 | std::unique_lock<std::mutex> lock(_gc_tablets_lock, std::defer_lock); |
1064 | 21 | if (!lock.try_lock()) { |
1065 | 0 | return Status::OK(); |
1066 | 0 | } |
1067 | | |
1068 | 21 | for_each_tablet([](const TabletSharedPtr& tablet) { tablet->delete_expired_stale_rowset(); }, |
1069 | 21 | filter_all_tablets); |
1070 | | |
1071 | 21 | std::list<TabletSharedPtr>::iterator last_it; |
1072 | 21 | { |
1073 | 21 | std::shared_lock rdlock(_shutdown_tablets_lock); |
1074 | 21 | last_it = _shutdown_tablets.begin(); |
1075 | 21 | if (last_it == _shutdown_tablets.end()) { |
1076 | 17 | return Status::OK(); |
1077 | 17 | } |
1078 | 21 | } |
1079 | | |
1080 | 7 | auto get_batch_tablets = [this, &last_it](int limit) { |
1081 | 7 | std::vector<TabletSharedPtr> batch_tablets; |
1082 | 7 | std::lock_guard<std::shared_mutex> wrdlock(_shutdown_tablets_lock); |
1083 | 11 | while (last_it != _shutdown_tablets.end() && batch_tablets.size() < limit) { |
1084 | | // it means current tablet is referenced by other thread |
1085 | 4 | if (last_it->use_count() > 1) { |
1086 | 1 | last_it++; |
1087 | 3 | } else { |
1088 | 3 | batch_tablets.push_back(*last_it); |
1089 | 3 | last_it = _shutdown_tablets.erase(last_it); |
1090 | 3 | } |
1091 | 4 | } |
1092 | | |
1093 | 7 | return batch_tablets; |
1094 | 7 | }; |
1095 | | |
1096 | 4 | std::list<TabletSharedPtr> failed_tablets; |
1097 | | // return true if need continue delete |
1098 | 4 | auto delete_one_batch = [this, get_batch_tablets, &failed_tablets]() -> bool { |
1099 | 4 | int limit = 200; |
1100 | 7 | for (;;) { |
1101 | 7 | auto batch_tablets = get_batch_tablets(limit); |
1102 | 7 | for (const auto& tablet : batch_tablets) { |
1103 | 3 | if (_move_tablet_to_trash(tablet)) { |
1104 | 3 | limit--; |
1105 | 3 | } else { |
1106 | 0 | failed_tablets.push_back(tablet); |
1107 | 0 | } |
1108 | 3 | } |
1109 | 7 | if (limit <= 0) { |
1110 | 0 | return true; |
1111 | 0 | } |
1112 | 7 | if (batch_tablets.empty()) { |
1113 | 4 | return false; |
1114 | 4 | } |
1115 | 7 | } |
1116 | | |
1117 | 0 | return false; |
1118 | 4 | }; |
1119 | | |
1120 | 4 | while (delete_one_batch()) { |
1121 | | #ifndef BE_TEST |
1122 | | sleep(1); |
1123 | | #endif |
1124 | 0 | } |
1125 | | |
1126 | 4 | if (!failed_tablets.empty()) { |
1127 | 0 | std::lock_guard<std::shared_mutex> wrlock(_shutdown_tablets_lock); |
1128 | 0 | _shutdown_tablets.splice(_shutdown_tablets.end(), failed_tablets); |
1129 | 0 | } |
1130 | | |
1131 | 4 | return Status::OK(); |
1132 | 21 | } |
1133 | | |
1134 | 3 | bool TabletManager::_move_tablet_to_trash(const TabletSharedPtr& tablet) { |
1135 | 3 | RETURN_IF_ERROR(register_transition_tablet(tablet->tablet_id(), "move to trash")); |
1136 | 3 | Defer defer {[&]() { unregister_transition_tablet(tablet->tablet_id(), "move to trash"); }}; |
1137 | | |
1138 | 3 | TabletSharedPtr tablet_in_not_shutdown = get_tablet(tablet->tablet_id()); |
1139 | 3 | if (tablet_in_not_shutdown) { |
1140 | 0 | TSchemaHash schema_hash_not_shutdown = tablet_in_not_shutdown->schema_hash(); |
1141 | 0 | size_t path_hash_not_shutdown = tablet_in_not_shutdown->data_dir()->path_hash(); |
1142 | 0 | if (tablet->schema_hash() == schema_hash_not_shutdown && |
1143 | 0 | tablet->data_dir()->path_hash() == path_hash_not_shutdown) { |
1144 | 0 | tablet->clear_cache(); |
1145 | | // shard_id in memory not eq shard_id in shutdown |
1146 | 0 | if (tablet_in_not_shutdown->tablet_path() != tablet->tablet_path()) { |
1147 | 0 | LOG(INFO) << "tablet path not eq shutdown tablet path, move it to trash, tablet_id=" |
1148 | 0 | << tablet_in_not_shutdown->tablet_id() |
1149 | 0 | << " mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() |
1150 | 0 | << " shutdown tablet path=" << tablet->tablet_path(); |
1151 | 0 | return tablet->data_dir()->move_to_trash(tablet->tablet_path()); |
1152 | 0 | } else { |
1153 | 0 | LOG(INFO) << "tablet path eq shutdown tablet path, not move to trash, tablet_id=" |
1154 | 0 | << tablet_in_not_shutdown->tablet_id() |
1155 | 0 | << " mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() |
1156 | 0 | << " shutdown tablet path=" << tablet->tablet_path(); |
1157 | 0 | return true; |
1158 | 0 | } |
1159 | 0 | } |
1160 | 0 | } |
1161 | | |
1162 | 3 | TabletMetaSharedPtr tablet_meta(new TabletMeta()); |
1163 | 3 | int64_t get_meta_ts = MonotonicMicros(); |
1164 | 3 | Status check_st = TabletMetaManager::get_meta(tablet->data_dir(), tablet->tablet_id(), |
1165 | 3 | tablet->schema_hash(), tablet_meta); |
1166 | 3 | if (check_st.ok()) { |
1167 | 3 | if (tablet_meta->tablet_state() != TABLET_SHUTDOWN || |
1168 | 3 | tablet_meta->tablet_uid() != tablet->tablet_uid()) { |
1169 | 0 | LOG(WARNING) << "tablet's state changed to normal, skip remove dirs" |
1170 | 0 | << " tablet id = " << tablet_meta->tablet_id() |
1171 | 0 | << " schema hash = " << tablet_meta->schema_hash() |
1172 | 0 | << " old tablet_uid=" << tablet->tablet_uid() |
1173 | 0 | << " cur tablet_uid=" << tablet_meta->tablet_uid(); |
1174 | 0 | return true; |
1175 | 0 | } |
1176 | | |
1177 | 3 | tablet->clear_cache(); |
1178 | | |
1179 | | // move data to trash |
1180 | 3 | const auto& tablet_path = tablet->tablet_path(); |
1181 | 3 | bool exists = false; |
1182 | 3 | Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists); |
1183 | 3 | if (!exists_st) { |
1184 | 0 | return false; |
1185 | 0 | } |
1186 | 3 | if (exists) { |
1187 | | // take snapshot of tablet meta |
1188 | 3 | auto meta_file_path = fmt::format("{}/{}.hdr", tablet_path, tablet->tablet_id()); |
1189 | 3 | int64_t save_meta_ts = MonotonicMicros(); |
1190 | 3 | auto save_st = tablet->tablet_meta()->save(meta_file_path); |
1191 | 3 | if (!save_st.ok()) { |
1192 | 0 | LOG(WARNING) << "failed to save meta, tablet_id=" << tablet_meta->tablet_id() |
1193 | 0 | << ", tablet_uid=" << tablet_meta->tablet_uid() |
1194 | 0 | << ", error=" << save_st; |
1195 | 0 | return false; |
1196 | 0 | } |
1197 | 3 | int64_t now = MonotonicMicros(); |
1198 | 3 | LOG(INFO) << "start to move tablet to trash. " << tablet_path |
1199 | 3 | << ". rocksdb get meta cost " << (save_meta_ts - get_meta_ts) |
1200 | 3 | << " us, rocksdb save meta cost " << (now - save_meta_ts) << " us"; |
1201 | 3 | Status rm_st = tablet->data_dir()->move_to_trash(tablet_path); |
1202 | 3 | if (!rm_st.ok()) { |
1203 | 0 | LOG(WARNING) << "fail to move dir to trash. " << tablet_path; |
1204 | 0 | return false; |
1205 | 0 | } |
1206 | 3 | } |
1207 | | // remove tablet meta |
1208 | 3 | auto remove_st = TabletMetaManager::remove(tablet->data_dir(), tablet->tablet_id(), |
1209 | 3 | tablet->schema_hash()); |
1210 | 3 | if (!remove_st.ok()) { |
1211 | 0 | LOG(WARNING) << "failed to remove meta, tablet_id=" << tablet_meta->tablet_id() |
1212 | 0 | << ", tablet_uid=" << tablet_meta->tablet_uid() << ", error=" << remove_st; |
1213 | 0 | return false; |
1214 | 0 | } |
1215 | 3 | LOG(INFO) << "successfully move tablet to trash. " |
1216 | 3 | << "tablet_id=" << tablet->tablet_id() |
1217 | 3 | << ", schema_hash=" << tablet->schema_hash() << ", tablet_path=" << tablet_path; |
1218 | 3 | return true; |
1219 | 3 | } else { |
1220 | 0 | tablet->clear_cache(); |
1221 | | // if could not find tablet info in meta store, then check if dir existed |
1222 | 0 | const auto& tablet_path = tablet->tablet_path(); |
1223 | 0 | bool exists = false; |
1224 | 0 | Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists); |
1225 | 0 | if (!exists_st) { |
1226 | 0 | return false; |
1227 | 0 | } |
1228 | 0 | if (exists) { |
1229 | 0 | if (check_st.is<META_KEY_NOT_FOUND>()) { |
1230 | 0 | LOG(INFO) << "could not find tablet meta in rocksdb, so just delete it path " |
1231 | 0 | << "tablet_id=" << tablet->tablet_id() |
1232 | 0 | << ", schema_hash=" << tablet->schema_hash() |
1233 | 0 | << ", delete tablet_path=" << tablet_path; |
1234 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_path)); |
1235 | 0 | RETURN_IF_ERROR(DataDir::delete_tablet_parent_path_if_empty(tablet_path)); |
1236 | 0 | return true; |
1237 | 0 | } |
1238 | 0 | LOG(WARNING) << "errors while load meta from store, skip this tablet. " |
1239 | 0 | << "tablet_id=" << tablet->tablet_id() |
1240 | 0 | << ", schema_hash=" << tablet->schema_hash(); |
1241 | 0 | return false; |
1242 | 0 | } else { |
1243 | 0 | LOG(INFO) << "could not find tablet dir, skip it and remove it from gc-queue. " |
1244 | 0 | << "tablet_id=" << tablet->tablet_id() |
1245 | 0 | << ", schema_hash=" << tablet->schema_hash() |
1246 | 0 | << ", tablet_path=" << tablet_path; |
1247 | 0 | return true; |
1248 | 0 | } |
1249 | 0 | } |
1250 | 3 | } |
1251 | | |
1252 | 41 | Status TabletManager::register_transition_tablet(int64_t tablet_id, std::string reason) { |
1253 | 41 | tablets_shard& shard = _get_tablets_shard(tablet_id); |
1254 | 41 | std::thread::id thread_id = std::this_thread::get_id(); |
1255 | 41 | std::lock_guard<std::mutex> lk(shard.lock_for_transition); |
1256 | 41 | if (auto search = shard.tablets_under_transition.find(tablet_id); |
1257 | 41 | search == shard.tablets_under_transition.end()) { |
1258 | | // not found |
1259 | 39 | shard.tablets_under_transition[tablet_id] = std::make_tuple(reason, thread_id, 1); |
1260 | 39 | LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << reason |
1261 | 39 | << " lock times=1 thread_id_in_map=" << thread_id; |
1262 | 39 | return Status::OK(); |
1263 | 39 | } else { |
1264 | | // found |
1265 | 2 | auto& [r, thread_id_in_map, lock_times] = search->second; |
1266 | 2 | if (thread_id != thread_id_in_map) { |
1267 | | // other thread, failed |
1268 | 0 | LOG(INFO) << "tablet_id = " << tablet_id << " is doing " << r |
1269 | 0 | << " thread_id_in_map=" << thread_id_in_map << " , add reason=" << reason |
1270 | 0 | << " thread_id=" << thread_id; |
1271 | 0 | return Status::InternalError<false>("{} failed try later, tablet_id={}", reason, |
1272 | 0 | tablet_id); |
1273 | 0 | } |
1274 | | // add lock times |
1275 | 2 | ++lock_times; |
1276 | 2 | LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << reason |
1277 | 2 | << " lock times=" << lock_times << " thread_id_in_map=" << thread_id_in_map; |
1278 | 2 | return Status::OK(); |
1279 | 2 | } |
1280 | 41 | } |
1281 | | |
1282 | 41 | void TabletManager::unregister_transition_tablet(int64_t tablet_id, std::string reason) { |
1283 | 41 | tablets_shard& shard = _get_tablets_shard(tablet_id); |
1284 | 41 | std::thread::id thread_id = std::this_thread::get_id(); |
1285 | 41 | std::lock_guard<std::mutex> lk(shard.lock_for_transition); |
1286 | 41 | if (auto search = shard.tablets_under_transition.find(tablet_id); |
1287 | 41 | search == shard.tablets_under_transition.end()) { |
1288 | | // impossible, bug |
1289 | 0 | DCHECK(false) << "tablet " << tablet_id |
1290 | 0 | << " must be found, before unreg must have been reg"; |
1291 | 41 | } else { |
1292 | 41 | auto& [r, thread_id_in_map, lock_times] = search->second; |
1293 | 41 | if (thread_id_in_map != thread_id) { |
1294 | | // impossible, bug |
1295 | 0 | DCHECK(false) << "tablet " << tablet_id << " unreg thread must same reg thread"; |
1296 | 0 | } |
1297 | | // sub lock times |
1298 | 41 | --lock_times; |
1299 | 41 | if (lock_times != 0) { |
1300 | 2 | LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason |
1301 | 2 | << " left=" << lock_times << " thread_id_in_map=" << thread_id_in_map; |
1302 | 39 | } else { |
1303 | 39 | LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason |
1304 | 39 | << " thread_id_in_map=" << thread_id_in_map; |
1305 | 39 | shard.tablets_under_transition.erase(tablet_id); |
1306 | 39 | } |
1307 | 41 | } |
1308 | 41 | } |
1309 | | |
1310 | | void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId tablet_id, |
1311 | | SchemaHash schema_hash, |
1312 | | const string& schema_hash_path, |
1313 | 10 | int16_t shard_id) { |
1314 | | // acquire the read lock, so that there is no creating tablet or load tablet from meta tasks |
1315 | | // create tablet and load tablet task should check whether the dir exists |
1316 | 10 | tablets_shard& shard = _get_tablets_shard(tablet_id); |
1317 | 10 | std::shared_lock rdlock(shard.lock); |
1318 | | |
1319 | | // check if meta already exists |
1320 | 10 | TabletMetaSharedPtr tablet_meta(new TabletMeta()); |
1321 | 10 | Status check_st = TabletMetaManager::get_meta(data_dir, tablet_id, schema_hash, tablet_meta); |
1322 | 10 | if (check_st.ok() && tablet_meta->shard_id() == shard_id) { |
1323 | 0 | return; |
1324 | 0 | } |
1325 | | |
1326 | 10 | LOG(INFO) << "tablet meta not exists, try delete tablet path " << schema_hash_path; |
1327 | | |
1328 | 10 | bool succ = register_transition_tablet(tablet_id, "path gc"); |
1329 | 10 | if (!succ) { |
1330 | 0 | return; |
1331 | 0 | } |
1332 | 10 | Defer defer {[&]() { unregister_transition_tablet(tablet_id, "path gc"); }}; |
1333 | | |
1334 | 10 | TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id); |
1335 | 10 | if (tablet != nullptr && tablet->tablet_path() == schema_hash_path) { |
1336 | 0 | LOG(INFO) << "tablet , skip delete the path " << schema_hash_path; |
1337 | 0 | return; |
1338 | 0 | } |
1339 | | |
1340 | | // TODO(ygl): may do other checks in the future |
1341 | 10 | bool exists = false; |
1342 | 10 | Status exists_st = io::global_local_filesystem()->exists(schema_hash_path, &exists); |
1343 | 10 | if (exists_st && exists) { |
1344 | 10 | LOG(INFO) << "start to move tablet to trash. tablet_path = " << schema_hash_path; |
1345 | 10 | Status rm_st = data_dir->move_to_trash(schema_hash_path); |
1346 | 10 | if (!rm_st.ok()) { |
1347 | 0 | LOG(WARNING) << "fail to move dir to trash. dir=" << schema_hash_path; |
1348 | 10 | } else { |
1349 | 10 | LOG(INFO) << "move path " << schema_hash_path << " to trash successfully"; |
1350 | 10 | } |
1351 | 10 | } |
1352 | 10 | } |
1353 | | |
1354 | | void TabletManager::update_root_path_info(std::map<string, DataDirInfo>* path_map, |
1355 | 17 | size_t* tablet_count) { |
1356 | 17 | DCHECK(tablet_count); |
1357 | 17 | *tablet_count = 0; |
1358 | 17 | auto filter = [path_map, tablet_count](Tablet* t) -> bool { |
1359 | 0 | ++(*tablet_count); |
1360 | 0 | auto iter = path_map->find(t->data_dir()->path()); |
1361 | 0 | return iter != path_map->end() && iter->second.is_used; |
1362 | 0 | }; |
1363 | | |
1364 | 17 | auto handler = [&](const TabletSharedPtr& tablet) { |
1365 | 0 | auto& data_dir_info = (*path_map)[tablet->data_dir()->path()]; |
1366 | 0 | data_dir_info.local_used_capacity += tablet->tablet_local_size(); |
1367 | 0 | data_dir_info.remote_used_capacity += tablet->tablet_remote_size(); |
1368 | 0 | }; |
1369 | | |
1370 | 17 | for_each_tablet(handler, filter); |
1371 | 17 | } |
1372 | | |
1373 | | void TabletManager::get_partition_related_tablets(int64_t partition_id, |
1374 | 0 | std::set<TabletInfo>* tablet_infos) { |
1375 | 0 | std::shared_lock rdlock(_partitions_lock); |
1376 | 0 | auto it = _partitions.find(partition_id); |
1377 | 0 | if (it != _partitions.end()) { |
1378 | 0 | *tablet_infos = it->second.tablets; |
1379 | 0 | } |
1380 | 0 | } |
1381 | | |
1382 | 0 | void TabletManager::get_partitions_visible_version(std::map<int64_t, int64_t>* partitions_version) { |
1383 | 0 | std::shared_lock rdlock(_partitions_lock); |
1384 | 0 | for (const auto& [partition_id, partition] : _partitions) { |
1385 | 0 | partitions_version->insert( |
1386 | 0 | {partition_id, partition.visible_version->version.load(std::memory_order_relaxed)}); |
1387 | 0 | } |
1388 | 0 | } |
1389 | | |
1390 | | void TabletManager::update_partitions_visible_version( |
1391 | 0 | const std::map<int64_t, int64_t>& partitions_version) { |
1392 | 0 | std::shared_lock rdlock(_partitions_lock); |
1393 | 0 | for (auto [partition_id, version] : partitions_version) { |
1394 | 0 | auto it = _partitions.find(partition_id); |
1395 | 0 | if (it != _partitions.end()) { |
1396 | 0 | it->second.visible_version->update_version_monoto(version); |
1397 | 0 | } |
1398 | 0 | } |
1399 | 0 | } |
1400 | | |
1401 | 18 | void TabletManager::do_tablet_meta_checkpoint(DataDir* data_dir) { |
1402 | 18 | auto filter = [data_dir](Tablet* tablet) -> bool { |
1403 | 0 | return tablet->tablet_state() == TABLET_RUNNING && |
1404 | 0 | tablet->data_dir()->path_hash() == data_dir->path_hash() && tablet->is_used() && |
1405 | 0 | tablet->init_succeeded(); |
1406 | 0 | }; |
1407 | | |
1408 | 18 | std::vector<TabletSharedPtr> related_tablets = get_all_tablet(filter); |
1409 | 18 | int counter = 0; |
1410 | 18 | MonotonicStopWatch watch; |
1411 | 18 | watch.start(); |
1412 | 18 | for (TabletSharedPtr tablet : related_tablets) { |
1413 | 0 | if (tablet->do_tablet_meta_checkpoint()) { |
1414 | 0 | ++counter; |
1415 | 0 | } |
1416 | 0 | } |
1417 | 18 | int64_t cost = watch.elapsed_time() / 1000 / 1000; |
1418 | 18 | LOG(INFO) << "finish to do meta checkpoint on dir: " << data_dir->path() |
1419 | 18 | << ", number: " << counter << ", cost(ms): " << cost; |
1420 | 18 | } |
1421 | | |
1422 | | Status TabletManager::_create_tablet_meta_unlocked(const TCreateTabletReq& request, DataDir* store, |
1423 | | const bool is_schema_change, |
1424 | | const Tablet* base_tablet, |
1425 | 69 | TabletMetaSharedPtr* tablet_meta) { |
1426 | 69 | uint32_t next_unique_id = 0; |
1427 | 69 | std::unordered_map<uint32_t, uint32_t> col_idx_to_unique_id; |
1428 | 69 | if (!is_schema_change) { |
1429 | 1.23k | for (uint32_t col_idx = 0; col_idx < request.tablet_schema.columns.size(); ++col_idx) { |
1430 | 1.16k | col_idx_to_unique_id[col_idx] = col_idx; |
1431 | 1.16k | } |
1432 | 69 | next_unique_id = request.tablet_schema.columns.size(); |
1433 | 69 | } else { |
1434 | 0 | next_unique_id = base_tablet->next_unique_id(); |
1435 | 0 | auto& new_columns = request.tablet_schema.columns; |
1436 | 0 | for (uint32_t new_col_idx = 0; new_col_idx < new_columns.size(); ++new_col_idx) { |
1437 | 0 | const TColumn& column = new_columns[new_col_idx]; |
1438 | | // For schema change, compare old_tablet and new_tablet: |
1439 | | // 1. if column exist in both new_tablet and old_tablet, choose the column's |
1440 | | // unique_id in old_tablet to be the column's ordinal number in new_tablet |
1441 | | // 2. if column exists only in new_tablet, assign next_unique_id of old_tablet |
1442 | | // to the new column |
1443 | 0 | int32_t old_col_idx = base_tablet->tablet_schema()->field_index(column.column_name); |
1444 | 0 | if (old_col_idx != -1) { |
1445 | 0 | uint32_t old_unique_id = |
1446 | 0 | base_tablet->tablet_schema()->column(old_col_idx).unique_id(); |
1447 | 0 | col_idx_to_unique_id[new_col_idx] = old_unique_id; |
1448 | 0 | } else { |
1449 | | // Not exist in old tablet, it is a new added column |
1450 | 0 | col_idx_to_unique_id[new_col_idx] = next_unique_id++; |
1451 | 0 | } |
1452 | 0 | } |
1453 | 0 | } |
1454 | 69 | VLOG_NOTICE << "creating tablet meta. next_unique_id=" << next_unique_id; |
1455 | | |
1456 | | // We generate a new tablet_uid for this new tablet. |
1457 | 69 | uint64_t shard_id = store->get_shard(); |
1458 | 69 | *tablet_meta = TabletMeta::create(request, TabletUid::gen_uid(), shard_id, next_unique_id, |
1459 | 69 | col_idx_to_unique_id); |
1460 | 69 | if (request.__isset.storage_format) { |
1461 | 51 | if (request.storage_format == TStorageFormat::DEFAULT) { |
1462 | 0 | (*tablet_meta)->set_preferred_rowset_type(_engine.default_rowset_type()); |
1463 | 51 | } else if (request.storage_format == TStorageFormat::V1) { |
1464 | 0 | (*tablet_meta)->set_preferred_rowset_type(ALPHA_ROWSET); |
1465 | 51 | } else if (request.storage_format == TStorageFormat::V2) { |
1466 | 51 | (*tablet_meta)->set_preferred_rowset_type(BETA_ROWSET); |
1467 | 51 | } else { |
1468 | 0 | return Status::Error<CE_CMD_PARAMS_ERROR>("invalid TStorageFormat: {}", |
1469 | 0 | request.storage_format); |
1470 | 0 | } |
1471 | 51 | } |
1472 | 69 | return Status::OK(); |
1473 | 69 | } |
1474 | | |
1475 | 2.45k | TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id) { |
1476 | 2.45k | VLOG_NOTICE << "begin to get tablet. tablet_id=" << tablet_id; |
1477 | 2.45k | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
1478 | 2.45k | const auto& iter = tablet_map.find(tablet_id); |
1479 | 2.45k | if (iter != tablet_map.end()) { |
1480 | 2.35k | return iter->second; |
1481 | 2.35k | } |
1482 | 99 | return nullptr; |
1483 | 2.45k | } |
1484 | | |
1485 | 71 | void TabletManager::_add_tablet_to_partition(const TabletSharedPtr& tablet) { |
1486 | 71 | std::lock_guard<std::shared_mutex> wrlock(_partitions_lock); |
1487 | 71 | auto& partition = _partitions[tablet->partition_id()]; |
1488 | 71 | partition.tablets.insert(tablet->get_tablet_info()); |
1489 | 71 | tablet->set_visible_version( |
1490 | 71 | std::static_pointer_cast<const VersionWithTime>(partition.visible_version)); |
1491 | 71 | } |
1492 | | |
1493 | 25 | void TabletManager::_remove_tablet_from_partition(const TabletSharedPtr& tablet) { |
1494 | 25 | tablet->set_visible_version(nullptr); |
1495 | 25 | std::lock_guard<std::shared_mutex> wrlock(_partitions_lock); |
1496 | 25 | auto it = _partitions.find(tablet->partition_id()); |
1497 | 25 | if (it == _partitions.end()) { |
1498 | 0 | return; |
1499 | 0 | } |
1500 | | |
1501 | 25 | auto& tablets = it->second.tablets; |
1502 | 25 | tablets.erase(tablet->get_tablet_info()); |
1503 | 25 | if (tablets.empty()) { |
1504 | 25 | _partitions.erase(it); |
1505 | 25 | } |
1506 | 25 | } |
1507 | | |
1508 | | void TabletManager::obtain_specific_quantity_tablets(vector<TabletInfo>& tablets_info, |
1509 | 0 | int64_t num) { |
1510 | 0 | for (const auto& tablets_shard : _tablets_shards) { |
1511 | 0 | std::shared_lock rdlock(tablets_shard.lock); |
1512 | 0 | for (const auto& item : tablets_shard.tablet_map) { |
1513 | 0 | TabletSharedPtr tablet = item.second; |
1514 | 0 | if (tablets_info.size() >= num) { |
1515 | 0 | return; |
1516 | 0 | } |
1517 | 0 | if (tablet == nullptr) { |
1518 | 0 | continue; |
1519 | 0 | } |
1520 | 0 | tablets_info.push_back(tablet->get_tablet_info()); |
1521 | 0 | } |
1522 | 0 | } |
1523 | 0 | } |
1524 | | |
1525 | 2.34k | std::shared_mutex& TabletManager::_get_tablets_shard_lock(TTabletId tabletId) { |
1526 | 2.34k | return _get_tablets_shard(tabletId).lock; |
1527 | 2.34k | } |
1528 | | |
1529 | 2.64k | TabletManager::tablet_map_t& TabletManager::_get_tablet_map(TTabletId tabletId) { |
1530 | 2.64k | return _get_tablets_shard(tabletId).tablet_map; |
1531 | 2.64k | } |
1532 | | |
1533 | 5.10k | TabletManager::tablets_shard& TabletManager::_get_tablets_shard(TTabletId tabletId) { |
1534 | 5.10k | return _tablets_shards[tabletId & _tablets_shards_mask]; |
1535 | 5.10k | } |
1536 | | |
1537 | | void TabletManager::get_tablets_distribution_on_different_disks( |
1538 | | std::map<int64_t, std::map<DataDir*, int64_t>>& tablets_num_on_disk, |
1539 | 0 | std::map<int64_t, std::map<DataDir*, std::vector<TabletSize>>>& tablets_info_on_disk) { |
1540 | 0 | std::vector<DataDir*> data_dirs = _engine.get_stores(); |
1541 | 0 | std::map<int64_t, Partition> partitions; |
1542 | 0 | { |
1543 | | // When drop tablet, '_partitions_lock' is locked in 'tablet_shard_lock'. |
1544 | | // To avoid locking 'tablet_shard_lock' in '_partitions_lock', we lock and |
1545 | | // copy _partitions here. |
1546 | 0 | std::shared_lock rdlock(_partitions_lock); |
1547 | 0 | partitions = _partitions; |
1548 | 0 | } |
1549 | |
|
1550 | 0 | for (const auto& [partition_id, partition] : partitions) { |
1551 | 0 | std::map<DataDir*, int64_t> tablets_num; |
1552 | 0 | std::map<DataDir*, std::vector<TabletSize>> tablets_info; |
1553 | 0 | for (auto* data_dir : data_dirs) { |
1554 | 0 | tablets_num[data_dir] = 0; |
1555 | 0 | } |
1556 | |
|
1557 | 0 | for (const auto& tablet_info : partition.tablets) { |
1558 | | // get_tablet() will hold 'tablet_shard_lock' |
1559 | 0 | TabletSharedPtr tablet = get_tablet(tablet_info.tablet_id); |
1560 | 0 | if (tablet == nullptr) { |
1561 | 0 | continue; |
1562 | 0 | } |
1563 | 0 | DataDir* data_dir = tablet->data_dir(); |
1564 | 0 | size_t tablet_footprint = tablet->tablet_footprint(); |
1565 | 0 | tablets_num[data_dir]++; |
1566 | 0 | TabletSize tablet_size(tablet_info.tablet_id, tablet_footprint); |
1567 | 0 | tablets_info[data_dir].push_back(tablet_size); |
1568 | 0 | } |
1569 | 0 | tablets_num_on_disk[partition_id] = tablets_num; |
1570 | 0 | tablets_info_on_disk[partition_id] = tablets_info; |
1571 | 0 | } |
1572 | 0 | } |
1573 | | |
1574 | | struct SortCtx { |
1575 | | SortCtx(TabletSharedPtr tablet, RowsetSharedPtr rowset, int64_t cooldown_timestamp, |
1576 | | int64_t file_size) |
1577 | 0 | : tablet(tablet), cooldown_timestamp(cooldown_timestamp), file_size(file_size) {} |
1578 | | TabletSharedPtr tablet; |
1579 | | RowsetSharedPtr rowset; |
1580 | | // to ensure the tablet with -1 would always be greater than other |
1581 | | uint64_t cooldown_timestamp; |
1582 | | int64_t file_size; |
1583 | 0 | bool operator<(const SortCtx& other) const { |
1584 | 0 | if (this->cooldown_timestamp == other.cooldown_timestamp) { |
1585 | 0 | return this->file_size > other.file_size; |
1586 | 0 | } |
1587 | 0 | return this->cooldown_timestamp < other.cooldown_timestamp; |
1588 | 0 | } |
1589 | | }; |
1590 | | |
1591 | | void TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* tablets, |
1592 | | std::vector<RowsetSharedPtr>* rowsets, |
1593 | 19 | std::function<bool(const TabletSharedPtr&)> skip_tablet) { |
1594 | 19 | std::vector<SortCtx> sort_ctx_vec; |
1595 | 19 | std::vector<std::weak_ptr<Tablet>> candidates; |
1596 | 19 | for_each_tablet([&](const TabletSharedPtr& tablet) { candidates.emplace_back(tablet); }, |
1597 | 19 | filter_all_tablets); |
1598 | 19 | auto get_cooldown_tablet = [&sort_ctx_vec, &skip_tablet](std::weak_ptr<Tablet>& t) { |
1599 | 0 | const TabletSharedPtr& tablet = t.lock(); |
1600 | 0 | RowsetSharedPtr rowset = nullptr; |
1601 | 0 | if (UNLIKELY(nullptr == tablet)) { |
1602 | 0 | return; |
1603 | 0 | } |
1604 | 0 | std::shared_lock rdlock(tablet->get_header_lock()); |
1605 | 0 | int64_t cooldown_timestamp = -1; |
1606 | 0 | size_t file_size = -1; |
1607 | 0 | if (!skip_tablet(tablet) && |
1608 | 0 | (rowset = tablet->need_cooldown(&cooldown_timestamp, &file_size))) { |
1609 | 0 | sort_ctx_vec.emplace_back(tablet, rowset, cooldown_timestamp, file_size); |
1610 | 0 | } |
1611 | 0 | }; |
1612 | 19 | std::for_each(candidates.begin(), candidates.end(), get_cooldown_tablet); |
1613 | | |
1614 | 19 | std::sort(sort_ctx_vec.begin(), sort_ctx_vec.end()); |
1615 | | |
1616 | 19 | for (SortCtx& ctx : sort_ctx_vec) { |
1617 | 0 | VLOG_DEBUG << "get cooldown tablet: " << ctx.tablet->tablet_id(); |
1618 | 0 | tablets->push_back(std::move(ctx.tablet)); |
1619 | 0 | rowsets->push_back(std::move(ctx.rowset)); |
1620 | 0 | } |
1621 | 19 | } |
1622 | | |
1623 | 0 | void TabletManager::get_all_tablets_storage_format(TCheckStorageFormatResult* result) { |
1624 | 0 | DCHECK(result != nullptr); |
1625 | 0 | auto handler = [result](const TabletSharedPtr& tablet) { |
1626 | 0 | if (tablet->all_beta()) { |
1627 | 0 | result->v2_tablets.push_back(tablet->tablet_id()); |
1628 | 0 | } else { |
1629 | 0 | result->v1_tablets.push_back(tablet->tablet_id()); |
1630 | 0 | } |
1631 | 0 | }; |
1632 | |
|
1633 | 0 | for_each_tablet(handler, filter_all_tablets); |
1634 | 0 | result->__isset.v1_tablets = true; |
1635 | 0 | result->__isset.v2_tablets = true; |
1636 | 0 | } |
1637 | | |
1638 | 0 | std::set<int64_t> TabletManager::check_all_tablet_segment(bool repair) { |
1639 | 0 | std::set<int64_t> bad_tablets; |
1640 | 0 | std::map<int64_t, std::vector<int64_t>> repair_shard_bad_tablets; |
1641 | 0 | auto handler = [&](const TabletSharedPtr& tablet) { |
1642 | 0 | if (!tablet->check_all_rowset_segment()) { |
1643 | 0 | int64_t tablet_id = tablet->tablet_id(); |
1644 | 0 | bad_tablets.insert(tablet_id); |
1645 | 0 | if (repair) { |
1646 | 0 | repair_shard_bad_tablets[tablet_id & _tablets_shards_mask].push_back(tablet_id); |
1647 | 0 | } |
1648 | 0 | } |
1649 | 0 | }; |
1650 | 0 | for_each_tablet(handler, filter_all_tablets); |
1651 | |
|
1652 | 0 | for (const auto& [shard_index, shard_tablets] : repair_shard_bad_tablets) { |
1653 | 0 | auto& tablets_shard = _tablets_shards[shard_index]; |
1654 | 0 | auto& tablet_map = tablets_shard.tablet_map; |
1655 | 0 | std::lock_guard<std::shared_mutex> wrlock(tablets_shard.lock); |
1656 | 0 | for (auto tablet_id : shard_tablets) { |
1657 | 0 | auto it = tablet_map.find(tablet_id); |
1658 | 0 | if (it == tablet_map.end()) { |
1659 | 0 | bad_tablets.erase(tablet_id); |
1660 | 0 | LOG(WARNING) << "Bad tablet has be removed. tablet_id=" << tablet_id; |
1661 | 0 | } else { |
1662 | 0 | const auto& tablet = it->second; |
1663 | 0 | static_cast<void>(tablet->set_tablet_state(TABLET_SHUTDOWN)); |
1664 | 0 | tablet->save_meta(); |
1665 | 0 | { |
1666 | 0 | std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock( |
1667 | 0 | _shutdown_tablets_lock); |
1668 | 0 | _shutdown_tablets.push_back(tablet); |
1669 | 0 | } |
1670 | 0 | LOG(WARNING) << "There are some segments lost, set tablet to shutdown state." |
1671 | 0 | << "tablet_id=" << tablet->tablet_id() |
1672 | 0 | << ", tablet_path=" << tablet->tablet_path(); |
1673 | 0 | } |
1674 | 0 | } |
1675 | 0 | } |
1676 | |
|
1677 | 0 | return bad_tablets; |
1678 | 0 | } |
1679 | | |
1680 | | bool TabletManager::update_tablet_partition_id(::doris::TPartitionId partition_id, |
1681 | 0 | ::doris::TTabletId tablet_id) { |
1682 | 0 | std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id)); |
1683 | 0 | TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id); |
1684 | 0 | if (tablet == nullptr) { |
1685 | 0 | LOG(WARNING) << "get tablet err partition_id: " << partition_id |
1686 | 0 | << " tablet_id:" << tablet_id; |
1687 | 0 | return false; |
1688 | 0 | } |
1689 | 0 | _remove_tablet_from_partition(tablet); |
1690 | 0 | auto st = tablet->tablet_meta()->set_partition_id(partition_id); |
1691 | 0 | if (!st.ok()) { |
1692 | 0 | LOG(WARNING) << "set partition id err partition_id: " << partition_id |
1693 | 0 | << " tablet_id:" << tablet_id; |
1694 | 0 | return false; |
1695 | 0 | } |
1696 | 0 | _add_tablet_to_partition(tablet); |
1697 | 0 | return true; |
1698 | 0 | } |
1699 | | |
1700 | | } // end namespace doris |