/root/doris/be/src/olap/tablet_manager.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include "olap/tablet_manager.h" |
19 | | |
20 | | #include <fmt/format.h> |
21 | | #include <gen_cpp/AgentService_types.h> |
22 | | #include <gen_cpp/BackendService_types.h> |
23 | | #include <gen_cpp/Descriptors_types.h> |
24 | | #include <gen_cpp/MasterService_types.h> |
25 | | #include <gen_cpp/Types_types.h> |
26 | | #include <gen_cpp/olap_file.pb.h> |
27 | | #include <re2/re2.h> |
28 | | #include <unistd.h> |
29 | | |
30 | | #include <algorithm> |
31 | | #include <list> |
32 | | #include <mutex> |
33 | | #include <ostream> |
34 | | |
35 | | // IWYU pragma: no_include <opentelemetry/common/threadlocal.h> |
36 | | #include "common/compiler_util.h" // IWYU pragma: keep |
37 | | #include "common/config.h" |
38 | | #include "common/logging.h" |
39 | | #include "gutil/integral_types.h" |
40 | | #include "gutil/strings/strcat.h" |
41 | | #include "gutil/strings/substitute.h" |
42 | | #include "io/fs/local_file_system.h" |
43 | | #include "olap/cumulative_compaction_time_series_policy.h" |
44 | | #include "olap/data_dir.h" |
45 | | #include "olap/olap_common.h" |
46 | | #include "olap/olap_define.h" |
47 | | #include "olap/olap_meta.h" |
48 | | #include "olap/pb_helper.h" |
49 | | #include "olap/rowset/beta_rowset.h" |
50 | | #include "olap/rowset/rowset.h" |
51 | | #include "olap/rowset/rowset_meta_manager.h" |
52 | | #include "olap/storage_engine.h" |
53 | | #include "olap/tablet.h" |
54 | | #include "olap/tablet_meta.h" |
55 | | #include "olap/tablet_meta_manager.h" |
56 | | #include "olap/tablet_schema.h" |
57 | | #include "olap/txn_manager.h" |
58 | | #include "runtime/exec_env.h" |
59 | | #include "runtime/memory/mem_tracker.h" |
60 | | #include "runtime/thread_context.h" |
61 | | #include "service/backend_options.h" |
62 | | #include "util/defer_op.h" |
63 | | #include "util/doris_metrics.h" |
64 | | #include "util/histogram.h" |
65 | | #include "util/metrics.h" |
66 | | #include "util/path_util.h" |
67 | | #include "util/scoped_cleanup.h" |
68 | | #include "util/stopwatch.hpp" |
69 | | #include "util/time.h" |
70 | | #include "util/trace.h" |
71 | | #include "util/uid_util.h" |
72 | | |
73 | | namespace doris { |
74 | | class CumulativeCompactionPolicy; |
75 | | } // namespace doris |
76 | | |
77 | | using std::map; |
78 | | using std::set; |
79 | | using std::string; |
80 | | using std::vector; |
81 | | |
82 | | namespace doris { |
83 | | using namespace ErrorCode; |
84 | | |
85 | | DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(tablet_meta_mem_consumption, MetricUnit::BYTES, "", |
86 | | mem_consumption, Labels({{"type", "tablet_meta"}})); |
87 | | |
88 | | TabletManager::TabletManager(int32_t tablet_map_lock_shard_size) |
89 | | : _mem_tracker(std::make_shared<MemTracker>( |
90 | | "TabletManager", ExecEnv::GetInstance()->experimental_mem_tracker())), |
91 | | _tablet_meta_mem_tracker(std::make_shared<MemTracker>( |
92 | | "TabletMeta", ExecEnv::GetInstance()->experimental_mem_tracker())), |
93 | | _tablets_shards_size(tablet_map_lock_shard_size), |
94 | 57 | _tablets_shards_mask(tablet_map_lock_shard_size - 1) { |
95 | 57 | CHECK_GT(_tablets_shards_size, 0); |
96 | 57 | CHECK_EQ(_tablets_shards_size & _tablets_shards_mask, 0); |
97 | 57 | _tablets_shards.resize(_tablets_shards_size); |
98 | 57 | REGISTER_HOOK_METRIC(tablet_meta_mem_consumption, |
99 | 57 | [this]() { return _mem_tracker->consumption(); }); |
100 | 57 | } |
101 | | |
102 | 55 | TabletManager::~TabletManager() { |
103 | 55 | DEREGISTER_HOOK_METRIC(tablet_meta_mem_consumption); |
104 | 55 | } |
105 | | |
106 | | Status TabletManager::_add_tablet_unlocked(TTabletId tablet_id, const TabletSharedPtr& tablet, |
107 | 26 | bool update_meta, bool force, RuntimeProfile* profile) { |
108 | 26 | if (profile->get_counter("AddTablet") == nullptr) { |
109 | 2 | ADD_TIMER(profile, "AddTablet"); |
110 | 2 | } |
111 | 26 | Status res = Status::OK(); |
112 | 26 | VLOG_NOTICE << "begin to add tablet to TabletManager. " |
113 | 0 | << "tablet_id=" << tablet_id << ", force=" << force; |
114 | | |
115 | 26 | TabletSharedPtr existed_tablet = nullptr; |
116 | 26 | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
117 | 26 | const auto& iter = tablet_map.find(tablet_id); |
118 | 26 | if (iter != tablet_map.end()) { |
119 | 2 | existed_tablet = iter->second; |
120 | 2 | } |
121 | | |
122 | 26 | if (existed_tablet == nullptr) { |
123 | 24 | return _add_tablet_to_map_unlocked(tablet_id, tablet, update_meta, false /*keep_files*/, |
124 | 24 | false /*drop_old*/, profile); |
125 | 24 | } |
126 | | // During the restore process the tablet already exists: the snapshot loader replaces the tablet's rowsets |
127 | | // and then reloads the tablet, so the tablet's path will be the same. |
128 | 2 | if (!force) { |
129 | 2 | if (existed_tablet->tablet_path() == tablet->tablet_path()) { |
130 | 0 | return Status::Error<ENGINE_INSERT_EXISTS_TABLE>( |
131 | 0 | "add the same tablet twice! tablet_id={}, tablet_path={}", tablet_id, |
132 | 0 | tablet->tablet_path()); |
133 | 0 | } |
134 | 2 | if (existed_tablet->data_dir() == tablet->data_dir()) { |
135 | 0 | return Status::Error<ENGINE_INSERT_EXISTS_TABLE>( |
136 | 0 | "add tablet with same data dir twice! tablet_id={}", tablet_id); |
137 | 0 | } |
138 | 2 | } |
139 | | |
140 | 2 | MonotonicStopWatch watch; |
141 | 2 | watch.start(); |
142 | | |
143 | | // During storage migration, the tablet is moved to another disk, so we have to check |
144 | | // whether the new tablet's rowset version is larger than the old one to prevent losing data during |
145 | | // migration |
146 | 2 | int64_t old_time, new_time; |
147 | 2 | int32_t old_version, new_version; |
148 | 2 | { |
149 | 2 | std::shared_lock rdlock(existed_tablet->get_header_lock()); |
150 | 2 | const RowsetSharedPtr old_rowset = existed_tablet->rowset_with_max_version(); |
151 | 2 | const RowsetSharedPtr new_rowset = tablet->rowset_with_max_version(); |
152 | | // If the new tablet is empty, it is a newly created schema change tablet. |
153 | | // The old tablet is dropped before the new tablet is added, so an old tablet should not exist here. |
154 | 2 | if (new_rowset == nullptr) { |
155 | | // it seems useless to call unlock and return here. |
156 | | // it could prevent error when log level is changed in the future. |
157 | 0 | return Status::Error<ENGINE_INSERT_EXISTS_TABLE>( |
158 | 0 | "new tablet is empty and old tablet exists. it should not happen. tablet_id={}", |
159 | 0 | tablet_id); |
160 | 0 | } |
161 | 2 | old_time = old_rowset == nullptr ? -1 : old_rowset->creation_time(); |
162 | 2 | new_time = new_rowset->creation_time(); |
163 | 2 | old_version = old_rowset == nullptr ? -1 : old_rowset->end_version(); |
164 | 2 | new_version = new_rowset->end_version(); |
165 | 2 | } |
166 | 2 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "GetExistTabletVersion", "AddTablet"), |
167 | 2 | static_cast<int64_t>(watch.reset())); |
168 | | |
169 | | // In the restore process, we replace all original files in the tablet dir with |
170 | | // the downloaded snapshot files. Then we try to reload the tablet header. |
171 | | // force == true means we forcibly replace the Tablet in tablet_map |
172 | | // with the new one. But if we do so, the files in the tablet dir will be |
173 | | // dropped when the original Tablet is destructed. |
174 | | // So we set keep_files == true to not delete files when the |
175 | | // original Tablet is destructed. |
176 | | // During the restore process, the snapshot loader |
177 | | // replaced the old tablet's rowsets with new rowsets, but the tablet path is reused; if we dropped files |
178 | | // here, the new rowsets' files would also be dropped, so keep the files here. |
179 | 2 | bool keep_files = force ? true : false; |
180 | 2 | if (force || |
181 | 2 | (new_version > old_version || (new_version == old_version && new_time >= old_time))) { |
182 | | // check if new tablet's meta is in store and add new tablet's meta to meta store |
183 | 2 | res = _add_tablet_to_map_unlocked(tablet_id, tablet, update_meta, keep_files, |
184 | 2 | true /*drop_old*/, profile); |
185 | 2 | } else { |
186 | 0 | tablet->set_tablet_state(TABLET_SHUTDOWN); |
187 | 0 | tablet->save_meta(); |
188 | 0 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "SaveMeta", "AddTablet"), |
189 | 0 | static_cast<int64_t>(watch.reset())); |
190 | 0 | { |
191 | 0 | std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock(_shutdown_tablets_lock); |
192 | 0 | _shutdown_tablets.push_back(tablet); |
193 | 0 | } |
194 | | |
195 | 0 | res = Status::Error<ENGINE_INSERT_OLD_TABLET>( |
196 | 0 | "set tablet to shutdown state. tablet_id={}, tablet_path={}", tablet->tablet_id(), |
197 | 0 | tablet->tablet_path()); |
198 | 0 | } |
199 | 2 | LOG(WARNING) << "add duplicated tablet. force=" << force << ", res=" << res |
200 | 2 | << ", tablet_id=" << tablet_id << ", old_version=" << old_version |
201 | 2 | << ", new_version=" << new_version << ", old_time=" << old_time |
202 | 2 | << ", new_time=" << new_time |
203 | 2 | << ", old_tablet_path=" << existed_tablet->tablet_path() |
204 | 2 | << ", new_tablet_path=" << tablet->tablet_path(); |
205 | | |
206 | 2 | return res; |
207 | 2 | } |
208 | | |
209 | | Status TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id, |
210 | | const TabletSharedPtr& tablet, bool update_meta, |
211 | | bool keep_files, bool drop_old, |
212 | 26 | RuntimeProfile* profile) { |
213 | | // check if new tablet's meta is in store and add new tablet's meta to meta store |
214 | 26 | Status res = Status::OK(); |
215 | 26 | MonotonicStopWatch watch; |
216 | 26 | watch.start(); |
217 | 26 | if (update_meta) { |
218 | | // call tablet save meta in order to valid the meta |
219 | 26 | tablet->save_meta(); |
220 | 26 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "SaveMeta", "AddTablet"), |
221 | 26 | static_cast<int64_t>(watch.reset())); |
222 | 26 | } |
223 | 26 | if (drop_old) { |
224 | | // If the new tablet is fresher than the existing one, then replace |
225 | | // the existing tablet with the new one. |
226 | | // Use default replica_id to ignore whether replica_id is match when drop tablet. |
227 | 2 | Status status = _drop_tablet_unlocked(tablet_id, /* replica_id */ 0, keep_files, false); |
228 | 2 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "DropOldTablet", "AddTablet"), |
229 | 2 | static_cast<int64_t>(watch.reset())); |
230 | 2 | RETURN_NOT_OK_STATUS_WITH_WARN( |
231 | 2 | status, strings::Substitute("failed to drop old tablet when add new tablet. " |
232 | 2 | "tablet_id=$0", |
233 | 2 | tablet_id)); |
234 | 2 | } |
235 | | // Register tablet into DataDir, so that we can manage tablet from |
236 | | // the perspective of root path. |
237 | | // Example: unregister all tables when a bad disk found. |
238 | 26 | tablet->register_tablet_into_dir(); |
239 | 26 | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
240 | 26 | tablet_map[tablet_id] = tablet; |
241 | 26 | _add_tablet_to_partition(tablet); |
242 | | // TODO: remove the multiplication by 2 of the tablet meta mem size. |
243 | | // Because the table schema is copied into the tablet, the memory cost is doubled, |
244 | | // so we multiply by 2 here. |
245 | 26 | _tablet_meta_mem_tracker->consume(tablet->tablet_meta()->mem_size() * 2); |
246 | 26 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RegisterTabletInfo", "AddTablet"), |
247 | 26 | static_cast<int64_t>(watch.reset())); |
248 | | |
249 | 26 | VLOG_NOTICE << "add tablet to map successfully." |
250 | 0 | << " tablet_id=" << tablet_id; |
251 | | |
252 | 26 | return res; |
253 | 26 | } |
254 | | |
255 | 0 | bool TabletManager::check_tablet_id_exist(TTabletId tablet_id) { |
256 | 0 | std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id)); |
257 | 0 | return _check_tablet_id_exist_unlocked(tablet_id); |
258 | 0 | } |
259 | | |
260 | 0 | bool TabletManager::_check_tablet_id_exist_unlocked(TTabletId tablet_id) { |
261 | 0 | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
262 | 0 | return tablet_map.find(tablet_id) != tablet_map.end(); |
263 | 0 | } |
264 | | |
265 | | Status TabletManager::create_tablet(const TCreateTabletReq& request, std::vector<DataDir*> stores, |
266 | 27 | RuntimeProfile* profile) { |
267 | 27 | SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); |
268 | 27 | DorisMetrics::instance()->create_tablet_requests_total->increment(1); |
269 | | |
270 | 27 | int64_t tablet_id = request.tablet_id; |
271 | 27 | LOG(INFO) << "begin to create tablet. tablet_id=" << tablet_id |
272 | 27 | << ", table_id=" << request.table_id << ", partition_id=" << request.partition_id |
273 | 27 | << ", replica_id=" << request.replica_id; |
274 | | |
275 | | // When we create rollup tablet A (assume on shard-1) from tablet B (assume on shard-2), |
276 | | // we need to take the write lock on shard-1 and then the read lock on shard-2. |
277 | | // If rollup tablet C (assume on shard-2) is being created from tablet D (assume on shard-1) at the same time, we will deadlock. |
278 | 27 | std::unique_lock two_tablet_lock(_two_tablet_mtx, std::defer_lock); |
279 | 27 | bool in_restore_mode = request.__isset.in_restore_mode && request.in_restore_mode; |
280 | 27 | bool is_schema_change_or_atomic_restore = |
281 | 27 | request.__isset.base_tablet_id && request.base_tablet_id > 0; |
282 | 27 | bool need_two_lock = |
283 | 27 | is_schema_change_or_atomic_restore && |
284 | 27 | ((_tablets_shards_mask & request.base_tablet_id) != (_tablets_shards_mask & tablet_id)); |
285 | 27 | if (need_two_lock) { |
286 | 0 | SCOPED_TIMER(ADD_TIMER(profile, "GetTwoTableLock")); |
287 | 0 | two_tablet_lock.lock(); |
288 | 0 | } |
289 | | |
290 | 27 | MonotonicStopWatch shard_lock_watch; |
291 | 27 | shard_lock_watch.start(); |
292 | 27 | std::lock_guard wrlock(_get_tablets_shard_lock(tablet_id)); |
293 | 27 | shard_lock_watch.stop(); |
294 | 27 | COUNTER_UPDATE(ADD_TIMER(profile, "GetShardLock"), |
295 | 27 | static_cast<int64_t>(shard_lock_watch.elapsed_time())); |
296 | | // Make create_tablet operation to be idempotent: |
297 | | // 1. Return true if tablet with same tablet_id and schema_hash exist; |
298 | | // false if tablet with same tablet_id but different schema_hash exist. |
299 | | // 2. When this is an alter task, if the tablet(both tablet_id and schema_hash are |
300 | | // same) already exists, then just return true (a duplicate request). But if |
301 | | // tablet_id exist but with different schema_hash, return an error(report task will |
302 | | // eventually trigger its deletion). |
303 | 27 | { |
304 | 27 | SCOPED_TIMER(ADD_TIMER(profile, "GetTabletUnlocked")); |
305 | 27 | if (_get_tablet_unlocked(tablet_id) != nullptr) { |
306 | 3 | LOG(INFO) << "success to create tablet. tablet already exist. tablet_id=" << tablet_id; |
307 | 3 | return Status::OK(); |
308 | 3 | } |
309 | 27 | } |
310 | | |
311 | 24 | TabletSharedPtr base_tablet = nullptr; |
312 | | // If the CreateTabletReq has base_tablet_id then it is an alter-tablet request |
313 | 24 | if (is_schema_change_or_atomic_restore) { |
314 | | // If base_tablet_id's shard lock is different from new_tablet_id's, we need to lock it. |
315 | 0 | if (need_two_lock) { |
316 | 0 | SCOPED_TIMER(ADD_TIMER(profile, "GetBaseTablet")); |
317 | 0 | base_tablet = get_tablet(request.base_tablet_id); |
318 | 0 | two_tablet_lock.unlock(); |
319 | 0 | } else { |
320 | 0 | SCOPED_TIMER(ADD_TIMER(profile, "GetBaseTabletUnlocked")); |
321 | 0 | base_tablet = _get_tablet_unlocked(request.base_tablet_id); |
322 | 0 | } |
323 | 0 | if (base_tablet == nullptr) { |
324 | 0 | DorisMetrics::instance()->create_tablet_requests_failed->increment(1); |
325 | 0 | return Status::Error<TABLE_CREATE_META_ERROR>( |
326 | 0 | "fail to create tablet(change schema/atomic restore), base tablet does not " |
327 | 0 | "exist. new_tablet_id={}, base_tablet_id={}", |
328 | 0 | tablet_id, request.base_tablet_id); |
329 | 0 | } |
330 | | // If we are doing schema-change or atomic-restore, we should use the same data dir |
331 | | // TODO(lingbin): A little trick here, the directory should be determined before |
332 | | // entering this method |
333 | | // |
334 | | // ATTN: Since all restored replicas will be saved to HDD, so no storage_medium check here. |
335 | 0 | if (in_restore_mode || |
336 | 0 | request.storage_medium == base_tablet->data_dir()->storage_medium()) { |
337 | 0 | LOG(INFO) << "create tablet use the base tablet data dir. tablet_id=" << tablet_id |
338 | 0 | << ", base tablet_id=" << request.base_tablet_id |
339 | 0 | << ", data dir=" << base_tablet->data_dir()->path(); |
340 | 0 | stores.clear(); |
341 | 0 | stores.push_back(base_tablet->data_dir()); |
342 | 0 | } |
343 | 0 | } |
344 | | |
345 | | // set alter type to schema-change. it is useless |
346 | 24 | TabletSharedPtr tablet = _internal_create_tablet_unlocked( |
347 | 24 | request, is_schema_change_or_atomic_restore, base_tablet.get(), stores, profile); |
348 | 24 | if (tablet == nullptr) { |
349 | 0 | DorisMetrics::instance()->create_tablet_requests_failed->increment(1); |
350 | 0 | return Status::Error<CE_CMD_PARAMS_ERROR>("fail to create tablet. tablet_id={}", |
351 | 0 | request.tablet_id); |
352 | 0 | } |
353 | | |
354 | 24 | LOG(INFO) << "success to create tablet. tablet_id=" << tablet_id; |
355 | 24 | return Status::OK(); |
356 | 24 | } |
357 | | |
358 | | TabletSharedPtr TabletManager::_internal_create_tablet_unlocked( |
359 | | const TCreateTabletReq& request, const bool is_schema_change, const Tablet* base_tablet, |
360 | 24 | const std::vector<DataDir*>& data_dirs, RuntimeProfile* profile) { |
361 | | // If in schema-change state, base_tablet must also be provided. |
362 | | // i.e., is_schema_change and base_tablet are either assigned or not assigned |
363 | 24 | DCHECK((is_schema_change && base_tablet) || (!is_schema_change && !base_tablet)); |
364 | | |
365 | | // NOTE: The existence of tablet_id and schema_hash has already been checked, |
366 | | // no need check again here. |
367 | | |
368 | 24 | const std::string parent_timer_name = "InternalCreateTablet"; |
369 | 24 | SCOPED_TIMER(ADD_TIMER(profile, parent_timer_name)); |
370 | | |
371 | 24 | MonotonicStopWatch watch; |
372 | 24 | watch.start(); |
373 | 24 | auto create_meta_timer = ADD_CHILD_TIMER(profile, "CreateMeta", parent_timer_name); |
374 | 24 | auto tablet = _create_tablet_meta_and_dir_unlocked(request, is_schema_change, base_tablet, |
375 | 24 | data_dirs, profile); |
376 | 24 | COUNTER_UPDATE(create_meta_timer, static_cast<int64_t>(watch.reset())); |
377 | 24 | if (tablet == nullptr) { |
378 | 0 | return nullptr; |
379 | 0 | } |
380 | | |
381 | 24 | int64_t new_tablet_id = request.tablet_id; |
382 | 24 | int32_t new_schema_hash = request.tablet_schema.schema_hash; |
383 | | |
384 | | // should remove the tablet's pending_id no matter whether create-tablet succeeds or not |
385 | 24 | DataDir* data_dir = tablet->data_dir(); |
386 | | |
387 | | // TODO(yiguolei) |
388 | | // the following code is very difficult to understand because it mixes alter tablet v2 |
389 | | // and alter tablet v1; the alter tablet v1 code should be removed after v0.12 |
390 | 24 | Status res = Status::OK(); |
391 | 24 | bool is_tablet_added = false; |
392 | 24 | do { |
393 | 24 | res = tablet->init(); |
394 | 24 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "TabletInit", parent_timer_name), |
395 | 24 | static_cast<int64_t>(watch.reset())); |
396 | 24 | if (!res.ok()) { |
397 | 0 | LOG(WARNING) << "tablet init failed. tablet:" << tablet->full_name(); |
398 | 0 | break; |
399 | 0 | } |
400 | | |
401 | | // Create init version if this is not a restore mode replica and request.version is set |
402 | | // bool in_restore_mode = request.__isset.in_restore_mode && request.in_restore_mode; |
403 | | // if (!in_restore_mode && request.__isset.version) { |
404 | | // create initial rowset before add it to storage engine could omit many locks |
405 | 24 | res = tablet->create_initial_rowset(request.version); |
406 | 24 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "InitRowset", parent_timer_name), |
407 | 24 | static_cast<int64_t>(watch.reset())); |
408 | 24 | if (!res.ok()) { |
409 | 0 | LOG(WARNING) << "fail to create initial version for tablet. res=" << res; |
410 | 0 | break; |
411 | 0 | } |
412 | | |
413 | 24 | if (is_schema_change) { |
414 | | // if this is a new alter tablet, has to set its state to not ready |
415 | | // because schema change handler depends on it to check whether history data |
416 | | // convert finished |
417 | 0 | tablet->set_tablet_state(TabletState::TABLET_NOTREADY); |
418 | 0 | } |
419 | | // Add tablet to StorageEngine will make it visible to user |
420 | | // Will persist tablet meta |
421 | 24 | auto add_tablet_timer = ADD_CHILD_TIMER(profile, "AddTablet", parent_timer_name); |
422 | 24 | res = _add_tablet_unlocked(new_tablet_id, tablet, /*update_meta*/ true, false, profile); |
423 | 24 | COUNTER_UPDATE(add_tablet_timer, static_cast<int64_t>(watch.reset())); |
424 | 24 | if (!res.ok()) { |
425 | 0 | LOG(WARNING) << "fail to add tablet to StorageEngine. res=" << res; |
426 | 0 | break; |
427 | 0 | } |
428 | 24 | is_tablet_added = true; |
429 | | |
430 | | // TODO(lingbin): The following logic seems useless, can be removed? |
431 | | // Because if _add_tablet_unlocked() returns OK, we must be able to get it from the map. |
432 | 24 | TabletSharedPtr tablet_ptr = _get_tablet_unlocked(new_tablet_id); |
433 | 24 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "GetTablet", parent_timer_name), |
434 | 24 | static_cast<int64_t>(watch.reset())); |
435 | 24 | if (tablet_ptr == nullptr) { |
436 | 0 | res = Status::Error<TABLE_NOT_FOUND>("fail to get tablet. res={}", res); |
437 | 0 | break; |
438 | 0 | } |
439 | 24 | } while (false); |
440 | | |
441 | 24 | if (res.ok()) { |
442 | 24 | return tablet; |
443 | 24 | } |
444 | | // something is wrong, we need clear environment |
445 | 0 | if (is_tablet_added) { |
446 | 0 | Status status = _drop_tablet_unlocked(new_tablet_id, request.replica_id, false, false); |
447 | 0 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "DropTablet", parent_timer_name), |
448 | 0 | static_cast<int64_t>(watch.reset())); |
449 | 0 | if (!status.ok()) { |
450 | 0 | LOG(WARNING) << "fail to drop tablet when create tablet failed. res=" << res; |
451 | 0 | } |
452 | 0 | } else { |
453 | 0 | tablet->delete_all_files(); |
454 | 0 | TabletMetaManager::remove(data_dir, new_tablet_id, new_schema_hash); |
455 | 0 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RemoveTabletFiles", parent_timer_name), |
456 | 0 | static_cast<int64_t>(watch.reset())); |
457 | 0 | } |
458 | 0 | return nullptr; |
459 | 24 | } |
460 | | |
461 | 24 | static string _gen_tablet_dir(const string& dir, int16_t shard_id, int64_t tablet_id) { |
462 | 24 | string path = dir; |
463 | 24 | path = path_util::join_path_segments(path, DATA_PREFIX); |
464 | 24 | path = path_util::join_path_segments(path, std::to_string(shard_id)); |
465 | 24 | path = path_util::join_path_segments(path, std::to_string(tablet_id)); |
466 | 24 | return path; |
467 | 24 | } |
468 | | |
469 | | TabletSharedPtr TabletManager::_create_tablet_meta_and_dir_unlocked( |
470 | | const TCreateTabletReq& request, const bool is_schema_change, const Tablet* base_tablet, |
471 | 24 | const std::vector<DataDir*>& data_dirs, RuntimeProfile* profile) { |
472 | 24 | string pending_id = StrCat(TABLET_ID_PREFIX, request.tablet_id); |
473 | | // Many attempts are made here in the hope that even if a disk fails, it can still continue. |
474 | 24 | std::string parent_timer_name = "CreateMeta"; |
475 | 24 | MonotonicStopWatch watch; |
476 | 24 | watch.start(); |
477 | 24 | for (auto& data_dir : data_dirs) { |
478 | 24 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RemovePendingIds", parent_timer_name), |
479 | 24 | static_cast<int64_t>(watch.reset())); |
480 | | |
481 | 24 | TabletMetaSharedPtr tablet_meta; |
482 | | // if create meta failed, do not need to clean dir, because it is only in memory |
483 | 24 | Status res = _create_tablet_meta_unlocked(request, data_dir, is_schema_change, base_tablet, |
484 | 24 | &tablet_meta); |
485 | 24 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "CreateMetaUnlock", parent_timer_name), |
486 | 24 | static_cast<int64_t>(watch.reset())); |
487 | 24 | if (!res.ok()) { |
488 | 0 | LOG(WARNING) << "fail to create tablet meta. res=" << res |
489 | 0 | << ", root=" << data_dir->path(); |
490 | 0 | continue; |
491 | 0 | } |
492 | | |
493 | 24 | string tablet_dir = |
494 | 24 | _gen_tablet_dir(data_dir->path(), tablet_meta->shard_id(), request.tablet_id); |
495 | 24 | string schema_hash_dir = path_util::join_path_segments( |
496 | 24 | tablet_dir, std::to_string(request.tablet_schema.schema_hash)); |
497 | | |
498 | | // Because the tablet is removed asynchronously, the dir may still exist when BE |
499 | | // receives the create-tablet request again, for example for a retried schema-change request. |
500 | 24 | bool exists = true; |
501 | 24 | res = io::global_local_filesystem()->exists(schema_hash_dir, &exists); |
502 | 24 | if (!res.ok()) { |
503 | 0 | continue; |
504 | 0 | } |
505 | 24 | if (exists) { |
506 | 0 | LOG(WARNING) << "skip this dir because tablet path exist, path=" << schema_hash_dir; |
507 | 0 | continue; |
508 | 24 | } else { |
509 | 24 | Status st = io::global_local_filesystem()->create_directory(schema_hash_dir); |
510 | 24 | if (!st.ok()) { |
511 | 0 | continue; |
512 | 0 | } |
513 | 24 | } |
514 | | |
515 | 24 | if (tablet_meta->partition_id() <= 0) { |
516 | 1 | LOG(WARNING) << "invalid partition id " << tablet_meta->partition_id() << ", tablet " |
517 | 1 | << tablet_meta->tablet_id(); |
518 | 1 | } |
519 | | |
520 | 24 | TabletSharedPtr new_tablet = Tablet::create_tablet_from_meta(tablet_meta, data_dir); |
521 | 24 | COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "CreateTabletFromMeta", parent_timer_name), |
522 | 24 | static_cast<int64_t>(watch.reset())); |
523 | 24 | DCHECK(new_tablet != nullptr); |
524 | 24 | return new_tablet; |
525 | 24 | } |
526 | 0 | return nullptr; |
527 | 24 | } |
528 | | |
529 | | Status TabletManager::drop_tablet(TTabletId tablet_id, TReplicaId replica_id, |
530 | 22 | bool is_drop_table_or_partition) { |
531 | 22 | auto& shard = _get_tablets_shard(tablet_id); |
532 | 22 | std::lock_guard wrlock(shard.lock); |
533 | 22 | return _drop_tablet_unlocked(tablet_id, replica_id, false, is_drop_table_or_partition); |
534 | 22 | } |
535 | | |
536 | | // Drop specified tablet. |
537 | | Status TabletManager::_drop_tablet_unlocked(TTabletId tablet_id, TReplicaId replica_id, |
538 | 24 | bool keep_files, bool is_drop_table_or_partition) { |
539 | 24 | LOG(INFO) << "begin drop tablet. tablet_id=" << tablet_id << ", replica_id=" << replica_id |
540 | 24 | << ", is_drop_table_or_partition=" << is_drop_table_or_partition; |
541 | 24 | DorisMetrics::instance()->drop_tablet_requests_total->increment(1); |
542 | | |
543 | 24 | RETURN_IF_ERROR(register_transition_tablet(tablet_id, "drop tablet")); |
544 | 24 | Defer defer {[&]() { unregister_transition_tablet(tablet_id, "drop tablet"); }}; |
545 | | |
546 | | // Fetch tablet which need to be dropped |
547 | 24 | TabletSharedPtr to_drop_tablet = _get_tablet_unlocked(tablet_id); |
548 | 24 | if (to_drop_tablet == nullptr) { |
549 | 1 | LOG(WARNING) << "fail to drop tablet because it does not exist. " |
550 | 1 | << "tablet_id=" << tablet_id; |
551 | 1 | return Status::OK(); |
552 | 1 | } |
553 | | |
554 | | // We should compare replica id to avoid dropping new cloned tablet. |
555 | | // Iff request replica id is 0, FE may be an older release, then we drop this tablet as before. |
556 | 23 | if (to_drop_tablet->replica_id() != replica_id && replica_id != 0) { |
557 | 0 | return Status::Aborted("replica_id not match({} vs {})", to_drop_tablet->replica_id(), |
558 | 0 | replica_id); |
559 | 0 | } |
560 | | |
561 | 23 | _remove_tablet_from_partition(to_drop_tablet); |
562 | 23 | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
563 | 23 | tablet_map.erase(tablet_id); |
564 | 23 | to_drop_tablet->clear_cache(); |
565 | | |
566 | 23 | if (!keep_files) { |
567 | | // drop tablet will update tablet meta, should lock |
568 | 23 | std::lock_guard<std::shared_mutex> wrlock(to_drop_tablet->get_header_lock()); |
569 | 23 | SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); |
570 | 23 | LOG(INFO) << "set tablet to shutdown state and remove it from memory. " |
571 | 23 | << "tablet_id=" << tablet_id << ", tablet_path=" << to_drop_tablet->tablet_path(); |
572 | | // NOTE: has to update tablet here, but must not update tablet meta directly. |
573 | | // because other thread may hold the tablet object, they may save meta too. |
574 | | // If update meta directly here, other thread may override the meta |
575 | | // and the tablet will be loaded at restart time. |
576 | | // To avoid this exception, we first set the state of the tablet to `SHUTDOWN`. |
577 | 23 | to_drop_tablet->set_tablet_state(TABLET_SHUTDOWN); |
578 | | // We must record unused remote rowsets path info to OlapMeta before tablet state is marked as TABLET_SHUTDOWN in OlapMeta, |
579 | | // otherwise if BE shuts down after saving tablet state, this remote rowsets path info will be lost. |
580 | 23 | if (is_drop_table_or_partition) { |
581 | 0 | RETURN_IF_ERROR(to_drop_tablet->remove_all_remote_rowsets()); |
582 | 0 | } |
583 | 23 | to_drop_tablet->save_meta(); |
584 | 23 | { |
585 | 23 | std::lock_guard<std::shared_mutex> wrdlock(_shutdown_tablets_lock); |
586 | 23 | _shutdown_tablets.push_back(to_drop_tablet); |
587 | 23 | } |
588 | 23 | } |
589 | | |
590 | 23 | to_drop_tablet->deregister_tablet_from_dir(); |
591 | 23 | _tablet_meta_mem_tracker->release(to_drop_tablet->tablet_meta()->mem_size() * 2); |
592 | 23 | return Status::OK(); |
593 | 23 | } |
594 | | |
595 | 2.10k | TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, bool include_deleted, string* err) { |
596 | 2.10k | std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id)); |
597 | 2.10k | return _get_tablet_unlocked(tablet_id, include_deleted, err); |
598 | 2.10k | } |
599 | | |
600 | 25 | std::vector<TabletSharedPtr> TabletManager::get_all_tablet(std::function<bool(Tablet*)>&& filter) { |
601 | 25 | std::vector<TabletSharedPtr> res; |
602 | 25 | for_each_tablet([&](const TabletSharedPtr& tablet) { res.emplace_back(tablet); }, |
603 | 25 | std::move(filter)); |
604 | 25 | return res; |
605 | 25 | } |
606 | | |
607 | | void TabletManager::for_each_tablet(std::function<void(const TabletSharedPtr&)>&& handler, |
608 | 143 | std::function<bool(Tablet*)>&& filter) { |
609 | 143 | std::vector<TabletSharedPtr> tablets; |
610 | 143 | for (const auto& tablets_shard : _tablets_shards) { |
611 | 143 | tablets.clear(); |
612 | 143 | { |
613 | 143 | std::shared_lock rdlock(tablets_shard.lock); |
614 | 143 | for (const auto& [id, tablet] : tablets_shard.tablet_map) { |
615 | 4 | if (filter(tablet.get())) { |
616 | 4 | tablets.emplace_back(tablet); |
617 | 4 | } |
618 | 4 | } |
619 | 143 | } |
620 | 143 | for (const auto& tablet : tablets) { |
621 | 4 | handler(tablet); |
622 | 4 | } |
623 | 143 | } |
624 | 143 | } |
625 | | |
626 | | std::pair<TabletSharedPtr, Status> TabletManager::get_tablet_and_status(TTabletId tablet_id, |
627 | 0 | bool include_deleted) { |
628 | 0 | std::string err; |
629 | 0 | auto tablet = get_tablet(tablet_id, include_deleted, &err); |
630 | 0 | if (tablet == nullptr) { |
631 | 0 | return {tablet, |
632 | 0 | Status::InternalError("failed to get tablet: {}, reason: {}", tablet_id, err)}; |
633 | 0 | } |
634 | | |
635 | 0 | return {tablet, Status::OK()}; |
636 | 0 | } |
637 | | |
638 | | TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, bool include_deleted, |
639 | 2.10k | string* err) { |
640 | 2.10k | TabletSharedPtr tablet; |
641 | 2.10k | tablet = _get_tablet_unlocked(tablet_id); |
642 | 2.10k | if (tablet == nullptr && include_deleted) { |
643 | 3 | std::shared_lock rdlock(_shutdown_tablets_lock); |
644 | 3 | for (auto& deleted_tablet : _shutdown_tablets) { |
645 | 2 | CHECK(deleted_tablet != nullptr) << "deleted tablet is nullptr"; |
646 | 2 | if (deleted_tablet->tablet_id() == tablet_id) { |
647 | 2 | tablet = deleted_tablet; |
648 | 2 | break; |
649 | 2 | } |
650 | 2 | } |
651 | 3 | } |
652 | | |
653 | 2.10k | if (tablet == nullptr) { |
654 | 11 | if (err != nullptr) { |
655 | 0 | *err = "tablet does not exist. " + BackendOptions::get_localhost(); |
656 | 0 | } |
657 | 11 | return nullptr; |
658 | 11 | } |
659 | | |
660 | 2.09k | if (!tablet->is_used()) { |
661 | 0 | LOG(WARNING) << "tablet cannot be used. tablet=" << tablet_id; |
662 | 0 | if (err != nullptr) { |
663 | 0 | *err = "tablet cannot be used. " + BackendOptions::get_localhost(); |
664 | 0 | } |
665 | 0 | return nullptr; |
666 | 0 | } |
667 | | |
668 | 2.09k | return tablet; |
669 | 2.09k | } |
670 | | |
671 | | TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, TabletUid tablet_uid, |
672 | 0 | bool include_deleted, string* err) { |
673 | 0 | std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id)); |
674 | 0 | TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, include_deleted, err); |
675 | 0 | if (tablet != nullptr && tablet->tablet_uid() == tablet_uid) { |
676 | 0 | return tablet; |
677 | 0 | } |
678 | 0 | return nullptr; |
679 | 0 | } |
680 | | |
681 | 0 | uint64_t TabletManager::get_rowset_nums() { |
682 | 0 | uint64_t rowset_nums = 0; |
683 | 0 | for_each_tablet([&](const TabletSharedPtr& tablet) { rowset_nums += tablet->version_count(); }, |
684 | 0 | filter_all_tablets); |
685 | 0 | return rowset_nums; |
686 | 0 | } |
687 | | |
688 | 0 | uint64_t TabletManager::get_segment_nums() { |
689 | 0 | uint64_t segment_nums = 0; |
690 | 0 | for_each_tablet([&](const TabletSharedPtr& tablet) { segment_nums += tablet->segment_count(); }, |
691 | 0 | filter_all_tablets); |
692 | 0 | return segment_nums; |
693 | 0 | } |
694 | | |
695 | | bool TabletManager::get_tablet_id_and_schema_hash_from_path(const string& path, |
696 | | TTabletId* tablet_id, |
697 | 11 | TSchemaHash* schema_hash) { |
698 | | // the path like: /data/14/10080/964828783/ |
699 | 11 | static re2::RE2 normal_re("/data/\\d+/(\\d+)/(\\d+)($|/)"); |
700 | | // match tablet schema hash data path, for example, the path is /data/1/16791/29998 |
701 | | // 1 is shard id , 16791 is tablet id, 29998 is schema hash |
702 | 11 | if (RE2::PartialMatch(path, normal_re, tablet_id, schema_hash)) { |
703 | 7 | return true; |
704 | 7 | } |
705 | | |
706 | | // If we can't match normal path pattern, this may be a path which is a empty tablet |
707 | | // directory. Use this pattern to match empty tablet directory. In this case schema_hash |
708 | | // will be set to zero. |
709 | 4 | static re2::RE2 empty_tablet_re("/data/\\d+/(\\d+)($|/$)"); |
710 | 4 | if (!RE2::PartialMatch(path, empty_tablet_re, tablet_id)) { |
711 | 2 | return false; |
712 | 2 | } |
713 | 2 | *schema_hash = 0; |
714 | 2 | return true; |
715 | 4 | } |
716 | | |
717 | 4 | bool TabletManager::get_rowset_id_from_path(const string& path, RowsetId* rowset_id) { |
718 | | // the path like: /data/14/10080/964828783/02000000000000969144d8725cb62765f9af6cd3125d5a91_0.dat |
719 | 4 | static re2::RE2 re("/data/\\d+/\\d+/\\d+/([A-Fa-f0-9]+)_.*"); |
720 | 4 | string id_str; |
721 | 4 | bool ret = RE2::PartialMatch(path, re, &id_str); |
722 | 4 | if (ret) { |
723 | 2 | rowset_id->init(id_str); |
724 | 2 | return true; |
725 | 2 | } |
726 | 2 | return false; |
727 | 4 | } |
728 | | |
729 | 0 | void TabletManager::get_tablet_stat(TTabletStatResult* result) { |
730 | 0 | std::shared_ptr<std::vector<TTabletStat>> local_cache; |
731 | 0 | { |
732 | 0 | std::lock_guard<std::mutex> guard(_tablet_stat_cache_mutex); |
733 | 0 | local_cache = _tablet_stat_list_cache; |
734 | 0 | } |
735 | 0 | result->__set_tablet_stat_list(*local_cache); |
736 | 0 | } |
737 | | |
738 | | TabletSharedPtr TabletManager::find_best_tablet_to_compaction( |
739 | | CompactionType compaction_type, DataDir* data_dir, |
740 | | const std::unordered_set<TTabletId>& tablet_submitted_compaction, uint32_t* score, |
741 | | const std::unordered_map<std::string_view, std::shared_ptr<CumulativeCompactionPolicy>>& |
742 | 76 | all_cumulative_compaction_policies) { |
743 | 76 | int64_t now_ms = UnixMillis(); |
744 | 76 | const string& compaction_type_str = |
745 | 76 | compaction_type == CompactionType::BASE_COMPACTION ? "base" : "cumulative"; |
746 | 76 | uint32_t highest_score = 0; |
747 | 76 | uint32_t compaction_score = 0; |
748 | 76 | TabletSharedPtr best_tablet; |
749 | 76 | auto handler = [&](const TabletSharedPtr& tablet_ptr) { |
750 | 4 | if (tablet_ptr->tablet_meta()->tablet_schema()->disable_auto_compaction()) { |
751 | 0 | LOG_EVERY_N(INFO, 500) << "Tablet " << tablet_ptr->tablet_id() |
752 | 0 | << " will be ignored by automatic compaction tasks since it's " |
753 | 0 | << "set to disabled automatic compaction."; |
754 | 0 | return; |
755 | 0 | } |
756 | | |
757 | 4 | if (config::enable_skip_tablet_compaction && |
758 | 4 | tablet_ptr->should_skip_compaction(compaction_type, UnixSeconds())) { |
759 | 2 | return; |
760 | 2 | } |
761 | 2 | if (!tablet_ptr->can_do_compaction(data_dir->path_hash(), compaction_type)) { |
762 | 1 | return; |
763 | 1 | } |
764 | | |
765 | 1 | auto search = tablet_submitted_compaction.find(tablet_ptr->tablet_id()); |
766 | 1 | if (search != tablet_submitted_compaction.end()) { |
767 | 0 | return; |
768 | 0 | } |
769 | | |
770 | 1 | int64_t last_failure_ms = tablet_ptr->last_cumu_compaction_failure_time(); |
771 | 1 | if (compaction_type == CompactionType::BASE_COMPACTION) { |
772 | 0 | last_failure_ms = tablet_ptr->last_base_compaction_failure_time(); |
773 | 0 | } |
774 | 1 | if (now_ms - last_failure_ms <= 5000) { |
775 | 0 | VLOG_DEBUG << "Too often to check compaction, skip it. " |
776 | 0 | << "compaction_type=" << compaction_type_str |
777 | 0 | << ", last_failure_time_ms=" << last_failure_ms |
778 | 0 | << ", tablet_id=" << tablet_ptr->tablet_id(); |
779 | 0 | return; |
780 | 0 | } |
781 | | |
782 | 1 | if (compaction_type == CompactionType::BASE_COMPACTION) { |
783 | 0 | std::unique_lock<std::mutex> lock(tablet_ptr->get_base_compaction_lock(), |
784 | 0 | std::try_to_lock); |
785 | 0 | if (!lock.owns_lock()) { |
786 | 0 | LOG(INFO) << "can not get base lock: " << tablet_ptr->tablet_id(); |
787 | 0 | return; |
788 | 0 | } |
789 | 1 | } else { |
790 | 1 | std::unique_lock<std::mutex> lock(tablet_ptr->get_cumulative_compaction_lock(), |
791 | 1 | std::try_to_lock); |
792 | 1 | if (!lock.owns_lock()) { |
793 | 0 | LOG(INFO) << "can not get cumu lock: " << tablet_ptr->tablet_id(); |
794 | 0 | return; |
795 | 0 | } |
796 | 1 | } |
797 | 1 | auto cumulative_compaction_policy = all_cumulative_compaction_policies.at( |
798 | 1 | tablet_ptr->tablet_meta()->compaction_policy()); |
799 | 1 | uint32_t current_compaction_score = |
800 | 1 | tablet_ptr->calc_compaction_score(compaction_type, cumulative_compaction_policy); |
801 | 1 | if (current_compaction_score < 5) { |
802 | 1 | tablet_ptr->set_skip_compaction(true, compaction_type, UnixSeconds()); |
803 | 1 | } |
804 | 1 | if (current_compaction_score > highest_score) { |
805 | 1 | highest_score = current_compaction_score; |
806 | 1 | compaction_score = current_compaction_score; |
807 | 1 | best_tablet = tablet_ptr; |
808 | 1 | } |
809 | 1 | }; |
810 | | |
811 | 76 | for_each_tablet(handler, filter_all_tablets); |
812 | 76 | if (best_tablet != nullptr) { |
813 | 1 | VLOG_CRITICAL << "Found the best tablet for compaction. " |
814 | 0 | << "compaction_type=" << compaction_type_str |
815 | 0 | << ", tablet_id=" << best_tablet->tablet_id() << ", path=" << data_dir->path() |
816 | 0 | << ", compaction_score=" << compaction_score |
817 | 0 | << ", highest_score=" << highest_score; |
818 | 1 | *score = compaction_score; |
819 | 1 | } |
820 | 76 | return best_tablet; |
821 | 76 | } |
822 | | |
// Builds a tablet from a serialized tablet meta and registers it in this manager.
//
// Steps, in order:
//   1. deserialize `meta_binary` and validate it against `tablet_id` / `schema_hash`
//      (a zero tablet uid is rejected);
//   2. if `restore` is set, force the meta state to TABLET_RUNNING;
//   3. create the Tablet object; when `check_path` is set, require its directory to exist;
//   4. a meta in TABLET_SHUTDOWN state is queued in _shutdown_tablets and reported as
//      TABLE_ALREADY_DELETED_ERROR;
//   5. init the tablet and add it through _add_tablet_unlocked() while holding the
//      exclusive lock returned by _get_tablets_shard_lock(tablet_id).
//
// `update_meta` and `force` are forwarded unchanged to _add_tablet_unlocked().
// Returns Status::OK() on success, otherwise the error of the first failing step.
Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_id,
                                            TSchemaHash schema_hash, const string& meta_binary,
                                            bool update_meta, bool force, bool restore,
                                            bool check_path) {
    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
    TabletMetaSharedPtr tablet_meta(new TabletMeta());
    Status status = tablet_meta->deserialize(meta_binary);
    if (!status.ok()) {
        return Status::Error<HEADER_PB_PARSE_FAILED>(
                "fail to load tablet because can not parse meta_binary string. tablet_id={}, "
                "schema_hash={}, path={}, status={}",
                tablet_id, schema_hash, data_dir->path(), status);
    }
    tablet_meta->init_rs_metas_fs(data_dir->fs());

    // check if tablet meta is valid
    if (tablet_meta->tablet_id() != tablet_id || tablet_meta->schema_hash() != schema_hash) {
        return Status::Error<HEADER_PB_PARSE_FAILED>(
                "fail to load tablet because meet invalid tablet meta. trying to load "
                "tablet(tablet_id={}, schema_hash={}), but meet tablet={}, path={}",
                tablet_id, schema_hash, tablet_meta->full_name(), data_dir->path());
    }
    if (tablet_meta->tablet_uid().hi == 0 && tablet_meta->tablet_uid().lo == 0) {
        return Status::Error<HEADER_PB_PARSE_FAILED>(
                "fail to load tablet because its uid == 0. tablet={}, path={}",
                tablet_meta->full_name(), data_dir->path());
    }

    if (restore) {
        // we're restoring tablet from trash, tablet state should be changed from shutdown back to running
        tablet_meta->set_tablet_state(TABLET_RUNNING);
    }

    // A zero partition id is only logged; loading continues.
    if (tablet_meta->partition_id() == 0) {
        LOG(WARNING) << "tablet=" << tablet_id << " load from meta but partition id eq 0";
    }

    TabletSharedPtr tablet = Tablet::create_tablet_from_meta(tablet_meta, data_dir);
    if (tablet == nullptr) {
        return Status::Error<TABLE_CREATE_FROM_HEADER_ERROR>(
                "fail to load tablet. tablet_id={}, schema_hash={}", tablet_id, schema_hash);
    }

    // NOTE: load_tablet_from_meta can be called in two cases:
    // case 1: BE start;
    // case 2: Clone Task/Restore
    // Case 1 does not need the path check because the BE is just starting and not ready;
    // checking the tablet meta state is enough to tell whether the tablet is deleted.
    // For case 2, if a tablet has just been copied to the local BE, it may be cleared by
    // the gc-thread (see perform_path_gc_by_tablet) because the tablet meta may not be
    // loaded into memory yet. So a clone task should check the path, then fail and retry.
    if (check_path) {
        bool exists = true;
        RETURN_IF_ERROR(io::global_local_filesystem()->exists(tablet->tablet_path(), &exists));
        if (!exists) {
            return Status::Error<TABLE_ALREADY_DELETED_ERROR>(
                    "tablet path not exists, create tablet failed, path={}", tablet->tablet_path());
        }
    }

    if (tablet_meta->tablet_state() == TABLET_SHUTDOWN) {
        {
            // Keep the tablet in the shutdown list even though loading is reported as failed.
            std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock(_shutdown_tablets_lock);
            _shutdown_tablets.push_back(tablet);
        }
        return Status::Error<TABLE_ALREADY_DELETED_ERROR>(
                "fail to load tablet because it is to be deleted. tablet_id={}, schema_hash={}, "
                "path={}",
                tablet_id, schema_hash, data_dir->path());
    }
    // NOTE: We do not check tablet's initial version here, because if BE restarts when
    // one tablet is doing schema-change, we may meet empty tablet.
    if (tablet->max_version().first == -1 && tablet->tablet_state() == TABLET_RUNNING) {
        // tablet state is invalid, drop tablet
        return Status::Error<TABLE_INDEX_VALIDATE_ERROR>(
                "fail to load tablet. it is in running state but without delta. tablet={}, path={}",
                tablet->full_name(), data_dir->path());
    }

    RETURN_NOT_OK_STATUS_WITH_WARN(
            tablet->init(),
            strings::Substitute("tablet init failed. tablet=$0", tablet->full_name()));

    RuntimeProfile profile("CreateTablet");
    // Exclusive shard lock: held from here until the function returns.
    std::lock_guard<std::shared_mutex> wrlock(_get_tablets_shard_lock(tablet_id));
    RETURN_NOT_OK_STATUS_WITH_WARN(
            _add_tablet_unlocked(tablet_id, tablet, update_meta, force, &profile),
            strings::Substitute("fail to add tablet. tablet=$0", tablet->full_name()));

    return Status::OK();
}
914 | | |
915 | | Status TabletManager::load_tablet_from_dir(DataDir* store, TTabletId tablet_id, |
916 | | SchemaHash schema_hash, const string& schema_hash_path, |
917 | 2 | bool force, bool restore) { |
918 | 2 | SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); |
919 | 2 | LOG(INFO) << "begin to load tablet from dir. " |
920 | 2 | << " tablet_id=" << tablet_id << " schema_hash=" << schema_hash |
921 | 2 | << " path = " << schema_hash_path << " force = " << force << " restore = " << restore; |
922 | | // not add lock here, because load_tablet_from_meta already add lock |
923 | 2 | std::string header_path = TabletMeta::construct_header_file_path(schema_hash_path, tablet_id); |
924 | | // should change shard id before load tablet |
925 | 2 | std::string shard_path = |
926 | 2 | path_util::dir_name(path_util::dir_name(path_util::dir_name(header_path))); |
927 | 2 | std::string shard_str = shard_path.substr(shard_path.find_last_of('/') + 1); |
928 | 2 | int32_t shard = stol(shard_str); |
929 | | |
930 | 2 | bool exists = false; |
931 | 2 | RETURN_IF_ERROR(io::global_local_filesystem()->exists(header_path, &exists)); |
932 | 2 | if (!exists) { |
933 | 0 | return Status::Error<FILE_NOT_EXIST>("fail to find header file. [header_path={}]", |
934 | 0 | header_path); |
935 | 0 | } |
936 | | |
937 | 2 | TabletMetaSharedPtr tablet_meta(new TabletMeta()); |
938 | 2 | if (!tablet_meta->create_from_file(header_path).ok()) { |
939 | 0 | return Status::Error<ENGINE_LOAD_INDEX_TABLE_ERROR>( |
940 | 0 | "fail to load tablet_meta. file_path={}", header_path); |
941 | 0 | } |
942 | 2 | TabletUid tablet_uid = TabletUid::gen_uid(); |
943 | | |
944 | | // remove rowset binlog metas |
945 | 2 | auto binlog_metas_file = fmt::format("{}/rowset_binlog_metas.pb", schema_hash_path); |
946 | 2 | bool binlog_metas_file_exists = false; |
947 | 2 | auto file_exists_status = |
948 | 2 | io::global_local_filesystem()->exists(binlog_metas_file, &binlog_metas_file_exists); |
949 | 2 | if (!file_exists_status.ok()) { |
950 | 0 | return file_exists_status; |
951 | 0 | } |
952 | 2 | bool contain_binlog = false; |
953 | 2 | RowsetBinlogMetasPB rowset_binlog_metas_pb; |
954 | 2 | if (binlog_metas_file_exists) { |
955 | 0 | auto binlog_meta_filesize = std::filesystem::file_size(binlog_metas_file); |
956 | 0 | if (binlog_meta_filesize > 0) { |
957 | 0 | contain_binlog = true; |
958 | 0 | RETURN_IF_ERROR(read_pb(binlog_metas_file, &rowset_binlog_metas_pb)); |
959 | 0 | VLOG_DEBUG << "load rowset binlog metas from file. file_path=" << binlog_metas_file; |
960 | 0 | } |
961 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(binlog_metas_file)); |
962 | 0 | } |
963 | 2 | if (contain_binlog) { |
964 | 0 | auto binlog_dir = fmt::format("{}/_binlog", schema_hash_path); |
965 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(binlog_dir)); |
966 | | |
967 | 0 | std::vector<io::FileInfo> files; |
968 | 0 | RETURN_IF_ERROR( |
969 | 0 | io::global_local_filesystem()->list(schema_hash_path, true, &files, &exists)); |
970 | 0 | for (auto& file : files) { |
971 | 0 | auto& filename = file.file_name; |
972 | 0 | std::string new_suffix; |
973 | 0 | std::string old_suffix; |
974 | |
|
975 | 0 | if (filename.ends_with(".binlog")) { |
976 | 0 | old_suffix = ".binlog"; |
977 | 0 | new_suffix = ".dat"; |
978 | 0 | } else if (filename.ends_with(".binlog-index")) { |
979 | 0 | old_suffix = ".binlog-index"; |
980 | 0 | new_suffix = ".idx"; |
981 | 0 | } else { |
982 | 0 | continue; |
983 | 0 | } |
984 | | |
985 | 0 | std::string new_filename = filename; |
986 | 0 | new_filename.replace(filename.size() - old_suffix.size(), old_suffix.size(), |
987 | 0 | new_suffix); |
988 | 0 | auto from = fmt::format("{}/{}", schema_hash_path, filename); |
989 | 0 | auto to = fmt::format("{}/_binlog/{}", schema_hash_path, new_filename); |
990 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->rename(from, to)); |
991 | 0 | } |
992 | | |
993 | 0 | auto* meta = store->get_meta(); |
994 | | // if ingest binlog metas error, it will be gc in gc_unused_binlog_metas |
995 | 0 | RETURN_IF_ERROR( |
996 | 0 | RowsetMetaManager::ingest_binlog_metas(meta, tablet_uid, &rowset_binlog_metas_pb)); |
997 | 0 | } |
998 | | |
999 | | // has to change shard id here, because meta file maybe copied from other source |
1000 | | // its shard is different from local shard |
1001 | 2 | tablet_meta->set_shard_id(shard); |
1002 | | // load dir is called by clone, restore, storage migration |
1003 | | // should change tablet uid when tablet object changed |
1004 | 2 | tablet_meta->set_tablet_uid(std::move(tablet_uid)); |
1005 | 2 | std::string meta_binary; |
1006 | 2 | tablet_meta->serialize(&meta_binary); |
1007 | 2 | RETURN_NOT_OK_STATUS_WITH_WARN( |
1008 | 2 | load_tablet_from_meta(store, tablet_id, schema_hash, meta_binary, true, force, restore, |
1009 | 2 | true), |
1010 | 2 | strings::Substitute("fail to load tablet. header_path=$0", header_path)); |
1011 | | |
1012 | 2 | return Status::OK(); |
1013 | 2 | } |
1014 | | |
1015 | 0 | Status TabletManager::report_tablet_info(TTabletInfo* tablet_info) { |
1016 | 0 | DorisMetrics::instance()->report_tablet_requests_total->increment(1); |
1017 | 0 | LOG(INFO) << "begin to process report tablet info." |
1018 | 0 | << "tablet_id=" << tablet_info->tablet_id; |
1019 | |
|
1020 | 0 | Status res = Status::OK(); |
1021 | |
|
1022 | 0 | TabletSharedPtr tablet = get_tablet(tablet_info->tablet_id); |
1023 | 0 | if (tablet == nullptr) { |
1024 | 0 | return Status::Error<TABLE_NOT_FOUND>("can't find tablet={}", tablet_info->tablet_id); |
1025 | 0 | } |
1026 | | |
1027 | 0 | tablet->build_tablet_report_info(tablet_info); |
1028 | 0 | VLOG_TRACE << "success to process report tablet info."; |
1029 | 0 | return res; |
1030 | 0 | } |
1031 | | |
1032 | 0 | Status TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>* tablets_info) { |
1033 | 0 | DCHECK(tablets_info != nullptr); |
1034 | 0 | VLOG_NOTICE << "begin to build all report tablets info"; |
1035 | | |
1036 | | // build the expired txn map first, outside the tablet map lock |
1037 | 0 | std::map<TabletInfo, std::vector<int64_t>> expire_txn_map; |
1038 | 0 | StorageEngine::instance()->txn_manager()->build_expire_txn_map(&expire_txn_map); |
1039 | 0 | LOG(INFO) << "find expired transactions for " << expire_txn_map.size() << " tablets"; |
1040 | |
|
1041 | 0 | DorisMetrics::instance()->report_all_tablets_requests_total->increment(1); |
1042 | 0 | HistogramStat tablet_version_num_hist; |
1043 | 0 | auto local_cache = std::make_shared<std::vector<TTabletStat>>(); |
1044 | 0 | auto handler = [&](const TabletSharedPtr& tablet) { |
1045 | 0 | auto& t_tablet = (*tablets_info)[tablet->tablet_id()]; |
1046 | 0 | TTabletInfo& tablet_info = t_tablet.tablet_infos.emplace_back(); |
1047 | 0 | tablet->build_tablet_report_info(&tablet_info, true, true); |
1048 | | // find expired transaction corresponding to this tablet |
1049 | 0 | TabletInfo tinfo(tablet->tablet_id(), tablet->schema_hash(), tablet->tablet_uid()); |
1050 | 0 | auto find = expire_txn_map.find(tinfo); |
1051 | 0 | if (find != expire_txn_map.end()) { |
1052 | 0 | tablet_info.__set_transaction_ids(find->second); |
1053 | 0 | expire_txn_map.erase(find); |
1054 | 0 | } |
1055 | 0 | tablet_version_num_hist.add(tablet->version_count()); |
1056 | 0 | auto& t_tablet_stat = local_cache->emplace_back(); |
1057 | 0 | t_tablet_stat.__set_tablet_id(tablet_info.tablet_id); |
1058 | 0 | t_tablet_stat.__set_data_size(tablet_info.data_size); |
1059 | 0 | t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size); |
1060 | 0 | t_tablet_stat.__set_row_num(tablet_info.row_count); |
1061 | 0 | t_tablet_stat.__set_version_count(tablet_info.version_count); |
1062 | 0 | t_tablet_stat.__set_visible_version(tablet_info.version); |
1063 | 0 | }; |
1064 | 0 | for_each_tablet(handler, filter_all_tablets); |
1065 | |
|
1066 | 0 | { |
1067 | 0 | std::lock_guard<std::mutex> guard(_tablet_stat_cache_mutex); |
1068 | 0 | _tablet_stat_list_cache.swap(local_cache); |
1069 | 0 | } |
1070 | 0 | DorisMetrics::instance()->tablet_version_num_distribution->set_histogram( |
1071 | 0 | tablet_version_num_hist); |
1072 | 0 | LOG(INFO) << "success to build all report tablets info. tablet_count=" << tablets_info->size(); |
1073 | 0 | return Status::OK(); |
1074 | 0 | } |
1075 | | |
// One round of tablet garbage collection.
//
// 1. Calls delete_expired_stale_rowset() on every tablet visited by for_each_tablet()
//    with filter_all_tablets.
// 2. Walks _shutdown_tablets and hands each entry that nobody else references to
//    _move_tablet_to_trash(). Entries for which that call returns false are appended
//    back to _shutdown_tablets at the end.
//
// Only one sweep runs at a time: if _gc_tablets_lock cannot be taken with try_lock(),
// the call returns Status::OK() immediately. This function always returns Status::OK().
Status TabletManager::start_trash_sweep() {
    DBUG_EXECUTE_IF("TabletManager.start_trash_sweep.sleep", DBUG_BLOCK);
    std::unique_lock<std::mutex> lock(_gc_tablets_lock, std::defer_lock);
    if (!lock.try_lock()) {
        return Status::OK();
    }

    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
    for_each_tablet([](const TabletSharedPtr& tablet) { tablet->delete_expired_stale_rowset(); },
                    filter_all_tablets);

    // Cursor into _shutdown_tablets. It is kept across the batches below, i.e. also
    // while _shutdown_tablets_lock is not held.
    std::list<TabletSharedPtr>::iterator last_it;
    {
        std::shared_lock rdlock(_shutdown_tablets_lock);
        last_it = _shutdown_tablets.begin();
        if (last_it == _shutdown_tablets.end()) {
            // Nothing is waiting to be deleted.
            return Status::OK();
        }
    }

    // Under the exclusive _shutdown_tablets_lock, removes up to `limit` tablets from
    // _shutdown_tablets starting at last_it and returns them. Entries whose shared_ptr
    // use_count() is above 1 are left in the list and stepped over.
    auto get_batch_tablets = [this, &last_it](int limit) {
        std::vector<TabletSharedPtr> batch_tablets;
        std::lock_guard<std::shared_mutex> wrdlock(_shutdown_tablets_lock);
        while (last_it != _shutdown_tablets.end() && batch_tablets.size() < limit) {
            // it means current tablet is referenced by other thread
            if (last_it->use_count() > 1) {
                last_it++;
            } else {
                batch_tablets.push_back(*last_it);
                last_it = _shutdown_tablets.erase(last_it);
            }
        }

        return batch_tablets;
    };

    std::list<TabletSharedPtr> failed_tablets;
    // return true if need continue delete
    // Moves tablets to trash until 200 moves have succeeded (returns true: more work may
    // remain) or get_batch_tablets() returns an empty batch (returns false: done).
    auto delete_one_batch = [this, get_batch_tablets, &failed_tablets]() -> bool {
        int limit = 200;
        for (;;) {
            auto batch_tablets = get_batch_tablets(limit);
            for (const auto& tablet : batch_tablets) {
                if (_move_tablet_to_trash(tablet)) {
                    // Only successful moves consume the budget.
                    limit--;
                } else {
                    failed_tablets.push_back(tablet);
                }
            }
            if (limit <= 0) {
                return true;
            }
            if (batch_tablets.empty()) {
                return false;
            }
        }

        return false;
    };

    // Outside of BE_TEST builds, sleep one second after each batch that reports more work.
    while (delete_one_batch()) {
#ifndef BE_TEST
        sleep(1);
#endif
    }

    // Put the tablets that could not be moved back at the end of the shutdown list.
    if (!failed_tablets.empty()) {
        std::lock_guard<std::shared_mutex> wrlock(_shutdown_tablets_lock);
        _shutdown_tablets.splice(_shutdown_tablets.end(), failed_tablets);
    }

    return Status::OK();
}
1149 | | |
1150 | 3 | bool TabletManager::_move_tablet_to_trash(const TabletSharedPtr& tablet) { |
1151 | 3 | RETURN_IF_ERROR(register_transition_tablet(tablet->tablet_id(), "move to trash")); |
1152 | 3 | Defer defer {[&]() { unregister_transition_tablet(tablet->tablet_id(), "move to trash"); }}; |
1153 | | |
1154 | 3 | TabletSharedPtr tablet_in_not_shutdown = get_tablet(tablet->tablet_id()); |
1155 | 3 | if (tablet_in_not_shutdown) { |
1156 | 0 | TSchemaHash schema_hash_not_shutdown = tablet_in_not_shutdown->schema_hash(); |
1157 | 0 | size_t path_hash_not_shutdown = tablet_in_not_shutdown->data_dir()->path_hash(); |
1158 | 0 | if (tablet->schema_hash() == schema_hash_not_shutdown && |
1159 | 0 | tablet->data_dir()->path_hash() == path_hash_not_shutdown) { |
1160 | 0 | tablet->clear_cache(); |
1161 | | // shard_id in memory not eq shard_id in shutdown |
1162 | 0 | if (tablet_in_not_shutdown->tablet_path() != tablet->tablet_path()) { |
1163 | 0 | LOG(INFO) << "tablet path not eq shutdown tablet path, move it to trash, tablet_id=" |
1164 | 0 | << tablet_in_not_shutdown->tablet_id() |
1165 | 0 | << " mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() |
1166 | 0 | << " shutdown tablet path=" << tablet->tablet_path(); |
1167 | 0 | return tablet->data_dir()->move_to_trash(tablet->tablet_path()); |
1168 | 0 | } else { |
1169 | 0 | LOG(INFO) << "tablet path eq shutdown tablet path, not move to trash, tablet_id=" |
1170 | 0 | << tablet_in_not_shutdown->tablet_id() |
1171 | 0 | << " mem manager tablet path=" << tablet_in_not_shutdown->tablet_path() |
1172 | 0 | << " shutdown tablet path=" << tablet->tablet_path(); |
1173 | 0 | return true; |
1174 | 0 | } |
1175 | 0 | } |
1176 | 0 | } |
1177 | | |
1178 | 3 | TabletMetaSharedPtr tablet_meta(new TabletMeta()); |
1179 | 3 | int64_t get_meta_ts = MonotonicMicros(); |
1180 | 3 | Status check_st = TabletMetaManager::get_meta(tablet->data_dir(), tablet->tablet_id(), |
1181 | 3 | tablet->schema_hash(), tablet_meta); |
1182 | 3 | if (check_st.ok()) { |
1183 | 3 | if (tablet_meta->tablet_state() != TABLET_SHUTDOWN || |
1184 | 3 | tablet_meta->tablet_uid() != tablet->tablet_uid()) { |
1185 | 0 | LOG(WARNING) << "tablet's state changed to normal, skip remove dirs" |
1186 | 0 | << " tablet id = " << tablet_meta->tablet_id() |
1187 | 0 | << " schema hash = " << tablet_meta->schema_hash() |
1188 | 0 | << " old tablet_uid=" << tablet->tablet_uid() |
1189 | 0 | << " cur tablet_uid=" << tablet_meta->tablet_uid(); |
1190 | 0 | return true; |
1191 | 0 | } |
1192 | | |
1193 | 3 | tablet->clear_cache(); |
1194 | | |
1195 | | // move data to trash |
1196 | 3 | const auto& tablet_path = tablet->tablet_path(); |
1197 | 3 | bool exists = false; |
1198 | 3 | Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists); |
1199 | 3 | if (!exists_st) { |
1200 | 0 | return false; |
1201 | 0 | } |
1202 | 3 | if (exists) { |
1203 | | // take snapshot of tablet meta |
1204 | 3 | auto meta_file_path = fmt::format("{}/{}.hdr", tablet_path, tablet->tablet_id()); |
1205 | 3 | int64_t save_meta_ts = MonotonicMicros(); |
1206 | 3 | auto save_st = tablet->tablet_meta()->save(meta_file_path); |
1207 | 3 | if (!save_st.ok()) { |
1208 | 0 | LOG(WARNING) << "failed to save meta, tablet_id=" << tablet_meta->tablet_id() |
1209 | 0 | << ", tablet_uid=" << tablet_meta->tablet_uid() |
1210 | 0 | << ", error=" << save_st; |
1211 | 0 | return false; |
1212 | 0 | } |
1213 | 3 | int64_t now = MonotonicMicros(); |
1214 | 3 | LOG(INFO) << "start to move tablet to trash. " << tablet_path |
1215 | 3 | << ". rocksdb get meta cost " << (save_meta_ts - get_meta_ts) |
1216 | 3 | << " us, rocksdb save meta cost " << (now - save_meta_ts) << " us"; |
1217 | 3 | Status rm_st = tablet->data_dir()->move_to_trash(tablet_path); |
1218 | 3 | if (!rm_st.ok()) { |
1219 | 0 | LOG(WARNING) << "fail to move dir to trash. " << tablet_path; |
1220 | 0 | return false; |
1221 | 0 | } |
1222 | 3 | } |
1223 | | // remove tablet meta |
1224 | 3 | auto remove_st = TabletMetaManager::remove(tablet->data_dir(), tablet->tablet_id(), |
1225 | 3 | tablet->schema_hash()); |
1226 | 3 | if (!remove_st.ok()) { |
1227 | 0 | LOG(WARNING) << "failed to remove meta, tablet_id=" << tablet_meta->tablet_id() |
1228 | 0 | << ", tablet_uid=" << tablet_meta->tablet_uid() << ", error=" << remove_st; |
1229 | 0 | return false; |
1230 | 0 | } |
1231 | 3 | LOG(INFO) << "successfully move tablet to trash. " |
1232 | 3 | << "tablet_id=" << tablet->tablet_id() |
1233 | 3 | << ", schema_hash=" << tablet->schema_hash() << ", tablet_path=" << tablet_path; |
1234 | 3 | return true; |
1235 | 3 | } else { |
1236 | 0 | tablet->clear_cache(); |
1237 | | // if could not find tablet info in meta store, then check if dir existed |
1238 | 0 | const auto& tablet_path = tablet->tablet_path(); |
1239 | 0 | bool exists = false; |
1240 | 0 | Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists); |
1241 | 0 | if (!exists_st) { |
1242 | 0 | return false; |
1243 | 0 | } |
1244 | 0 | if (exists) { |
1245 | 0 | if (check_st.is<META_KEY_NOT_FOUND>()) { |
1246 | 0 | LOG(INFO) << "could not find tablet meta in rocksdb, so just delete it path " |
1247 | 0 | << "tablet_id=" << tablet->tablet_id() |
1248 | 0 | << ", schema_hash=" << tablet->schema_hash() |
1249 | 0 | << ", delete tablet_path=" << tablet_path; |
1250 | 0 | RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_path)); |
1251 | 0 | RETURN_IF_ERROR(DataDir::delete_tablet_parent_path_if_empty(tablet_path)); |
1252 | 0 | return true; |
1253 | 0 | } |
1254 | 0 | LOG(WARNING) << "errors while load meta from store, skip this tablet. " |
1255 | 0 | << "tablet_id=" << tablet->tablet_id() |
1256 | 0 | << ", schema_hash=" << tablet->schema_hash(); |
1257 | 0 | return false; |
1258 | 0 | } else { |
1259 | 0 | LOG(INFO) << "could not find tablet dir, skip it and remove it from gc-queue. " |
1260 | 0 | << "tablet_id=" << tablet->tablet_id() |
1261 | 0 | << ", schema_hash=" << tablet->schema_hash() |
1262 | 0 | << ", tablet_path=" << tablet_path; |
1263 | 0 | return true; |
1264 | 0 | } |
1265 | 0 | } |
1266 | 3 | } |
1267 | | |
1268 | 29 | Status TabletManager::register_transition_tablet(int64_t tablet_id, std::string reason) { |
1269 | 29 | tablets_shard& shard = _get_tablets_shard(tablet_id); |
1270 | 29 | std::thread::id thread_id = std::this_thread::get_id(); |
1271 | 29 | std::lock_guard<std::mutex> lk(shard.lock_for_transition); |
1272 | 29 | if (auto search = shard.tablets_under_transition.find(tablet_id); |
1273 | 29 | search == shard.tablets_under_transition.end()) { |
1274 | | // not found |
1275 | 27 | shard.tablets_under_transition[tablet_id] = std::make_tuple(reason, thread_id, 1); |
1276 | 27 | LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << reason |
1277 | 27 | << " lock times=1 thread_id_in_map=" << thread_id; |
1278 | 27 | return Status::OK(); |
1279 | 27 | } else { |
1280 | | // found |
1281 | 2 | auto& [r, thread_id_in_map, lock_times] = search->second; |
1282 | 2 | if (thread_id != thread_id_in_map) { |
1283 | | // other thread, failed |
1284 | 0 | LOG(INFO) << "tablet_id = " << tablet_id << " is doing " << r |
1285 | 0 | << " thread_id_in_map=" << thread_id_in_map << " , add reason=" << reason |
1286 | 0 | << " thread_id=" << thread_id; |
1287 | 0 | return Status::InternalError<false>("{} failed try later, tablet_id={}", reason, |
1288 | 0 | tablet_id); |
1289 | 0 | } |
1290 | | // add lock times |
1291 | 2 | ++lock_times; |
1292 | 2 | LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << reason |
1293 | 2 | << " lock times=" << lock_times << " thread_id_in_map=" << thread_id_in_map; |
1294 | 2 | return Status::OK(); |
1295 | 2 | } |
1296 | 29 | } |
1297 | | |
1298 | 29 | void TabletManager::unregister_transition_tablet(int64_t tablet_id, std::string reason) { |
1299 | 29 | tablets_shard& shard = _get_tablets_shard(tablet_id); |
1300 | 29 | std::thread::id thread_id = std::this_thread::get_id(); |
1301 | 29 | std::lock_guard<std::mutex> lk(shard.lock_for_transition); |
1302 | 29 | if (auto search = shard.tablets_under_transition.find(tablet_id); |
1303 | 29 | search == shard.tablets_under_transition.end()) { |
1304 | | // impossible, bug |
1305 | 0 | DCHECK(false) << "tablet " << tablet_id |
1306 | 0 | << " must be found, before unreg must have been reg"; |
1307 | 29 | } else { |
1308 | 29 | auto& [r, thread_id_in_map, lock_times] = search->second; |
1309 | 29 | if (thread_id_in_map != thread_id) { |
1310 | | // impossible, bug |
1311 | 0 | DCHECK(false) << "tablet " << tablet_id << " unreg thread must same reg thread"; |
1312 | 0 | } |
1313 | | // sub lock times |
1314 | 29 | --lock_times; |
1315 | 29 | if (lock_times != 0) { |
1316 | 2 | LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason |
1317 | 2 | << " left=" << lock_times << " thread_id_in_map=" << thread_id_in_map; |
1318 | 27 | } else { |
1319 | 27 | LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason |
1320 | 27 | << " thread_id_in_map=" << thread_id_in_map; |
1321 | 27 | shard.tablets_under_transition.erase(tablet_id); |
1322 | 27 | } |
1323 | 29 | } |
1324 | 29 | } |
1325 | | |
1326 | | void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId tablet_id, |
1327 | | SchemaHash schema_hash, |
1328 | | const string& schema_hash_path, |
1329 | 2 | int16_t shard_id) { |
1330 | 2 | SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); |
1331 | | // acquire the read lock, so that there is no creating tablet or load tablet from meta tasks |
1332 | | // create tablet and load tablet task should check whether the dir exists |
1333 | 2 | tablets_shard& shard = _get_tablets_shard(tablet_id); |
1334 | 2 | std::shared_lock rdlock(shard.lock); |
1335 | | |
1336 | | // check if meta already exists |
1337 | 2 | TabletMetaSharedPtr tablet_meta(new TabletMeta()); |
1338 | 2 | Status check_st = TabletMetaManager::get_meta(data_dir, tablet_id, schema_hash, tablet_meta); |
1339 | 2 | if (check_st.ok() && tablet_meta->shard_id() == shard_id) { |
1340 | 2 | return; |
1341 | 2 | } |
1342 | | |
1343 | 0 | LOG(INFO) << "tablet meta not exists, try delete tablet path " << schema_hash_path; |
1344 | |
|
1345 | 0 | bool succ = register_transition_tablet(tablet_id, "path gc"); |
1346 | 0 | if (!succ) { |
1347 | 0 | return; |
1348 | 0 | } |
1349 | 0 | Defer defer {[&]() { unregister_transition_tablet(tablet_id, "path gc"); }}; |
1350 | |
|
1351 | 0 | TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id); |
1352 | 0 | if (tablet != nullptr && tablet->tablet_path() == schema_hash_path) { |
1353 | 0 | LOG(INFO) << "tablet , skip delete the path " << schema_hash_path; |
1354 | 0 | return; |
1355 | 0 | } |
1356 | | |
1357 | | // TODO(ygl): may do other checks in the future |
1358 | 0 | bool exists = false; |
1359 | 0 | Status exists_st = io::global_local_filesystem()->exists(schema_hash_path, &exists); |
1360 | 0 | if (exists_st && exists) { |
1361 | 0 | LOG(INFO) << "start to move tablet to trash. tablet_path = " << schema_hash_path; |
1362 | 0 | Status rm_st = data_dir->move_to_trash(schema_hash_path); |
1363 | 0 | if (!rm_st.ok()) { |
1364 | 0 | LOG(WARNING) << "fail to move dir to trash. dir=" << schema_hash_path; |
1365 | 0 | } else { |
1366 | 0 | LOG(INFO) << "move path " << schema_hash_path << " to trash successfully"; |
1367 | 0 | } |
1368 | 0 | } |
1369 | 0 | } |
1370 | | |
1371 | | void TabletManager::update_root_path_info(std::map<string, DataDirInfo>* path_map, |
1372 | 12 | size_t* tablet_count) { |
1373 | 12 | DCHECK(tablet_count); |
1374 | 12 | *tablet_count = 0; |
1375 | 12 | auto filter = [path_map, tablet_count](Tablet* t) -> bool { |
1376 | 0 | ++(*tablet_count); |
1377 | 0 | auto iter = path_map->find(t->data_dir()->path()); |
1378 | 0 | return iter != path_map->end() && iter->second.is_used; |
1379 | 0 | }; |
1380 | | |
1381 | 12 | auto handler = [&](const TabletSharedPtr& tablet) { |
1382 | 0 | auto& data_dir_info = (*path_map)[tablet->data_dir()->path()]; |
1383 | 0 | data_dir_info.local_used_capacity += tablet->tablet_local_size(); |
1384 | 0 | data_dir_info.remote_used_capacity += tablet->tablet_remote_size(); |
1385 | 0 | }; |
1386 | | |
1387 | 12 | for_each_tablet(handler, filter); |
1388 | 12 | } |
1389 | | |
1390 | | void TabletManager::get_partition_related_tablets(int64_t partition_id, |
1391 | 0 | std::set<TabletInfo>* tablet_infos) { |
1392 | 0 | std::shared_lock rdlock(_partition_tablet_map_lock); |
1393 | 0 | if (_partition_tablet_map.find(partition_id) != _partition_tablet_map.end()) { |
1394 | 0 | *tablet_infos = _partition_tablet_map[partition_id]; |
1395 | 0 | } |
1396 | 0 | } |
1397 | | |
1398 | 13 | void TabletManager::do_tablet_meta_checkpoint(DataDir* data_dir) { |
1399 | 13 | SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); |
1400 | 13 | auto filter = [data_dir](Tablet* tablet) -> bool { |
1401 | 0 | return tablet->tablet_state() == TABLET_RUNNING && |
1402 | 0 | tablet->data_dir()->path_hash() == data_dir->path_hash() && tablet->is_used() && |
1403 | 0 | tablet->init_succeeded(); |
1404 | 0 | }; |
1405 | | |
1406 | 13 | std::vector<TabletSharedPtr> related_tablets = get_all_tablet(filter); |
1407 | 13 | int counter = 0; |
1408 | 13 | MonotonicStopWatch watch; |
1409 | 13 | watch.start(); |
1410 | 13 | for (TabletSharedPtr tablet : related_tablets) { |
1411 | 0 | if (tablet->do_tablet_meta_checkpoint()) { |
1412 | 0 | ++counter; |
1413 | 0 | } |
1414 | 0 | } |
1415 | 13 | int64_t cost = watch.elapsed_time() / 1000 / 1000; |
1416 | 13 | LOG(INFO) << "finish to do meta checkpoint on dir: " << data_dir->path() |
1417 | 13 | << ", number: " << counter << ", cost(ms): " << cost; |
1418 | 13 | } |
1419 | | |
1420 | | Status TabletManager::_create_tablet_meta_unlocked(const TCreateTabletReq& request, DataDir* store, |
1421 | | const bool is_schema_change, |
1422 | | const Tablet* base_tablet, |
1423 | 24 | TabletMetaSharedPtr* tablet_meta) { |
1424 | 24 | uint32_t next_unique_id = 0; |
1425 | 24 | std::unordered_map<uint32_t, uint32_t> col_idx_to_unique_id; |
1426 | 24 | if (!is_schema_change) { |
1427 | 252 | for (uint32_t col_idx = 0; col_idx < request.tablet_schema.columns.size(); ++col_idx) { |
1428 | 228 | col_idx_to_unique_id[col_idx] = col_idx; |
1429 | 228 | } |
1430 | 24 | next_unique_id = request.tablet_schema.columns.size(); |
1431 | 24 | } else { |
1432 | 0 | next_unique_id = base_tablet->next_unique_id(); |
1433 | 0 | auto& new_columns = request.tablet_schema.columns; |
1434 | 0 | for (uint32_t new_col_idx = 0; new_col_idx < new_columns.size(); ++new_col_idx) { |
1435 | 0 | const TColumn& column = new_columns[new_col_idx]; |
1436 | | // For schema change, compare old_tablet and new_tablet: |
1437 | | // 1. if column exist in both new_tablet and old_tablet, choose the column's |
1438 | | // unique_id in old_tablet to be the column's ordinal number in new_tablet |
1439 | | // 2. if column exists only in new_tablet, assign next_unique_id of old_tablet |
1440 | | // to the new column |
1441 | 0 | int32_t old_col_idx = base_tablet->tablet_schema()->field_index(column.column_name); |
1442 | 0 | if (old_col_idx != -1) { |
1443 | 0 | uint32_t old_unique_id = |
1444 | 0 | base_tablet->tablet_schema()->column(old_col_idx).unique_id(); |
1445 | 0 | col_idx_to_unique_id[new_col_idx] = old_unique_id; |
1446 | 0 | } else { |
1447 | | // Not exist in old tablet, it is a new added column |
1448 | 0 | col_idx_to_unique_id[new_col_idx] = next_unique_id++; |
1449 | 0 | } |
1450 | 0 | } |
1451 | 0 | } |
1452 | 24 | VLOG_NOTICE << "creating tablet meta. next_unique_id=" << next_unique_id; |
1453 | | |
1454 | | // We generate a new tablet_uid for this new tablet. |
1455 | 24 | uint64_t shard_id = 0; |
1456 | 24 | RETURN_NOT_OK_STATUS_WITH_WARN(store->get_shard(&shard_id), "fail to get root path shard"); |
1457 | 24 | Status res = TabletMeta::create(request, TabletUid::gen_uid(), shard_id, next_unique_id, |
1458 | 24 | col_idx_to_unique_id, tablet_meta); |
1459 | 24 | RETURN_IF_ERROR(res); |
1460 | 24 | if (request.__isset.storage_format) { |
1461 | 6 | if (request.storage_format == TStorageFormat::DEFAULT) { |
1462 | 0 | (*tablet_meta) |
1463 | 0 | ->set_preferred_rowset_type(StorageEngine::instance()->default_rowset_type()); |
1464 | 6 | } else if (request.storage_format == TStorageFormat::V1) { |
1465 | 0 | (*tablet_meta)->set_preferred_rowset_type(ALPHA_ROWSET); |
1466 | 6 | } else if (request.storage_format == TStorageFormat::V2) { |
1467 | 6 | (*tablet_meta)->set_preferred_rowset_type(BETA_ROWSET); |
1468 | 6 | } else { |
1469 | 0 | return Status::Error<CE_CMD_PARAMS_ERROR>("invalid TStorageFormat: {}", |
1470 | 0 | request.storage_format); |
1471 | 0 | } |
1472 | 6 | } |
1473 | 24 | return res; |
1474 | 24 | } |
1475 | | |
1476 | 2.18k | TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id) { |
1477 | 2.18k | VLOG_NOTICE << "begin to get tablet. tablet_id=" << tablet_id; |
1478 | 2.18k | tablet_map_t& tablet_map = _get_tablet_map(tablet_id); |
1479 | 2.18k | const auto& iter = tablet_map.find(tablet_id); |
1480 | 2.18k | if (iter != tablet_map.end()) { |
1481 | 2.14k | return iter->second; |
1482 | 2.14k | } |
1483 | 38 | return nullptr; |
1484 | 2.18k | } |
1485 | | |
1486 | 26 | void TabletManager::_add_tablet_to_partition(const TabletSharedPtr& tablet) { |
1487 | 26 | std::lock_guard<std::shared_mutex> wrlock(_partition_tablet_map_lock); |
1488 | 26 | _partition_tablet_map[tablet->partition_id()].insert(tablet->get_tablet_info()); |
1489 | 26 | } |
1490 | | |
1491 | 23 | void TabletManager::_remove_tablet_from_partition(const TabletSharedPtr& tablet) { |
1492 | 23 | std::lock_guard<std::shared_mutex> wrlock(_partition_tablet_map_lock); |
1493 | 23 | _partition_tablet_map[tablet->partition_id()].erase(tablet->get_tablet_info()); |
1494 | 23 | if (_partition_tablet_map[tablet->partition_id()].empty()) { |
1495 | 23 | _partition_tablet_map.erase(tablet->partition_id()); |
1496 | 23 | } |
1497 | 23 | } |
1498 | | |
1499 | | void TabletManager::obtain_specific_quantity_tablets(vector<TabletInfo>& tablets_info, |
1500 | 0 | int64_t num) { |
1501 | 0 | for (const auto& tablets_shard : _tablets_shards) { |
1502 | 0 | std::shared_lock rdlock(tablets_shard.lock); |
1503 | 0 | for (const auto& item : tablets_shard.tablet_map) { |
1504 | 0 | TabletSharedPtr tablet = item.second; |
1505 | 0 | if (tablets_info.size() >= num) { |
1506 | 0 | return; |
1507 | 0 | } |
1508 | 0 | if (tablet == nullptr) { |
1509 | 0 | continue; |
1510 | 0 | } |
1511 | 0 | tablets_info.push_back(tablet->get_tablet_info()); |
1512 | 0 | } |
1513 | 0 | } |
1514 | 0 | } |
1515 | | |
1516 | 2.13k | std::shared_mutex& TabletManager::_get_tablets_shard_lock(TTabletId tabletId) { |
1517 | 2.13k | return _get_tablets_shard(tabletId).lock; |
1518 | 2.13k | } |
1519 | | |
1520 | 2.25k | TabletManager::tablet_map_t& TabletManager::_get_tablet_map(TTabletId tabletId) { |
1521 | 2.25k | return _get_tablets_shard(tabletId).tablet_map; |
1522 | 2.25k | } |
1523 | | |
1524 | 4.47k | TabletManager::tablets_shard& TabletManager::_get_tablets_shard(TTabletId tabletId) { |
1525 | 4.47k | return _tablets_shards[tabletId & _tablets_shards_mask]; |
1526 | 4.47k | } |
1527 | | |
1528 | | void TabletManager::get_tablets_distribution_on_different_disks( |
1529 | | std::map<int64_t, std::map<DataDir*, int64_t>>& tablets_num_on_disk, |
1530 | 0 | std::map<int64_t, std::map<DataDir*, std::vector<TabletSize>>>& tablets_info_on_disk) { |
1531 | 0 | std::vector<DataDir*> data_dirs = StorageEngine::instance()->get_stores(); |
1532 | 0 | std::map<int64_t, std::set<TabletInfo>> partition_tablet_map; |
1533 | 0 | { |
1534 | | // When drop tablet, '_partition_tablet_map_lock' is locked in 'tablet_shard_lock'. |
1535 | | // To avoid locking 'tablet_shard_lock' in '_partition_tablet_map_lock', we lock and |
1536 | | // copy _partition_tablet_map here. |
1537 | 0 | std::shared_lock rdlock(_partition_tablet_map_lock); |
1538 | 0 | partition_tablet_map = _partition_tablet_map; |
1539 | 0 | } |
1540 | 0 | std::map<int64_t, std::set<TabletInfo>>::iterator partition_iter = partition_tablet_map.begin(); |
1541 | 0 | for (; partition_iter != partition_tablet_map.end(); ++partition_iter) { |
1542 | 0 | std::map<DataDir*, int64_t> tablets_num; |
1543 | 0 | std::map<DataDir*, std::vector<TabletSize>> tablets_info; |
1544 | 0 | for (int i = 0; i < data_dirs.size(); i++) { |
1545 | 0 | tablets_num[data_dirs[i]] = 0; |
1546 | 0 | } |
1547 | 0 | int64_t partition_id = partition_iter->first; |
1548 | 0 | std::set<TabletInfo>::iterator tablet_info_iter = (partition_iter->second).begin(); |
1549 | 0 | for (; tablet_info_iter != (partition_iter->second).end(); ++tablet_info_iter) { |
1550 | | // get_tablet() will hold 'tablet_shard_lock' |
1551 | 0 | TabletSharedPtr tablet = get_tablet(tablet_info_iter->tablet_id); |
1552 | 0 | if (tablet == nullptr) { |
1553 | 0 | continue; |
1554 | 0 | } |
1555 | 0 | DataDir* data_dir = tablet->data_dir(); |
1556 | 0 | size_t tablet_footprint = tablet->tablet_footprint(); |
1557 | 0 | tablets_num[data_dir]++; |
1558 | 0 | TabletSize tablet_size(tablet_info_iter->tablet_id, tablet_info_iter->schema_hash, |
1559 | 0 | tablet_footprint); |
1560 | 0 | tablets_info[data_dir].push_back(tablet_size); |
1561 | 0 | } |
1562 | 0 | tablets_num_on_disk[partition_id] = tablets_num; |
1563 | 0 | tablets_info_on_disk[partition_id] = tablets_info; |
1564 | 0 | } |
1565 | 0 | } |
1566 | | |
1567 | | struct SortCtx { |
1568 | | SortCtx(TabletSharedPtr tablet, int64_t cooldown_timestamp, int64_t file_size) |
1569 | 0 | : tablet(tablet), cooldown_timestamp(cooldown_timestamp), file_size(file_size) {} |
1570 | | TabletSharedPtr tablet; |
1571 | | // to ensure the tablet with -1 would always be greater than other |
1572 | | uint64_t cooldown_timestamp; |
1573 | | int64_t file_size; |
1574 | 0 | bool operator<(const SortCtx& other) const { |
1575 | 0 | if (this->cooldown_timestamp == other.cooldown_timestamp) { |
1576 | 0 | return this->file_size > other.file_size; |
1577 | 0 | } |
1578 | 0 | return this->cooldown_timestamp < other.cooldown_timestamp; |
1579 | 0 | } |
1580 | | }; |
1581 | | |
1582 | | void TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* tablets, |
1583 | 14 | std::function<bool(const TabletSharedPtr&)> skip_tablet) { |
1584 | 14 | std::vector<SortCtx> sort_ctx_vec; |
1585 | 14 | std::vector<std::weak_ptr<Tablet>> candidates; |
1586 | 14 | for_each_tablet([&](const TabletSharedPtr& tablet) { candidates.emplace_back(tablet); }, |
1587 | 14 | filter_all_tablets); |
1588 | 14 | auto get_cooldown_tablet = [&sort_ctx_vec, &skip_tablet](std::weak_ptr<Tablet>& t) { |
1589 | 0 | const TabletSharedPtr& tablet = t.lock(); |
1590 | 0 | if (UNLIKELY(nullptr == tablet)) { |
1591 | 0 | return; |
1592 | 0 | } |
1593 | 0 | std::shared_lock rdlock(tablet->get_header_lock()); |
1594 | 0 | int64_t cooldown_timestamp = -1; |
1595 | 0 | size_t file_size = -1; |
1596 | 0 | if (!skip_tablet(tablet) && tablet->need_cooldown(&cooldown_timestamp, &file_size)) { |
1597 | 0 | sort_ctx_vec.emplace_back(tablet, cooldown_timestamp, file_size); |
1598 | 0 | } |
1599 | 0 | }; |
1600 | 14 | std::for_each(candidates.begin(), candidates.end(), get_cooldown_tablet); |
1601 | | |
1602 | 14 | std::sort(sort_ctx_vec.begin(), sort_ctx_vec.end()); |
1603 | | |
1604 | 14 | for (SortCtx& ctx : sort_ctx_vec) { |
1605 | 0 | VLOG_DEBUG << "get cooldown tablet: " << ctx.tablet->tablet_id(); |
1606 | 0 | tablets->push_back(std::move(ctx.tablet)); |
1607 | 0 | } |
1608 | 14 | } |
1609 | | |
1610 | 0 | void TabletManager::get_all_tablets_storage_format(TCheckStorageFormatResult* result) { |
1611 | 0 | DCHECK(result != nullptr); |
1612 | 0 | auto handler = [result](const TabletSharedPtr& tablet) { |
1613 | 0 | if (tablet->all_beta()) { |
1614 | 0 | result->v2_tablets.push_back(tablet->tablet_id()); |
1615 | 0 | } else { |
1616 | 0 | result->v1_tablets.push_back(tablet->tablet_id()); |
1617 | 0 | } |
1618 | 0 | }; |
1619 | |
|
1620 | 0 | for_each_tablet(handler, filter_all_tablets); |
1621 | 0 | result->__isset.v1_tablets = true; |
1622 | 0 | result->__isset.v2_tablets = true; |
1623 | 0 | } |
1624 | | |
1625 | 0 | std::set<int64_t> TabletManager::check_all_tablet_segment(bool repair) { |
1626 | 0 | std::set<int64_t> bad_tablets; |
1627 | 0 | std::map<int64_t, std::vector<int64_t>> repair_shard_bad_tablets; |
1628 | 0 | auto handler = [&](const TabletSharedPtr& tablet) { |
1629 | 0 | if (!tablet->check_all_rowset_segment()) { |
1630 | 0 | int64_t tablet_id = tablet->tablet_id(); |
1631 | 0 | bad_tablets.insert(tablet_id); |
1632 | 0 | if (repair) { |
1633 | 0 | repair_shard_bad_tablets[tablet_id & _tablets_shards_mask].push_back(tablet_id); |
1634 | 0 | } |
1635 | 0 | } |
1636 | 0 | }; |
1637 | 0 | for_each_tablet(handler, filter_all_tablets); |
1638 | |
|
1639 | 0 | for (const auto& [shard_index, shard_tablets] : repair_shard_bad_tablets) { |
1640 | 0 | auto& tablets_shard = _tablets_shards[shard_index]; |
1641 | 0 | auto& tablet_map = tablets_shard.tablet_map; |
1642 | 0 | std::lock_guard<std::shared_mutex> wrlock(tablets_shard.lock); |
1643 | 0 | for (auto tablet_id : shard_tablets) { |
1644 | 0 | auto it = tablet_map.find(tablet_id); |
1645 | 0 | if (it == tablet_map.end()) { |
1646 | 0 | bad_tablets.erase(tablet_id); |
1647 | 0 | LOG(WARNING) << "Bad tablet has be removed. tablet_id=" << tablet_id; |
1648 | 0 | } else { |
1649 | 0 | const auto& tablet = it->second; |
1650 | 0 | tablet->set_tablet_state(TABLET_SHUTDOWN); |
1651 | 0 | tablet->save_meta(); |
1652 | 0 | { |
1653 | 0 | std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock( |
1654 | 0 | _shutdown_tablets_lock); |
1655 | 0 | _shutdown_tablets.push_back(tablet); |
1656 | 0 | } |
1657 | 0 | LOG(WARNING) << "There are some segments lost, set tablet to shutdown state." |
1658 | 0 | << "tablet_id=" << tablet->tablet_id() |
1659 | 0 | << ", tablet_path=" << tablet->tablet_path(); |
1660 | 0 | } |
1661 | 0 | } |
1662 | 0 | } |
1663 | |
|
1664 | 0 | return bad_tablets; |
1665 | 0 | } |
1666 | | |
1667 | | bool TabletManager::update_tablet_partition_id(::doris::TPartitionId partition_id, |
1668 | 0 | ::doris::TTabletId tablet_id) { |
1669 | 0 | std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id)); |
1670 | 0 | TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id); |
1671 | 0 | if (tablet == nullptr) { |
1672 | 0 | LOG(WARNING) << "get tablet err partition_id: " << partition_id |
1673 | 0 | << " tablet_id:" << tablet_id; |
1674 | 0 | return false; |
1675 | 0 | } |
1676 | 0 | _remove_tablet_from_partition(tablet); |
1677 | 0 | auto st = tablet->tablet_meta()->set_partition_id(partition_id); |
1678 | 0 | if (!st.ok()) { |
1679 | 0 | LOG(WARNING) << "set partition id err partition_id: " << partition_id |
1680 | 0 | << " tablet_id:" << tablet_id; |
1681 | 0 | return false; |
1682 | 0 | } |
1683 | 0 | _add_tablet_to_partition(tablet); |
1684 | 0 | return true; |
1685 | 0 | } |
1686 | | |
1687 | | } // end namespace doris |