Coverage Report

Created: 2026-06-05 19:14

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/storage/tablet/tablet_manager.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "storage/tablet/tablet_manager.h"
19
20
#include <fmt/format.h>
21
#include <gen_cpp/AgentService_types.h>
22
#include <gen_cpp/BackendService_types.h>
23
#include <gen_cpp/Descriptors_types.h>
24
#include <gen_cpp/MasterService_types.h>
25
#include <gen_cpp/Types_types.h>
26
#include <gen_cpp/olap_file.pb.h>
27
#include <re2/re2.h>
28
#include <unistd.h>
29
30
#include <algorithm>
31
#include <list>
32
#include <mutex>
33
#include <ostream>
34
#include <string_view>
35
36
#include "absl/strings/substitute.h"
37
#include "bvar/bvar.h"
38
#include "common/compiler_util.h" // IWYU pragma: keep
39
#include "common/config.h"
40
#include "common/logging.h"
41
#include "common/metrics/doris_metrics.h"
42
#include "common/metrics/metrics.h"
43
#include "io/fs/local_file_system.h"
44
#include "runtime/exec_env.h"
45
#include "service/backend_options.h"
46
#include "storage/compaction/cumulative_compaction_time_series_policy.h"
47
#include "storage/data_dir.h"
48
#include "storage/olap_common.h"
49
#include "storage/olap_define.h"
50
#include "storage/olap_meta.h"
51
#include "storage/pb_helper.h"
52
#include "storage/rowset/beta_rowset.h"
53
#include "storage/rowset/rowset.h"
54
#include "storage/rowset/rowset_meta_manager.h"
55
#include "storage/storage_engine.h"
56
#include "storage/tablet/tablet.h"
57
#include "storage/tablet/tablet_meta.h"
58
#include "storage/tablet/tablet_meta_manager.h"
59
#include "storage/tablet/tablet_schema.h"
60
#include "storage/txn/txn_manager.h"
61
#include "util/defer_op.h"
62
#include "util/histogram.h"
63
#include "util/path_util.h"
64
#include "util/stopwatch.hpp"
65
#include "util/time.h"
66
#include "util/trace.h"
67
#include "util/uid_util.h"
68
69
namespace doris {
70
class CumulativeCompactionPolicy;
71
} // namespace doris
72
73
using std::map;
74
using std::set;
75
using std::string;
76
using std::vector;
77
78
namespace doris {
79
using namespace ErrorCode;
80
81
bvar::Adder<int64_t> g_tablet_meta_schema_columns_count("tablet_meta_schema_columns_count");
82
83
TabletManager::TabletManager(StorageEngine& engine, int32_t tablet_map_lock_shard_size)
84
376
        : _engine(engine),
85
376
          _tablets_shards_size(tablet_map_lock_shard_size),
86
376
          _tablets_shards_mask(tablet_map_lock_shard_size - 1) {
87
376
    CHECK_GT(_tablets_shards_size, 0);
88
376
    CHECK_EQ(_tablets_shards_size & _tablets_shards_mask, 0);
89
376
    _tablets_shards.resize(_tablets_shards_size);
90
376
}
91
92
373
TabletManager::~TabletManager() = default;
93
94
Status TabletManager::_add_tablet_unlocked(TTabletId tablet_id, const TabletSharedPtr& tablet,
95
286k
                                           bool update_meta, bool force, RuntimeProfile* profile) {
96
286k
    if (profile->get_counter("AddTablet") == nullptr) {
97
282k
        ADD_TIMER(profile, "AddTablet");
98
282k
    }
99
286k
    Status res = Status::OK();
100
286k
    VLOG_NOTICE << "begin to add tablet to TabletManager. "
101
99
                << "tablet_id=" << tablet_id << ", force=" << force;
102
103
286k
    TabletSharedPtr existed_tablet = nullptr;
104
286k
    tablet_map_t& tablet_map = _get_tablet_map(tablet_id);
105
286k
    const auto& iter = tablet_map.find(tablet_id);
106
286k
    if (iter != tablet_map.end()) {
107
90
        existed_tablet = iter->second;
108
90
    }
109
110
286k
    if (existed_tablet == nullptr) {
111
286k
        return _add_tablet_to_map_unlocked(tablet_id, tablet, update_meta, false /*keep_files*/,
112
286k
                                           false /*drop_old*/, profile);
113
286k
    }
114
    // During restore process, the tablet is exist and snapshot loader will replace the tablet's rowsets
115
    // and then reload the tablet, the tablet's path will the same
116
48
    if (!force) {
117
2
        if (existed_tablet->tablet_path() == tablet->tablet_path()) {
118
0
            return Status::Error<ENGINE_INSERT_EXISTS_TABLE>(
119
0
                    "add the same tablet twice! tablet_id={}, tablet_path={}", tablet_id,
120
0
                    tablet->tablet_path());
121
0
        }
122
2
        if (existed_tablet->data_dir() == tablet->data_dir()) {
123
0
            return Status::Error<ENGINE_INSERT_EXISTS_TABLE>(
124
0
                    "add tablet with same data dir twice! tablet_id={}", tablet_id);
125
0
        }
126
2
    }
127
128
48
    MonotonicStopWatch watch;
129
48
    watch.start();
130
131
    // During storage migration, the tablet is moved to another disk, have to check
132
    // if the new tablet's rowset version is larger than the old one to prevent losting data during
133
    // migration
134
48
    int64_t old_time, new_time;
135
48
    int64_t old_version, new_version;
136
48
    {
137
48
        std::shared_lock rdlock(existed_tablet->get_header_lock());
138
48
        const RowsetSharedPtr old_rowset = existed_tablet->get_rowset_with_max_version();
139
48
        const RowsetSharedPtr new_rowset = tablet->get_rowset_with_max_version();
140
        // If new tablet is empty, it is a newly created schema change tablet.
141
        // the old tablet is dropped before add tablet. it should not exist old tablet
142
48
        if (new_rowset == nullptr) {
143
            // it seems useless to call unlock and return here.
144
            // it could prevent error when log level is changed in the future.
145
0
            return Status::Error<ENGINE_INSERT_EXISTS_TABLE>(
146
0
                    "new tablet is empty and old tablet exists. it should not happen. tablet_id={}",
147
0
                    tablet_id);
148
0
        }
149
48
        old_time = old_rowset == nullptr ? -1 : old_rowset->creation_time();
150
48
        new_time = new_rowset->creation_time();
151
48
        old_version = old_rowset == nullptr ? -1 : old_rowset->end_version();
152
48
        new_version = new_rowset->end_version();
153
48
    }
154
48
    COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "GetExistTabletVersion", "AddTablet"),
155
48
                   static_cast<int64_t>(watch.reset()));
156
157
    // In restore process, we replace all origin files in tablet dir with
158
    // the downloaded snapshot files. Then we try to reload tablet header.
159
    // force == true means we forcibly replace the Tablet in tablet_map
160
    // with the new one. But if we do so, the files in the tablet dir will be
161
    // dropped when the origin Tablet deconstruct.
162
    // So we set keep_files == true to not delete files when the
163
    // origin Tablet deconstruct.
164
    // During restore process, snapshot loader
165
    // replaced the old tablet's rowset with new rowsets, but the tablet path is reused, if drop files
166
    // here, the new rowset's file will also be dropped, so use keep files here
167
48
    bool keep_files = force;
168
48
    if (force ||
169
90
        (new_version > old_version || (new_version == old_version && new_time >= old_time))) {
170
        // check if new tablet's meta is in store and add new tablet's meta to meta store
171
90
        res = _add_tablet_to_map_unlocked(tablet_id, tablet, update_meta, keep_files,
172
90
                                          true /*drop_old*/, profile);
173
18.4E
    } else {
174
18.4E
        RETURN_IF_ERROR(tablet->set_tablet_state(TABLET_SHUTDOWN));
175
18.4E
        tablet->save_meta();
176
18.4E
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "SaveMeta", "AddTablet"),
177
18.4E
                       static_cast<int64_t>(watch.reset()));
178
18.4E
        {
179
18.4E
            std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock(_shutdown_tablets_lock);
180
18.4E
            _shutdown_tablets.push_back(tablet);
181
18.4E
        }
182
183
18.4E
        res = Status::Error<ENGINE_INSERT_OLD_TABLET>(
184
18.4E
                "set tablet to shutdown state. tablet_id={}, tablet_path={}", tablet->tablet_id(),
185
18.4E
                tablet->tablet_path());
186
18.4E
    }
187
48
    LOG(WARNING) << "add duplicated tablet. force=" << force << ", res=" << res
188
48
                 << ", tablet_id=" << tablet_id << ", old_version=" << old_version
189
48
                 << ", new_version=" << new_version << ", old_time=" << old_time
190
48
                 << ", new_time=" << new_time
191
48
                 << ", old_tablet_path=" << existed_tablet->tablet_path()
192
48
                 << ", new_tablet_path=" << tablet->tablet_path();
193
194
48
    return res;
195
48
}
196
197
Status TabletManager::_add_tablet_to_map_unlocked(TTabletId tablet_id,
198
                                                  const TabletSharedPtr& tablet, bool update_meta,
199
                                                  bool keep_files, bool drop_old,
200
286k
                                                  RuntimeProfile* profile) {
201
    // check if new tablet's meta is in store and add new tablet's meta to meta store
202
286k
    Status res = Status::OK();
203
286k
    MonotonicStopWatch watch;
204
286k
    watch.start();
205
286k
    if (update_meta) {
206
        // call tablet save meta in order to valid the meta
207
4.09k
        tablet->save_meta();
208
4.09k
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "SaveMeta", "AddTablet"),
209
4.09k
                       static_cast<int64_t>(watch.reset()));
210
4.09k
    }
211
286k
    if (drop_old) {
212
        // If the new tablet is fresher than the existing one, then replace
213
        // the existing tablet with the new one.
214
        // Use default replica_id to ignore whether replica_id is match when drop tablet.
215
90
        Status status = _drop_tablet(tablet_id, /* replica_id */ 0, keep_files, false, true);
216
90
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "DropOldTablet", "AddTablet"),
217
90
                       static_cast<int64_t>(watch.reset()));
218
90
        RETURN_NOT_OK_STATUS_WITH_WARN(
219
90
                status, absl::Substitute("failed to drop old tablet when add new tablet. "
220
90
                                         "tablet_id=$0",
221
90
                                         tablet_id));
222
90
    }
223
    // Register tablet into DataDir, so that we can manage tablet from
224
    // the perspective of root path.
225
    // Example: unregister all tables when a bad disk found.
226
286k
    tablet->register_tablet_into_dir();
227
286k
    tablet_map_t& tablet_map = _get_tablet_map(tablet_id);
228
286k
    tablet_map[tablet_id] = tablet;
229
286k
    _add_tablet_to_partition(tablet);
230
286k
    g_tablet_meta_schema_columns_count << tablet->tablet_meta()->tablet_columns_num();
231
286k
    COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RegisterTabletInfo", "AddTablet"),
232
286k
                   static_cast<int64_t>(watch.reset()));
233
234
286k
    VLOG_NOTICE << "add tablet to map successfully."
235
43
                << " tablet_id=" << tablet_id;
236
237
286k
    return res;
238
286k
}
239
240
0
bool TabletManager::check_tablet_id_exist(TTabletId tablet_id) {
241
0
    std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id));
242
0
    return _check_tablet_id_exist_unlocked(tablet_id);
243
0
}
244
245
0
bool TabletManager::_check_tablet_id_exist_unlocked(TTabletId tablet_id) {
246
0
    tablet_map_t& tablet_map = _get_tablet_map(tablet_id);
247
0
    return tablet_map.find(tablet_id) != tablet_map.end();
248
0
}
249
250
Status TabletManager::create_tablet(const TCreateTabletReq& request, std::vector<DataDir*> stores,
251
4.03k
                                    RuntimeProfile* profile) {
252
4.03k
    DorisMetrics::instance()->create_tablet_requests_total->increment(1);
253
254
4.03k
    int64_t tablet_id = request.tablet_id;
255
4.03k
    LOG(INFO) << "begin to create tablet. tablet_id=" << tablet_id
256
4.03k
              << ", table_id=" << request.table_id << ", partition_id=" << request.partition_id
257
4.03k
              << ", replica_id=" << request.replica_id << ", stores.size=" << stores.size()
258
4.03k
              << ", first store=" << stores[0]->path();
259
260
    // when we create rollup tablet A(assume on shard-1) from tablet B(assume on shard-2)
261
    // we need use write lock on shard-1 and then use read lock on shard-2
262
    // if there have create rollup tablet C(assume on shard-2) from tablet D(assume on shard-1) at the same time, we will meet deadlock
263
4.03k
    std::unique_lock two_tablet_lock(_two_tablet_mtx, std::defer_lock);
264
4.03k
    bool in_restore_mode = request.__isset.in_restore_mode && request.in_restore_mode;
265
4.03k
    bool is_schema_change_or_atomic_restore =
266
4.03k
            request.__isset.base_tablet_id && request.base_tablet_id > 0;
267
4.03k
    bool need_two_lock =
268
4.03k
            is_schema_change_or_atomic_restore &&
269
4.03k
            ((_tablets_shards_mask & request.base_tablet_id) != (_tablets_shards_mask & tablet_id));
270
4.03k
    if (need_two_lock) {
271
0
        SCOPED_TIMER(ADD_TIMER(profile, "GetTwoTableLock"));
272
0
        two_tablet_lock.lock();
273
0
    }
274
275
4.03k
    MonotonicStopWatch shard_lock_watch;
276
4.03k
    shard_lock_watch.start();
277
4.03k
    std::lock_guard wrlock(_get_tablets_shard_lock(tablet_id));
278
4.03k
    shard_lock_watch.stop();
279
4.03k
    COUNTER_UPDATE(ADD_TIMER(profile, "GetShardLock"),
280
4.03k
                   static_cast<int64_t>(shard_lock_watch.elapsed_time()));
281
    // Make create_tablet operation to be idempotent:
282
    // 1. Return true if tablet with same tablet_id and schema_hash exist;
283
    //           false if tablet with same tablet_id but different schema_hash exist.
284
    // 2. When this is an alter task, if the tablet(both tablet_id and schema_hash are
285
    // same) already exist, then just return true(an duplicate request). But if
286
    // tablet_id exist but with different schema_hash, return an error(report task will
287
    // eventually trigger its deletion).
288
4.03k
    {
289
4.03k
        SCOPED_TIMER(ADD_TIMER(profile, "GetTabletUnlocked"));
290
4.03k
        if (_get_tablet_unlocked(tablet_id) != nullptr) {
291
3
            LOG(INFO) << "success to create tablet. tablet already exist. tablet_id=" << tablet_id;
292
3
            return Status::OK();
293
3
        }
294
4.03k
    }
295
296
4.02k
    TabletSharedPtr base_tablet = nullptr;
297
    // If the CreateTabletReq has base_tablet_id then it is a alter-tablet request
298
4.02k
    if (is_schema_change_or_atomic_restore) {
299
        // if base_tablet_id's lock diffrent with new_tablet_id, we need lock it.
300
0
        if (need_two_lock) {
301
0
            SCOPED_TIMER(ADD_TIMER(profile, "GetBaseTablet"));
302
0
            base_tablet = get_tablet(request.base_tablet_id);
303
0
            two_tablet_lock.unlock();
304
0
        } else {
305
0
            SCOPED_TIMER(ADD_TIMER(profile, "GetBaseTabletUnlocked"));
306
0
            base_tablet = _get_tablet_unlocked(request.base_tablet_id);
307
0
        }
308
0
        if (base_tablet == nullptr) {
309
0
            DorisMetrics::instance()->create_tablet_requests_failed->increment(1);
310
0
            return Status::Error<TABLE_CREATE_META_ERROR>(
311
0
                    "fail to create tablet(change schema/atomic restore), base tablet does not "
312
0
                    "exist. new_tablet_id={}, base_tablet_id={}",
313
0
                    tablet_id, request.base_tablet_id);
314
0
        }
315
        // If we are doing schema-change or atomic-restore, we should use the same data dir
316
        // TODO(lingbin): A litter trick here, the directory should be determined before
317
        // entering this method
318
        //
319
        // ATTN: Since all restored replicas will be saved to HDD, so no storage_medium check here.
320
0
        if (in_restore_mode ||
321
0
            request.storage_medium == base_tablet->data_dir()->storage_medium()) {
322
0
            LOG(INFO) << "create tablet use the base tablet data dir. tablet_id=" << tablet_id
323
0
                      << ", base tablet_id=" << request.base_tablet_id
324
0
                      << ", data dir=" << base_tablet->data_dir()->path();
325
0
            stores.clear();
326
0
            stores.push_back(base_tablet->data_dir());
327
0
        }
328
0
    }
329
330
    // set alter type to schema-change. it is useless
331
4.02k
    TabletSharedPtr tablet = _internal_create_tablet_unlocked(
332
4.02k
            request, is_schema_change_or_atomic_restore, base_tablet.get(), stores, profile);
333
4.02k
    if (tablet == nullptr) {
334
0
        DorisMetrics::instance()->create_tablet_requests_failed->increment(1);
335
0
        return Status::Error<CE_CMD_PARAMS_ERROR>("fail to create tablet. tablet_id={}",
336
0
                                                  request.tablet_id);
337
0
    }
338
339
4.02k
    LOG(INFO) << "success to create tablet. tablet_id=" << tablet_id
340
4.02k
              << ", tablet_path=" << tablet->tablet_path();
341
4.02k
    return Status::OK();
342
4.02k
}
343
344
TabletSharedPtr TabletManager::_internal_create_tablet_unlocked(
345
        const TCreateTabletReq& request, const bool is_schema_change, const Tablet* base_tablet,
346
4.02k
        const std::vector<DataDir*>& data_dirs, RuntimeProfile* profile) {
347
    // If in schema-change state, base_tablet must also be provided.
348
    // i.e., is_schema_change and base_tablet are either assigned or not assigned
349
4.02k
    DCHECK((is_schema_change && base_tablet) || (!is_schema_change && !base_tablet));
350
351
    // NOTE: The existence of tablet_id and schema_hash has already been checked,
352
    // no need check again here.
353
354
4.02k
    const std::string parent_timer_name = "InternalCreateTablet";
355
4.02k
    SCOPED_TIMER(ADD_TIMER(profile, parent_timer_name));
356
357
4.02k
    MonotonicStopWatch watch;
358
4.02k
    watch.start();
359
4.02k
    auto create_meta_timer = ADD_CHILD_TIMER(profile, "CreateMeta", parent_timer_name);
360
4.02k
    auto tablet = _create_tablet_meta_and_dir_unlocked(request, is_schema_change, base_tablet,
361
4.02k
                                                       data_dirs, profile);
362
4.02k
    COUNTER_UPDATE(create_meta_timer, static_cast<int64_t>(watch.reset()));
363
4.02k
    if (tablet == nullptr) {
364
0
        return nullptr;
365
0
    }
366
367
4.02k
    int64_t new_tablet_id = request.tablet_id;
368
4.02k
    int32_t new_schema_hash = request.tablet_schema.schema_hash;
369
370
    // should remove the tablet's pending_id no matter create-tablet success or not
371
4.02k
    DataDir* data_dir = tablet->data_dir();
372
373
    // TODO(yiguolei)
374
    // the following code is very difficult to understand because it mixed alter tablet v2
375
    // and alter tablet v1 should remove alter tablet v1 code after v0.12
376
4.02k
    Status res = Status::OK();
377
4.02k
    bool is_tablet_added = false;
378
4.02k
    do {
379
4.02k
        res = tablet->init();
380
4.02k
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "TabletInit", parent_timer_name),
381
4.02k
                       static_cast<int64_t>(watch.reset()));
382
4.02k
        if (!res.ok()) {
383
0
            LOG(WARNING) << "tablet init failed. tablet:" << tablet->tablet_id();
384
0
            break;
385
0
        }
386
387
        // Create init version if this is not a restore mode replica and request.version is set
388
        // bool in_restore_mode = request.__isset.in_restore_mode && request.in_restore_mode;
389
        // if (!in_restore_mode && request.__isset.version) {
390
        // create initial rowset before add it to storage engine could omit many locks
391
4.02k
        res = tablet->create_initial_rowset(request.version);
392
4.02k
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "InitRowset", parent_timer_name),
393
4.02k
                       static_cast<int64_t>(watch.reset()));
394
4.02k
        if (!res.ok()) {
395
0
            LOG(WARNING) << "fail to create initial version for tablet. res=" << res;
396
0
            break;
397
0
        }
398
399
4.02k
        if (is_schema_change) {
400
            // if this is a new alter tablet, has to set its state to not ready
401
            // because schema change handler depends on it to check whether history data
402
            // convert finished
403
0
            static_cast<void>(tablet->set_tablet_state(TabletState::TABLET_NOTREADY));
404
0
        }
405
        // Add tablet to StorageEngine will make it visible to user
406
        // Will persist tablet meta
407
4.02k
        auto add_tablet_timer = ADD_CHILD_TIMER(profile, "AddTablet", parent_timer_name);
408
4.02k
        res = _add_tablet_unlocked(new_tablet_id, tablet, /*update_meta*/ true, false, profile);
409
4.02k
        COUNTER_UPDATE(add_tablet_timer, static_cast<int64_t>(watch.reset()));
410
4.02k
        if (!res.ok()) {
411
0
            LOG(WARNING) << "fail to add tablet to StorageEngine. res=" << res;
412
0
            break;
413
0
        }
414
4.02k
        is_tablet_added = true;
415
416
        // TODO(lingbin): The following logic seems useless, can be removed?
417
        // Because if _add_tablet_unlocked() return OK, we must can get it from map.
418
4.02k
        TabletSharedPtr tablet_ptr = _get_tablet_unlocked(new_tablet_id);
419
4.02k
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "GetTablet", parent_timer_name),
420
4.02k
                       static_cast<int64_t>(watch.reset()));
421
4.02k
        if (tablet_ptr == nullptr) {
422
0
            res = Status::Error<TABLE_NOT_FOUND>("fail to get tablet. res={}", res);
423
0
            break;
424
0
        }
425
4.02k
    } while (false);
426
427
4.02k
    if (res.ok()) {
428
4.02k
        return tablet;
429
4.02k
    }
430
    // something is wrong, we need clear environment
431
1
    if (is_tablet_added) {
432
0
        Status status = _drop_tablet(new_tablet_id, request.replica_id, false, false, true);
433
0
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "DropTablet", parent_timer_name),
434
0
                       static_cast<int64_t>(watch.reset()));
435
0
        if (!status.ok()) {
436
0
            LOG(WARNING) << "fail to drop tablet when create tablet failed. res=" << res;
437
0
        }
438
1
    } else {
439
1
        tablet->delete_all_files();
440
1
        static_cast<void>(TabletMetaManager::remove(data_dir, new_tablet_id, new_schema_hash));
441
1
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RemoveTabletFiles", parent_timer_name),
442
1
                       static_cast<int64_t>(watch.reset()));
443
1
    }
444
1
    return nullptr;
445
4.02k
}
446
447
4.01k
static string _gen_tablet_dir(const string& dir, int32_t shard_id, int64_t tablet_id) {
448
4.01k
    string path = dir;
449
4.01k
    path = path_util::join_path_segments(path, DATA_PREFIX);
450
4.01k
    path = path_util::join_path_segments(path, std::to_string(shard_id));
451
4.01k
    path = path_util::join_path_segments(path, std::to_string(tablet_id));
452
4.01k
    return path;
453
4.01k
}
454
455
TabletSharedPtr TabletManager::_create_tablet_meta_and_dir_unlocked(
456
        const TCreateTabletReq& request, const bool is_schema_change, const Tablet* base_tablet,
457
4.02k
        const std::vector<DataDir*>& data_dirs, RuntimeProfile* profile) {
458
4.02k
    string pending_id = TABLET_ID_PREFIX + std::to_string(request.tablet_id);
459
    // Many attempts are made here in the hope that even if a disk fails, it can still continue.
460
4.02k
    std::string parent_timer_name = "CreateMeta";
461
4.02k
    MonotonicStopWatch watch;
462
4.02k
    watch.start();
463
4.02k
    for (auto& data_dir : data_dirs) {
464
4.02k
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RemovePendingIds", parent_timer_name),
465
4.02k
                       static_cast<int64_t>(watch.reset()));
466
467
4.02k
        TabletMetaSharedPtr tablet_meta;
468
        // if create meta failed, do not need to clean dir, because it is only in memory
469
4.02k
        Status res = _create_tablet_meta_unlocked(request, data_dir, is_schema_change, base_tablet,
470
4.02k
                                                  &tablet_meta);
471
4.02k
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "CreateMetaUnlock", parent_timer_name),
472
4.02k
                       static_cast<int64_t>(watch.reset()));
473
4.02k
        if (!res.ok()) {
474
0
            LOG(WARNING) << "fail to create tablet meta. res=" << res
475
0
                         << ", root=" << data_dir->path();
476
0
            continue;
477
0
        }
478
479
4.02k
        string tablet_dir =
480
4.02k
                _gen_tablet_dir(data_dir->path(), tablet_meta->shard_id(), request.tablet_id);
481
4.02k
        string schema_hash_dir = path_util::join_path_segments(
482
4.02k
                tablet_dir, std::to_string(request.tablet_schema.schema_hash));
483
4.02k
        bool has_row_binlog = tablet_meta->binlog_config().is_enable() &&
484
4.02k
                              tablet_meta->binlog_config().is_row_binlog_format();
485
4.02k
        string row_binlog_dir = path_util::join_path_segments(schema_hash_dir, "_row_binlog");
486
487
        // Because the tablet is removed asynchronously, so that the dir may still exist when BE
488
        // receive create-tablet request again, For example retried schema-change request
489
4.02k
        bool exists = true;
490
4.02k
        res = io::global_local_filesystem()->exists(schema_hash_dir, &exists);
491
4.02k
        if (!res.ok()) {
492
0
            continue;
493
0
        }
494
4.02k
        if (exists) {
495
0
            LOG(WARNING) << "skip this dir because tablet path exist, path=" << schema_hash_dir;
496
0
            continue;
497
4.02k
        } else {
498
4.02k
            Status st = io::global_local_filesystem()->create_directory(schema_hash_dir);
499
4.02k
            if (!st.ok()) {
500
0
                continue;
501
0
            }
502
4.02k
        }
503
504
4.02k
        if (has_row_binlog) {
505
3
            Status st = io::global_local_filesystem()->create_directory(row_binlog_dir);
506
3
            if (!st.ok()) {
507
0
                WARN_IF_ERROR(io::global_local_filesystem()->delete_directory(schema_hash_dir),
508
0
                              "failed to cleanup tablet dir after create sub directory failed");
509
0
                continue;
510
0
            }
511
3
        }
512
513
4.02k
        if (tablet_meta->partition_id() <= 0) {
514
121
            LOG(WARNING) << "invalid partition id " << tablet_meta->partition_id() << ", tablet "
515
121
                         << tablet_meta->tablet_id();
516
121
        }
517
4.02k
        TabletSharedPtr new_tablet =
518
4.02k
                std::make_shared<Tablet>(_engine, std::move(tablet_meta), data_dir);
519
4.02k
        COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "CreateTabletFromMeta", parent_timer_name),
520
4.02k
                       static_cast<int64_t>(watch.reset()));
521
4.02k
        return new_tablet;
522
4.02k
    }
523
0
    return nullptr;
524
4.02k
}
525
526
Status TabletManager::drop_tablet(TTabletId tablet_id, TReplicaId replica_id,
527
3.12k
                                  bool is_drop_table_or_partition) {
528
3.12k
    return _drop_tablet(tablet_id, replica_id, false, is_drop_table_or_partition, false);
529
3.12k
}
530
531
// Drop specified tablet.
532
Status TabletManager::_drop_tablet(TTabletId tablet_id, TReplicaId replica_id, bool keep_files,
533
3.21k
                                   bool is_drop_table_or_partition, bool had_held_shard_lock) {
534
3.21k
    LOG(INFO) << "begin drop tablet. tablet_id=" << tablet_id << ", replica_id=" << replica_id
535
3.21k
              << ", is_drop_table_or_partition=" << is_drop_table_or_partition
536
3.21k
              << ", keep_files=" << keep_files;
537
3.21k
    DorisMetrics::instance()->drop_tablet_requests_total->increment(1);
538
539
3.21k
    RETURN_IF_ERROR(register_transition_tablet(tablet_id, "drop tablet"));
540
3.21k
    Defer defer {[&]() { unregister_transition_tablet(tablet_id, "drop tablet"); }};
541
542
    // Fetch tablet which need to be dropped
543
3.21k
    TabletSharedPtr to_drop_tablet;
544
3.21k
    {
545
3.21k
        std::unique_lock<std::shared_mutex> wlock(_get_tablets_shard_lock(tablet_id),
546
3.21k
                                                  std::defer_lock);
547
3.21k
        if (!had_held_shard_lock) {
548
3.12k
            wlock.lock();
549
3.12k
        }
550
3.21k
        to_drop_tablet = _get_tablet_unlocked(tablet_id);
551
3.21k
        if (to_drop_tablet == nullptr) {
552
1
            LOG(WARNING) << "fail to drop tablet because it does not exist. "
553
1
                         << "tablet_id=" << tablet_id;
554
1
            return Status::OK();
555
1
        }
556
557
        // We should compare replica id to avoid dropping new cloned tablet.
558
        // Iff request replica id is 0, FE may be an older release, then we drop this tablet as before.
559
3.21k
        if (to_drop_tablet->replica_id() != replica_id && replica_id != 0) {
560
0
            return Status::Aborted("replica_id not match({} vs {})", to_drop_tablet->replica_id(),
561
0
                                   replica_id);
562
0
        }
563
564
3.21k
        _remove_tablet_from_partition(to_drop_tablet);
565
3.21k
        tablet_map_t& tablet_map = _get_tablet_map(tablet_id);
566
3.21k
        tablet_map.erase(tablet_id);
567
3.21k
    }
568
569
0
    to_drop_tablet->clear_cache();
570
571
3.21k
    {
572
        // drop tablet will update tablet meta, should lock
573
3.21k
        std::lock_guard<std::shared_mutex> wrlock(to_drop_tablet->get_header_lock());
574
3.21k
        SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
575
        // NOTE: has to update tablet here, but must not update tablet meta directly.
576
        // because other thread may hold the tablet object, they may save meta too.
577
        // If update meta directly here, other thread may override the meta
578
        // and the tablet will be loaded at restart time.
579
        // To avoid this exception, we first set the state of the tablet to `SHUTDOWN`.
580
        //
581
        // Until now, only the restore task uses keep files.
582
3.21k
        RETURN_IF_ERROR(to_drop_tablet->set_tablet_state(TABLET_SHUTDOWN));
583
3.21k
        if (!keep_files) {
584
3.12k
            LOG(INFO) << "set tablet to shutdown state and remove it from memory. "
585
3.12k
                      << "tablet_id=" << tablet_id
586
3.12k
                      << ", tablet_path=" << to_drop_tablet->tablet_path();
587
            // We must record unused remote rowsets path info to OlapMeta before tablet state is marked as TABLET_SHUTDOWN in OlapMeta,
588
            // otherwise if BE shutdown after saving tablet state, these remote rowsets path info will lost.
589
3.12k
            if (is_drop_table_or_partition) {
590
2.97k
                RETURN_IF_ERROR(to_drop_tablet->remove_all_remote_rowsets());
591
2.97k
            }
592
3.12k
            to_drop_tablet->save_meta();
593
3.12k
            {
594
3.12k
                std::lock_guard<std::shared_mutex> wrdlock(_shutdown_tablets_lock);
595
3.12k
                _shutdown_tablets.push_back(to_drop_tablet);
596
3.12k
            }
597
3.12k
        }
598
3.21k
    }
599
600
3.21k
    to_drop_tablet->deregister_tablet_from_dir();
601
3.21k
    g_tablet_meta_schema_columns_count << -to_drop_tablet->tablet_meta()->tablet_columns_num();
602
3.21k
    return Status::OK();
603
3.21k
}
604
605
1.25M
TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, bool include_deleted, string* err) {
606
1.25M
    std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id));
607
1.25M
    return _get_tablet_unlocked(tablet_id, include_deleted, err);
608
1.25M
}
609
610
52
std::vector<TabletSharedPtr> TabletManager::get_all_tablet(std::function<bool(Tablet*)>&& filter) {
611
52
    std::vector<TabletSharedPtr> res;
612
291k
    for_each_tablet([&](const TabletSharedPtr& tablet) { res.emplace_back(tablet); },
613
52
                    std::move(filter));
614
52
    return res;
615
52
}
616
617
void TabletManager::for_each_tablet(std::function<void(const TabletSharedPtr&)>&& handler,
618
59.4k
                                    std::function<bool(Tablet*)>&& filter) {
619
59.4k
    std::vector<TabletSharedPtr> tablets;
620
15.1M
    for (const auto& tablets_shard : _tablets_shards) {
621
15.1M
        tablets.clear();
622
15.1M
        {
623
15.1M
            std::shared_lock rdlock(tablets_shard.lock);
624
680M
            for (const auto& [id, tablet] : tablets_shard.tablet_map) {
625
680M
                if (filter(tablet.get())) {
626
666M
                    tablets.emplace_back(tablet);
627
666M
                }
628
680M
            }
629
15.1M
        }
630
693M
        for (const auto& tablet : tablets) {
631
693M
            handler(tablet);
632
693M
        }
633
15.1M
    }
634
59.4k
}
635
636
TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id, bool include_deleted,
637
1.27M
                                                    string* err) {
638
1.27M
    TabletSharedPtr tablet;
639
1.27M
    tablet = _get_tablet_unlocked(tablet_id);
640
1.27M
    if (tablet == nullptr && include_deleted) {
641
22
        std::shared_lock rdlock(_shutdown_tablets_lock);
642
2.32k
        for (auto& deleted_tablet : _shutdown_tablets) {
643
2.32k
            CHECK(deleted_tablet != nullptr) << "deleted tablet is nullptr";
644
2.32k
            if (deleted_tablet->tablet_id() == tablet_id) {
645
0
                tablet = deleted_tablet;
646
0
                break;
647
0
            }
648
2.32k
        }
649
22
    }
650
651
1.27M
    if (tablet == nullptr) {
652
5.19k
        if (err != nullptr) {
653
22
            *err = "tablet does not exist. " + BackendOptions::get_localhost();
654
22
        }
655
5.19k
        return nullptr;
656
5.19k
    }
657
1.27M
#ifndef BE_TEST
658
1.27M
    if (!tablet->is_used()) {
659
0
        LOG(WARNING) << "tablet cannot be used. tablet=" << tablet_id;
660
0
        if (err != nullptr) {
661
0
            *err = "tablet cannot be used. " + BackendOptions::get_localhost();
662
0
        }
663
0
        return nullptr;
664
0
    }
665
1.27M
#endif
666
667
1.27M
    return tablet;
668
1.27M
}
669
670
TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, TabletUid tablet_uid,
671
21.7k
                                          bool include_deleted, string* err) {
672
21.7k
    std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id));
673
21.7k
    TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id, include_deleted, err);
674
21.7k
    if (tablet != nullptr && tablet->tablet_uid() == tablet_uid) {
675
21.7k
        return tablet;
676
21.7k
    }
677
2
    return nullptr;
678
21.7k
}
679
680
322
uint64_t TabletManager::get_rowset_nums() {
681
322
    uint64_t rowset_nums = 0;
682
4.54M
    for_each_tablet([&](const TabletSharedPtr& tablet) { rowset_nums += tablet->version_count(); },
683
322
                    filter_all_tablets);
684
322
    return rowset_nums;
685
322
}
686
687
322
uint64_t TabletManager::get_segment_nums() {
688
322
    uint64_t segment_nums = 0;
689
4.54M
    for_each_tablet([&](const TabletSharedPtr& tablet) { segment_nums += tablet->segment_count(); },
690
322
                    filter_all_tablets);
691
322
    return segment_nums;
692
322
}
693
694
bool TabletManager::get_tablet_id_and_schema_hash_from_path(const string& path,
695
                                                            TTabletId* tablet_id,
696
571k
                                                            TSchemaHash* schema_hash) {
697
    // the path like: /data/14/10080/964828783/
698
571k
    static re2::RE2 normal_re("/data/\\d+/(\\d+)/(\\d+)($|/)");
699
    // match tablet schema hash data path, for example, the path is /data/1/16791/29998
700
    // 1 is shard id , 16791 is tablet id, 29998 is schema hash
701
571k
    if (RE2::PartialMatch(path, normal_re, tablet_id, schema_hash)) {
702
571k
        return true;
703
571k
    }
704
705
    // If we can't match normal path pattern, this may be a path which is a empty tablet
706
    // directory. Use this pattern to match empty tablet directory. In this case schema_hash
707
    // will be set to zero.
708
4
    static re2::RE2 empty_tablet_re("/data/\\d+/(\\d+)($|/$)");
709
4
    if (!RE2::PartialMatch(path, empty_tablet_re, tablet_id)) {
710
2
        return false;
711
2
    }
712
2
    *schema_hash = 0;
713
2
    return true;
714
4
}
715
716
3
bool TabletManager::get_rowset_id_from_path(const string& path, RowsetId* rowset_id) {
717
    // the path like: /data/14/10080/964828783/02000000000000969144d8725cb62765f9af6cd3125d5a91_0.dat
718
3
    static re2::RE2 re("/data/\\d+/\\d+/\\d+/([A-Fa-f0-9]+)_.*");
719
3
    string id_str;
720
3
    bool ret = RE2::PartialMatch(path, re, &id_str);
721
3
    if (ret) {
722
1
        rowset_id->init(id_str);
723
1
        return true;
724
1
    }
725
2
    return false;
726
3
}
727
728
80
void TabletManager::get_tablet_stat(TTabletStatResult* result) {
729
80
    std::shared_ptr<std::vector<TTabletStat>> local_cache;
730
80
    {
731
80
        std::lock_guard<std::mutex> guard(_tablet_stat_cache_mutex);
732
80
        local_cache = _tablet_stat_list_cache;
733
80
    }
734
80
    result->__set_tablet_stat_list(*local_cache);
735
80
}
736
737
struct TabletScore {
738
    TabletSharedPtr tablet_ptr;
739
    uint32_t score = 0;
740
    int8_t prefer_compaction_level = -1;
741
};
742
743
std::vector<TabletCompactionContext> TabletManager::find_best_tablets_to_compaction(
744
        CompactionType compaction_type, DataDir* data_dir,
745
        const std::unordered_set<TabletSharedPtr>& tablet_submitted_compaction, uint32_t* score,
746
        const std::unordered_map<std::string_view, std::shared_ptr<CumulativeCompactionPolicy>>&
747
58.1k
                all_cumulative_compaction_policies) {
748
58.1k
    int64_t now_ms = UnixMillis();
749
58.1k
    const string& compaction_type_str = compaction_type == CompactionType::BASE_COMPACTION ? "base"
750
58.1k
                                        : compaction_type == CompactionType::BINLOG_COMPACTION
751
57.4k
                                                ? "binlog"
752
57.4k
                                                : "cumulative";
753
58.1k
    uint32_t highest_score = 0;
754
58.1k
    TabletScore best_tablet_context;
755
58.1k
    int64_t compaction_num_per_round =
756
58.1k
            ExecEnv::GetInstance()->storage_engine().to_local().get_compaction_num_per_round();
757
1.36M
    auto cmp = [](TabletScore left, TabletScore right) { return left.score > right.score; };
758
58.1k
    std::priority_queue<TabletScore, std::vector<TabletScore>, decltype(cmp)> top_tablets(cmp);
759
760
681M
    auto handler = [&](const TabletSharedPtr& tablet_ptr) {
761
681M
        if (tablet_ptr->tablet_meta()->tablet_schema()->disable_auto_compaction()) {
762
7.12M
            LOG_EVERY_N(INFO, 500) << "Tablet " << tablet_ptr->tablet_id()
763
14.0k
                                   << " will be ignored by automatic compaction tasks since it's "
764
14.0k
                                   << "set to disabled automatic compaction.";
765
7.12M
            return;
766
7.12M
        }
767
768
674M
        if (config::enable_skip_tablet_compaction &&
769
674M
            tablet_ptr->should_skip_compaction(compaction_type, UnixSeconds())) {
770
283M
            return;
771
283M
        }
772
404M
        if (!tablet_ptr->can_do_compaction(data_dir->path_hash(), compaction_type)) {
773
404M
            return;
774
404M
        }
775
776
18.4E
        auto search = tablet_submitted_compaction.find(tablet_ptr);
777
18.4E
        if (search != tablet_submitted_compaction.end()) {
778
1.56k
            return;
779
1.56k
        }
780
781
18.4E
        int64_t last_failure_ms = tablet_ptr->last_cumu_compaction_failure_time();
782
18.4E
        if (compaction_type == CompactionType::BASE_COMPACTION) {
783
5.70M
            last_failure_ms = tablet_ptr->last_base_compaction_failure_time();
784
18.4E
        } else if (compaction_type == CompactionType::BINLOG_COMPACTION) {
785
496
            last_failure_ms = tablet_ptr->last_binlog_compaction_failure_time();
786
496
        }
787
18.4E
        if (now_ms - last_failure_ms <= config::tablet_sched_delay_time_ms) {
788
809k
            VLOG_DEBUG << "Too often to check compaction, skip it. "
789
0
                       << "compaction_type=" << compaction_type_str
790
0
                       << ", last_failure_time_ms=" << last_failure_ms
791
0
                       << ", tablet_id=" << tablet_ptr->tablet_id();
792
809k
            return;
793
809k
        }
794
795
18.4E
        if (compaction_type == CompactionType::BASE_COMPACTION) {
796
5.69M
            std::unique_lock<std::mutex> lock(tablet_ptr->get_base_compaction_lock(),
797
5.69M
                                              std::try_to_lock);
798
5.69M
            if (!lock.owns_lock()) {
799
0
                LOG(INFO) << "can not get base lock: " << tablet_ptr->tablet_id();
800
0
                return;
801
0
            }
802
18.4E
        } else if (compaction_type == CompactionType::BINLOG_COMPACTION) {
803
496
            std::unique_lock<std::mutex> lock(tablet_ptr->get_binlog_compaction_lock(),
804
496
                                              std::try_to_lock);
805
496
            if (!lock.owns_lock()) {
806
0
                LOG(INFO) << "can not get binlog lock: " << tablet_ptr->tablet_id();
807
0
                return;
808
0
            }
809
18.4E
        } else {
810
18.4E
            std::unique_lock<std::mutex> lock(tablet_ptr->get_cumulative_compaction_lock(),
811
18.4E
                                              std::try_to_lock);
812
18.4E
            if (!lock.owns_lock()) {
813
0
                LOG(INFO) << "can not get cumu lock: " << tablet_ptr->tablet_id();
814
0
                return;
815
0
            }
816
18.4E
        }
817
18.4E
        std::shared_ptr<CumulativeCompactionPolicy> cumulative_compaction_policy;
818
18.4E
        if (compaction_type != CompactionType::BINLOG_COMPACTION) {
819
13.3M
            cumulative_compaction_policy = all_cumulative_compaction_policies.at(
820
13.3M
                    tablet_ptr->tablet_meta()->compaction_policy());
821
13.3M
        }
822
18.4E
        int8_t prefer_compaction_level = -1;
823
18.4E
        uint32_t current_compaction_score =
824
18.4E
                tablet_ptr->calc_compaction_score(compaction_type, &prefer_compaction_level);
825
18.4E
        if (current_compaction_score < 5) {
826
12.9M
            tablet_ptr->set_skip_compaction(true, compaction_type, UnixSeconds());
827
12.9M
        }
828
829
18.4E
        if (current_compaction_score <= 0) {
830
0
            return;
831
0
        }
832
833
18.4E
        if (compaction_num_per_round > 1) {
834
13.3M
            TabletScore ts;
835
13.3M
            ts.score = current_compaction_score;
836
13.3M
            ts.tablet_ptr = tablet_ptr;
837
13.3M
            ts.prefer_compaction_level = prefer_compaction_level;
838
13.3M
            if ((top_tablets.size() >= compaction_num_per_round &&
839
13.3M
                 current_compaction_score > top_tablets.top().score) ||
840
13.3M
                top_tablets.size() < compaction_num_per_round) {
841
916k
                bool ret = tablet_ptr->suitable_for_compaction(compaction_type,
842
916k
                                                               cumulative_compaction_policy);
843
916k
                if (ret) {
844
230k
                    top_tablets.push(ts);
845
230k
                    if (top_tablets.size() > compaction_num_per_round) {
846
68.4k
                        top_tablets.pop();
847
68.4k
                    }
848
230k
                    highest_score = std::max(current_compaction_score, highest_score);
849
230k
                }
850
916k
            }
851
18.4E
        } else {
852
18.4E
            if (current_compaction_score > highest_score) {
853
1
                bool ret = tablet_ptr->suitable_for_compaction(compaction_type,
854
1
                                                               cumulative_compaction_policy);
855
1
                if (ret) {
856
1
                    highest_score = current_compaction_score;
857
1
                    best_tablet_context = {.tablet_ptr = tablet_ptr,
858
1
                                           .score = current_compaction_score,
859
1
                                           .prefer_compaction_level = prefer_compaction_level};
860
1
                }
861
1
            }
862
18.4E
        }
863
18.4E
    };
864
865
58.1k
    for_each_tablet(handler, filter_all_tablets);
866
58.1k
    std::vector<TabletCompactionContext> picked_tablet_contexts;
867
58.1k
    if (best_tablet_context.tablet_ptr != nullptr) {
868
1
        VLOG_CRITICAL << "Found the best tablet for compaction. "
869
1
                      << "compaction_type=" << compaction_type_str
870
1
                      << ", tablet_id=" << best_tablet_context.tablet_ptr->tablet_id()
871
1
                      << ", path=" << data_dir->path() << ", highest_score=" << highest_score;
872
1
        picked_tablet_contexts.emplace_back(TabletCompactionContext {
873
1
                .tablet = std::move(best_tablet_context.tablet_ptr),
874
1
                .prefer_compaction_level = best_tablet_context.prefer_compaction_level});
875
1
    }
876
877
58.1k
    std::vector<TabletScore> reverse_top_tablets;
878
220k
    while (!top_tablets.empty()) {
879
162k
        reverse_top_tablets.emplace_back(top_tablets.top());
880
162k
        top_tablets.pop();
881
162k
    }
882
883
220k
    for (auto it = reverse_top_tablets.rbegin(); it != reverse_top_tablets.rend(); ++it) {
884
162k
        picked_tablet_contexts.emplace_back(TabletCompactionContext {
885
162k
                .tablet = it->tablet_ptr, .prefer_compaction_level = it->prefer_compaction_level});
886
162k
    }
887
888
58.1k
    *score = highest_score;
889
58.1k
    return picked_tablet_contexts;
890
58.1k
}
891
892
Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_id,
893
                                            TSchemaHash schema_hash, std::string_view meta_binary,
894
                                            bool update_meta, bool force, bool restore,
895
282k
                                            bool check_path) {
896
282k
    TabletMetaSharedPtr tablet_meta(new TabletMeta());
897
282k
    Status status = tablet_meta->deserialize(meta_binary);
898
282k
    if (!status.ok()) {
899
0
        return Status::Error<HEADER_PB_PARSE_FAILED>(
900
0
                "fail to load tablet because can not parse meta_binary string. tablet_id={}, "
901
0
                "schema_hash={}, path={}, status={}",
902
0
                tablet_id, schema_hash, data_dir->path(), status);
903
0
    }
904
905
    // check if tablet meta is valid
906
282k
    if (tablet_meta->tablet_id() != tablet_id || tablet_meta->schema_hash() != schema_hash) {
907
0
        return Status::Error<HEADER_PB_PARSE_FAILED>(
908
0
                "fail to load tablet because meet invalid tablet meta. trying to load "
909
0
                "tablet(tablet_id={}, schema_hash={}), but meet tablet={}, path={}",
910
0
                tablet_id, schema_hash, tablet_meta->tablet_id(), data_dir->path());
911
0
    }
912
282k
    if (tablet_meta->tablet_uid().hi == 0 && tablet_meta->tablet_uid().lo == 0) {
913
0
        return Status::Error<HEADER_PB_PARSE_FAILED>(
914
0
                "fail to load tablet because its uid == 0. tablet={}, path={}",
915
0
                tablet_meta->tablet_id(), data_dir->path());
916
0
    }
917
918
282k
    if (restore) {
919
        // we're restoring tablet from trash, tablet state should be changed from shutdown back to running
920
0
        tablet_meta->set_tablet_state(TABLET_RUNNING);
921
0
    }
922
923
282k
    if (tablet_meta->partition_id() == 0) {
924
1
        LOG(WARNING) << "tablet=" << tablet_id << " load from meta but partition id eq 0";
925
1
    }
926
927
282k
    TabletSharedPtr tablet = std::make_shared<Tablet>(_engine, std::move(tablet_meta), data_dir);
928
929
    // NOTE: method load_tablet_from_meta could be called by two cases as below
930
    // case 1: BE start;
931
    // case 2: Clone Task/Restore
932
    // For case 1 doesn't need path check because BE is just starting and not ready,
933
    // just check tablet meta status to judge whether tablet is delete is enough.
934
    // For case 2, If a tablet has just been copied to local BE,
935
    // it may be cleared by gc-thread(see perform_tablet_gc) because the tablet meta may not be loaded to memory.
936
    // So clone task should check path and then failed and retry in this case.
937
282k
    if (check_path) {
938
90
        bool exists = true;
939
90
        RETURN_IF_ERROR(io::global_local_filesystem()->exists(tablet->tablet_path(), &exists));
940
90
        if (!exists) {
941
0
            return Status::Error<TABLE_ALREADY_DELETED_ERROR>(
942
0
                    "tablet path not exists, create tablet failed, path={}", tablet->tablet_path());
943
0
        }
944
90
    }
945
946
282k
    if (tablet->tablet_meta()->tablet_state() == TABLET_SHUTDOWN) {
947
20
        {
948
20
            std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock(_shutdown_tablets_lock);
949
20
            _shutdown_tablets.push_back(tablet);
950
20
        }
951
20
        return Status::Error<TABLE_ALREADY_DELETED_ERROR>(
952
20
                "fail to load tablet because it is to be deleted. tablet_id={}, schema_hash={}, "
953
20
                "path={}",
954
20
                tablet_id, schema_hash, data_dir->path());
955
20
    }
956
    // NOTE: We do not check tablet's initial version here, because if BE restarts when
957
    // one tablet is doing schema-change, we may meet empty tablet.
958
282k
    if (tablet->max_version().first == -1 && tablet->tablet_state() == TABLET_RUNNING) {
959
        // tablet state is invalid, drop tablet
960
0
        return Status::Error<TABLE_INDEX_VALIDATE_ERROR>(
961
0
                "fail to load tablet. it is in running state but without delta. tablet={}, path={}",
962
0
                tablet->tablet_id(), data_dir->path());
963
0
    }
964
965
282k
    RETURN_NOT_OK_STATUS_WITH_WARN(
966
282k
            tablet->init(), absl::Substitute("tablet init failed. tablet=$0", tablet->tablet_id()));
967
968
282k
    RuntimeProfile profile("CreateTablet");
969
282k
    std::lock_guard<std::shared_mutex> wrlock(_get_tablets_shard_lock(tablet_id));
970
282k
    RETURN_NOT_OK_STATUS_WITH_WARN(
971
282k
            _add_tablet_unlocked(tablet_id, tablet, update_meta, force, &profile),
972
282k
            absl::Substitute("fail to add tablet. tablet=$0", tablet->tablet_id()));
973
974
282k
    return Status::OK();
975
282k
}
976
977
Status TabletManager::load_tablet_from_dir(DataDir* store, TTabletId tablet_id,
978
                                           SchemaHash schema_hash, const string& schema_hash_path,
979
89
                                           bool force, bool restore) {
980
89
    LOG(INFO) << "begin to load tablet from dir. "
981
89
              << " tablet_id=" << tablet_id << " schema_hash=" << schema_hash
982
89
              << " path = " << schema_hash_path << " force = " << force << " restore = " << restore;
983
    // not add lock here, because load_tablet_from_meta already add lock
984
89
    std::string header_path = TabletMeta::construct_header_file_path(schema_hash_path, tablet_id);
985
    // should change shard id before load tablet
986
89
    std::string shard_path =
987
89
            path_util::dir_name(path_util::dir_name(path_util::dir_name(header_path)));
988
89
    std::string shard_str = shard_path.substr(shard_path.find_last_of('/') + 1);
989
89
    int32_t shard = static_cast<int32_t>(stol(shard_str));
990
991
89
    bool exists = false;
992
89
    RETURN_IF_ERROR(io::global_local_filesystem()->exists(header_path, &exists));
993
89
    if (!exists) {
994
0
        return Status::Error<NOT_FOUND>("fail to find header file. [header_path={}]", header_path);
995
0
    }
996
997
89
    TabletMetaSharedPtr tablet_meta(new TabletMeta());
998
89
    if (!tablet_meta->create_from_file(header_path).ok()) {
999
0
        return Status::Error<ENGINE_LOAD_INDEX_TABLE_ERROR>(
1000
0
                "fail to load tablet_meta. file_path={}", header_path);
1001
0
    }
1002
89
    TabletUid tablet_uid = TabletUid::gen_uid();
1003
1004
    // remove rowset binlog metas
1005
89
    auto binlog_metas_file = fmt::format("{}/rowset_binlog_metas.pb", schema_hash_path);
1006
89
    bool binlog_metas_file_exists = false;
1007
89
    auto file_exists_status =
1008
89
            io::global_local_filesystem()->exists(binlog_metas_file, &binlog_metas_file_exists);
1009
89
    if (!file_exists_status.ok()) {
1010
0
        return file_exists_status;
1011
0
    }
1012
89
    bool contain_binlog = false;
1013
89
    RowsetBinlogMetasPB rowset_binlog_metas_pb;
1014
89
    if (binlog_metas_file_exists) {
1015
0
        auto binlog_meta_filesize = std::filesystem::file_size(binlog_metas_file);
1016
0
        if (binlog_meta_filesize > 0) {
1017
0
            contain_binlog = true;
1018
0
            RETURN_IF_ERROR(read_pb(binlog_metas_file, &rowset_binlog_metas_pb));
1019
0
            VLOG_DEBUG << "load rowset binlog metas from file. file_path=" << binlog_metas_file;
1020
0
        }
1021
0
        RETURN_IF_ERROR(io::global_local_filesystem()->delete_file(binlog_metas_file));
1022
0
    }
1023
89
    if (contain_binlog) {
1024
0
        auto binlog_dir = fmt::format("{}/_binlog", schema_hash_path);
1025
0
        RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(binlog_dir));
1026
1027
0
        std::vector<io::FileInfo> files;
1028
0
        RETURN_IF_ERROR(
1029
0
                io::global_local_filesystem()->list(schema_hash_path, true, &files, &exists));
1030
0
        for (auto& file : files) {
1031
0
            auto& filename = file.file_name;
1032
0
            std::string new_suffix;
1033
0
            std::string old_suffix;
1034
1035
0
            if (filename.ends_with(".binlog")) {
1036
0
                old_suffix = ".binlog";
1037
0
                new_suffix = ".dat";
1038
0
            } else if (filename.ends_with(".binlog-index")) {
1039
0
                old_suffix = ".binlog-index";
1040
0
                new_suffix = ".idx";
1041
0
            } else {
1042
0
                continue;
1043
0
            }
1044
1045
0
            std::string new_filename = filename;
1046
0
            new_filename.replace(filename.size() - old_suffix.size(), old_suffix.size(),
1047
0
                                 new_suffix);
1048
0
            auto from = fmt::format("{}/{}", schema_hash_path, filename);
1049
0
            auto to = fmt::format("{}/_binlog/{}", schema_hash_path, new_filename);
1050
0
            RETURN_IF_ERROR(io::global_local_filesystem()->rename(from, to));
1051
0
        }
1052
1053
0
        auto* meta = store->get_meta();
1054
        // if ingest binlog metas error, it will be gc in gc_unused_binlog_metas
1055
0
        RETURN_IF_ERROR(
1056
0
                RowsetMetaManager::ingest_binlog_metas(meta, tablet_uid, &rowset_binlog_metas_pb));
1057
0
    }
1058
1059
    // has to change shard id here, because meta file maybe copied from other source
1060
    // its shard is different from local shard
1061
89
    tablet_meta->set_shard_id(shard);
1062
    // load dir is called by clone, restore, storage migration
1063
    // should change tablet uid when tablet object changed
1064
89
    tablet_meta->set_tablet_uid(std::move(tablet_uid));
1065
89
    std::string meta_binary;
1066
89
    tablet_meta->serialize(&meta_binary);
1067
89
    RETURN_NOT_OK_STATUS_WITH_WARN(
1068
89
            load_tablet_from_meta(store, tablet_id, schema_hash, meta_binary, true, force, restore,
1069
89
                                  true),
1070
89
            absl::Substitute("fail to load tablet. header_path=$0", header_path));
1071
1072
89
    return Status::OK();
1073
89
}
1074
1075
11
Status TabletManager::report_tablet_info(TTabletInfo* tablet_info) {
1076
11
    LOG(INFO) << "begin to process report tablet info."
1077
11
              << "tablet_id=" << tablet_info->tablet_id;
1078
1079
11
    Status res = Status::OK();
1080
1081
11
    TabletSharedPtr tablet = get_tablet(tablet_info->tablet_id);
1082
11
    if (tablet == nullptr) {
1083
0
        return Status::Error<TABLE_NOT_FOUND>("can't find tablet={}", tablet_info->tablet_id);
1084
0
    }
1085
1086
11
    tablet->build_tablet_report_info(tablet_info);
1087
11
    VLOG_TRACE << "success to process report tablet info.";
1088
11
    return res;
1089
11
}
1090
1091
79
void TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>* tablets_info) {
1092
79
    DCHECK(tablets_info != nullptr);
1093
79
    VLOG_NOTICE << "begin to build all report tablets info";
1094
1095
    // build the expired txn map first, outside the tablet map lock
1096
79
    std::map<TabletInfo, std::vector<int64_t>> expire_txn_map;
1097
79
    _engine.txn_manager()->build_expire_txn_map(&expire_txn_map);
1098
79
    LOG(INFO) << "find expired transactions for " << expire_txn_map.size() << " tablets";
1099
1100
79
    HistogramStat tablet_version_num_hist;
1101
79
    auto local_cache = std::make_shared<std::vector<TTabletStat>>();
1102
1.20M
    auto handler = [&](const TabletSharedPtr& tablet) {
1103
1.20M
        auto& t_tablet = (*tablets_info)[tablet->tablet_id()];
1104
1.20M
        TTabletInfo& tablet_info = t_tablet.tablet_infos.emplace_back();
1105
1.20M
        tablet->build_tablet_report_info(&tablet_info, true, true);
1106
        // find expired transaction corresponding to this tablet
1107
1.20M
        TabletInfo tinfo(tablet->tablet_id(), tablet->tablet_uid());
1108
1.20M
        auto find = expire_txn_map.find(tinfo);
1109
1.20M
        if (find != expire_txn_map.end()) {
1110
10
            tablet_info.__set_transaction_ids(find->second);
1111
10
            expire_txn_map.erase(find);
1112
10
        }
1113
1.20M
        tablet_version_num_hist.add(tablet_info.total_version_count);
1114
1.20M
        auto& t_tablet_stat = local_cache->emplace_back();
1115
1.20M
        t_tablet_stat.__set_tablet_id(tablet_info.tablet_id);
1116
1.20M
        t_tablet_stat.__set_data_size(tablet_info.data_size);
1117
1.20M
        t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size);
1118
1.20M
        t_tablet_stat.__set_row_count(tablet_info.row_count);
1119
1.20M
        t_tablet_stat.__set_total_version_count(tablet_info.total_version_count);
1120
1.20M
        t_tablet_stat.__set_visible_version_count(tablet_info.visible_version_count);
1121
1.20M
        t_tablet_stat.__set_visible_version(tablet_info.version);
1122
1.20M
        t_tablet_stat.__set_local_index_size(tablet_info.local_index_size);
1123
1.20M
        t_tablet_stat.__set_local_segment_size(tablet_info.local_segment_size);
1124
1.20M
        t_tablet_stat.__set_remote_index_size(tablet_info.remote_index_size);
1125
1.20M
        t_tablet_stat.__set_remote_segment_size(tablet_info.remote_segment_size);
1126
1.20M
        if (tablet_info.__isset.binlog_size) {
1127
1.20M
            t_tablet_stat.__set_binlog_size(tablet_info.binlog_size);
1128
1.20M
            t_tablet_stat.__set_binlog_file_num(tablet_info.binlog_file_num);
1129
1.20M
        }
1130
1.20M
    };
1131
79
    for_each_tablet(handler, filter_all_tablets);
1132
1133
79
    {
1134
79
        std::lock_guard<std::mutex> guard(_tablet_stat_cache_mutex);
1135
79
        _tablet_stat_list_cache.swap(local_cache);
1136
79
    }
1137
79
    DorisMetrics::instance()->tablet_version_num_distribution->set_histogram(
1138
79
            tablet_version_num_hist);
1139
79
    LOG(INFO) << "success to build all report tablets info. tablet_count=" << tablets_info->size();
1140
79
}
1141
1142
35
Status TabletManager::start_trash_sweep() {
1143
35
    DBUG_EXECUTE_IF("TabletManager.start_trash_sweep.sleep", DBUG_BLOCK);
1144
35
    std::unique_lock<std::mutex> lock(_gc_tablets_lock, std::defer_lock);
1145
35
    if (!lock.try_lock()) {
1146
0
        return Status::OK();
1147
0
    }
1148
1149
587k
    for_each_tablet([](const TabletSharedPtr& tablet) { tablet->delete_expired_stale_rowset(); },
1150
35
                    filter_all_tablets);
1151
1152
35
    if (config::enable_check_agg_and_remove_pre_rowsets_delete_bitmap) {
1153
0
        int64_t max_useless_rowset_count = 0;
1154
0
        int64_t tablet_id_with_max_useless_rowset_count = 0;
1155
0
        int64_t max_useless_rowset_version_count = 0;
1156
0
        int64_t tablet_id_with_max_useless_rowset_version_count = 0;
1157
0
        OlapStopWatch watch;
1158
0
        for_each_tablet(
1159
0
                [&](const TabletSharedPtr& tablet) {
1160
0
                    int64_t useless_rowset_count = 0;
1161
0
                    int64_t useless_rowset_version_count = 0;
1162
0
                    tablet->check_agg_delete_bitmap_for_stale_rowsets(useless_rowset_count,
1163
0
                                                                      useless_rowset_version_count);
1164
0
                    if (useless_rowset_count > max_useless_rowset_count) {
1165
0
                        max_useless_rowset_count = useless_rowset_count;
1166
0
                        tablet_id_with_max_useless_rowset_count = tablet->tablet_id();
1167
0
                    }
1168
0
                    if (useless_rowset_version_count > max_useless_rowset_version_count) {
1169
0
                        max_useless_rowset_version_count = useless_rowset_version_count;
1170
0
                        tablet_id_with_max_useless_rowset_version_count = tablet->tablet_id();
1171
0
                    }
1172
0
                },
1173
0
                filter_all_tablets);
1174
0
        g_max_rowsets_with_useless_delete_bitmap.set_value(max_useless_rowset_count);
1175
0
        g_max_rowsets_with_useless_delete_bitmap_version.set_value(
1176
0
                max_useless_rowset_version_count);
1177
0
        LOG(INFO) << "finish check_agg_delete_bitmap_for_stale_rowsets, cost(us)="
1178
0
                  << watch.get_elapse_time_us()
1179
0
                  << ". max useless rowset count=" << max_useless_rowset_count
1180
0
                  << ", tablet_id=" << tablet_id_with_max_useless_rowset_count
1181
0
                  << ", max useless rowset version count=" << max_useless_rowset_version_count
1182
0
                  << ", tablet_id=" << tablet_id_with_max_useless_rowset_version_count;
1183
0
    }
1184
1185
35
    std::list<TabletSharedPtr>::iterator last_it;
1186
35
    {
1187
35
        std::shared_lock rdlock(_shutdown_tablets_lock);
1188
35
        last_it = _shutdown_tablets.begin();
1189
35
        if (last_it == _shutdown_tablets.end()) {
1190
10
            return Status::OK();
1191
10
        }
1192
35
    }
1193
1194
47
    auto get_batch_tablets = [this, &last_it](int limit) {
1195
47
        std::vector<TabletSharedPtr> batch_tablets;
1196
47
        std::lock_guard<std::shared_mutex> wrdlock(_shutdown_tablets_lock);
1197
3.04k
        while (last_it != _shutdown_tablets.end() && batch_tablets.size() < limit) {
1198
            // it means current tablet is referenced by other thread
1199
2.99k
            if (last_it->use_count() > 1) {
1200
0
                last_it++;
1201
2.99k
            } else {
1202
2.99k
                batch_tablets.push_back(*last_it);
1203
2.99k
                last_it = _shutdown_tablets.erase(last_it);
1204
2.99k
            }
1205
2.99k
        }
1206
1207
47
        return batch_tablets;
1208
47
    };
1209
1210
25
    std::list<TabletSharedPtr> failed_tablets;
1211
    // return true if need continue delete
1212
27
    auto delete_one_batch = [this, get_batch_tablets, &failed_tablets]() -> bool {
1213
27
        int limit = 200;
1214
47
        for (;;) {
1215
47
            auto batch_tablets = get_batch_tablets(limit);
1216
2.99k
            for (const auto& tablet : batch_tablets) {
1217
2.99k
                if (_move_tablet_to_trash(tablet)) {
1218
2.99k
                    limit--;
1219
2.99k
                } else {
1220
0
                    failed_tablets.push_back(tablet);
1221
0
                }
1222
2.99k
            }
1223
47
            if (limit <= 0) {
1224
7
                return true;
1225
7
            }
1226
40
            if (batch_tablets.empty()) {
1227
20
                return false;
1228
20
            }
1229
40
        }
1230
1231
0
        return false;
1232
27
    };
1233
1234
32
    while (delete_one_batch()) {
1235
7
#ifndef BE_TEST
1236
7
        sleep(1);
1237
7
#endif
1238
7
    }
1239
1240
25
    if (!failed_tablets.empty()) {
1241
0
        std::lock_guard<std::shared_mutex> wrlock(_shutdown_tablets_lock);
1242
0
        _shutdown_tablets.splice(_shutdown_tablets.end(), failed_tablets);
1243
0
    }
1244
1245
25
    return Status::OK();
1246
35
}
1247
1248
3.11k
bool TabletManager::_move_tablet_to_trash(const TabletSharedPtr& tablet) {
1249
3.11k
    RETURN_IF_ERROR(register_transition_tablet(tablet->tablet_id(), "move to trash"));
1250
3.11k
    Defer defer {[&]() { unregister_transition_tablet(tablet->tablet_id(), "move to trash"); }};
1251
1252
3.11k
    TabletSharedPtr tablet_in_not_shutdown = get_tablet(tablet->tablet_id());
1253
3.11k
    if (tablet_in_not_shutdown) {
1254
0
        TSchemaHash schema_hash_not_shutdown = tablet_in_not_shutdown->schema_hash();
1255
0
        size_t path_hash_not_shutdown = tablet_in_not_shutdown->data_dir()->path_hash();
1256
0
        if (tablet->schema_hash() == schema_hash_not_shutdown &&
1257
0
            tablet->data_dir()->path_hash() == path_hash_not_shutdown) {
1258
0
            tablet->clear_cache();
1259
            // shard_id in memory not eq shard_id in shutdown
1260
0
            if (tablet_in_not_shutdown->tablet_path() != tablet->tablet_path()) {
1261
0
                LOG(INFO) << "tablet path not eq shutdown tablet path, move it to trash, tablet_id="
1262
0
                          << tablet_in_not_shutdown->tablet_id()
1263
0
                          << ", mem manager tablet path=" << tablet_in_not_shutdown->tablet_path()
1264
0
                          << ", shutdown tablet path=" << tablet->tablet_path();
1265
0
                return tablet->data_dir()->move_to_trash(tablet->tablet_path());
1266
0
            } else {
1267
0
                LOG(INFO) << "tablet path eq shutdown tablet path, not move to trash, tablet_id="
1268
0
                          << tablet_in_not_shutdown->tablet_id()
1269
0
                          << ", mem manager tablet path=" << tablet_in_not_shutdown->tablet_path()
1270
0
                          << ", shutdown tablet path=" << tablet->tablet_path();
1271
0
                return true;
1272
0
            }
1273
0
        }
1274
0
    }
1275
1276
3.11k
    TabletMetaSharedPtr tablet_meta(new TabletMeta());
1277
3.11k
    int64_t get_meta_ts = MonotonicMicros();
1278
3.11k
    Status check_st = TabletMetaManager::get_meta(tablet->data_dir(), tablet->tablet_id(),
1279
3.11k
                                                  tablet->schema_hash(), tablet_meta);
1280
3.11k
    if (check_st.ok()) {
1281
3.11k
        if (tablet_meta->tablet_state() != TABLET_SHUTDOWN ||
1282
3.11k
            tablet_meta->tablet_uid() != tablet->tablet_uid()) {
1283
0
            LOG(WARNING) << "tablet's state changed to normal, skip remove dirs"
1284
0
                         << " tablet id = " << tablet_meta->tablet_id()
1285
0
                         << " schema hash = " << tablet_meta->schema_hash()
1286
0
                         << " old tablet_uid=" << tablet->tablet_uid()
1287
0
                         << " cur tablet_uid=" << tablet_meta->tablet_uid();
1288
0
            return true;
1289
0
        }
1290
1291
3.11k
        tablet->clear_cache();
1292
1293
        // move data to trash
1294
3.11k
        const auto& tablet_path = tablet->tablet_path();
1295
3.11k
        bool exists = false;
1296
3.11k
        Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists);
1297
3.11k
        if (!exists_st) {
1298
0
            return false;
1299
0
        }
1300
3.11k
        if (exists) {
1301
            // take snapshot of tablet meta
1302
3.11k
            auto meta_file_path = fmt::format("{}/{}.hdr", tablet_path, tablet->tablet_id());
1303
3.11k
            int64_t save_meta_ts = MonotonicMicros();
1304
3.11k
            auto save_st = tablet->tablet_meta()->save(meta_file_path);
1305
3.11k
            if (!save_st.ok()) {
1306
0
                LOG(WARNING) << "failed to save meta, tablet_id=" << tablet_meta->tablet_id()
1307
0
                             << ", tablet_uid=" << tablet_meta->tablet_uid()
1308
0
                             << ", error=" << save_st;
1309
0
                return false;
1310
0
            }
1311
3.11k
            int64_t now = MonotonicMicros();
1312
3.11k
            LOG(INFO) << "start to move tablet to trash. " << tablet_path
1313
3.11k
                      << ". rocksdb get meta cost " << (save_meta_ts - get_meta_ts)
1314
3.11k
                      << " us, rocksdb save meta cost " << (now - save_meta_ts) << " us";
1315
3.11k
            Status rm_st = tablet->data_dir()->move_to_trash(tablet_path);
1316
3.11k
            if (!rm_st.ok()) {
1317
0
                LOG(WARNING) << "fail to move dir to trash. " << tablet_path;
1318
0
                return false;
1319
0
            }
1320
3.11k
        }
1321
        // remove tablet meta
1322
3.11k
        auto remove_st = TabletMetaManager::remove(tablet->data_dir(), tablet->tablet_id(),
1323
3.11k
                                                   tablet->schema_hash());
1324
3.11k
        if (!remove_st.ok()) {
1325
0
            LOG(WARNING) << "failed to remove meta, tablet_id=" << tablet_meta->tablet_id()
1326
0
                         << ", tablet_uid=" << tablet_meta->tablet_uid() << ", error=" << remove_st;
1327
0
            return false;
1328
0
        }
1329
3.11k
        LOG(INFO) << "successfully move tablet to trash. "
1330
3.11k
                  << "tablet_id=" << tablet->tablet_id()
1331
3.11k
                  << ", schema_hash=" << tablet->schema_hash() << ", tablet_path=" << tablet_path;
1332
3.11k
        return true;
1333
3.11k
    } else {
1334
0
        tablet->clear_cache();
1335
        // if could not find tablet info in meta store, then check if dir existed
1336
0
        const auto& tablet_path = tablet->tablet_path();
1337
0
        bool exists = false;
1338
0
        Status exists_st = io::global_local_filesystem()->exists(tablet_path, &exists);
1339
0
        if (!exists_st) {
1340
0
            return false;
1341
0
        }
1342
0
        if (exists) {
1343
0
            if (check_st.is<META_KEY_NOT_FOUND>()) {
1344
0
                LOG(INFO) << "could not find tablet meta in rocksdb, so just delete it path "
1345
0
                          << "tablet_id=" << tablet->tablet_id()
1346
0
                          << ", schema_hash=" << tablet->schema_hash()
1347
0
                          << ", delete tablet_path=" << tablet_path;
1348
0
                RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_path));
1349
0
                RETURN_IF_ERROR(DataDir::delete_tablet_parent_path_if_empty(tablet_path));
1350
0
                return true;
1351
0
            }
1352
0
            LOG(WARNING) << "errors while load meta from store, skip this tablet. "
1353
0
                         << "tablet_id=" << tablet->tablet_id()
1354
0
                         << ", schema_hash=" << tablet->schema_hash();
1355
0
            return false;
1356
0
        } else {
1357
0
            LOG(INFO) << "could not find tablet dir, skip it and remove it from gc-queue. "
1358
0
                      << "tablet_id=" << tablet->tablet_id()
1359
0
                      << ", schema_hash=" << tablet->schema_hash()
1360
0
                      << ", tablet_path=" << tablet_path;
1361
0
            return true;
1362
0
        }
1363
0
    }
1364
3.11k
}
1365
1366
6.33k
Status TabletManager::register_transition_tablet(int64_t tablet_id, std::string reason) {
1367
6.33k
    tablets_shard& shard = _get_tablets_shard(tablet_id);
1368
6.33k
    std::thread::id thread_id = std::this_thread::get_id();
1369
6.33k
    std::lock_guard<std::mutex> lk(shard.lock_for_transition);
1370
6.33k
    if (auto search = shard.tablets_under_transition.find(tablet_id);
1371
6.33k
        search == shard.tablets_under_transition.end()) {
1372
        // not found
1373
6.33k
        shard.tablets_under_transition[tablet_id] = std::make_tuple(reason, thread_id, 1);
1374
6.33k
        LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << reason
1375
6.33k
                  << ", lock times=1, thread_id_in_map=" << thread_id;
1376
6.33k
        return Status::OK();
1377
6.33k
    } else {
1378
        // found
1379
2
        auto& [r, thread_id_in_map, lock_times] = search->second;
1380
2
        if (thread_id != thread_id_in_map) {
1381
            // other thread, failed
1382
0
            LOG(INFO) << "tablet_id = " << tablet_id << " is doing " << r
1383
0
                      << ", thread_id_in_map=" << thread_id_in_map << " , add reason=" << reason
1384
0
                      << ", thread_id=" << thread_id;
1385
0
            return Status::InternalError<false>("{} failed try later, tablet_id={}", reason,
1386
0
                                                tablet_id);
1387
0
        }
1388
        // add lock times
1389
2
        ++lock_times;
1390
2
        LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << reason
1391
2
                  << ", lock times=" << lock_times << ", thread_id_in_map=" << thread_id_in_map;
1392
2
        return Status::OK();
1393
2
    }
1394
6.33k
}
1395
1396
6.33k
void TabletManager::unregister_transition_tablet(int64_t tablet_id, std::string reason) {
1397
6.33k
    tablets_shard& shard = _get_tablets_shard(tablet_id);
1398
6.33k
    std::thread::id thread_id = std::this_thread::get_id();
1399
6.33k
    std::lock_guard<std::mutex> lk(shard.lock_for_transition);
1400
6.33k
    if (auto search = shard.tablets_under_transition.find(tablet_id);
1401
6.33k
        search == shard.tablets_under_transition.end()) {
1402
        // impossible, bug
1403
0
        DCHECK(false) << "tablet " << tablet_id
1404
0
                      << " must be found, before unreg must have been reg";
1405
6.33k
    } else {
1406
6.33k
        auto& [r, thread_id_in_map, lock_times] = search->second;
1407
6.33k
        if (thread_id_in_map != thread_id) {
1408
            // impossible, bug
1409
0
            DCHECK(false) << "tablet " << tablet_id << " unreg thread must same reg thread";
1410
0
        }
1411
        // sub lock times
1412
6.33k
        --lock_times;
1413
6.33k
        if (lock_times != 0) {
1414
2
            LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason
1415
2
                      << ", left=" << lock_times << ", thread_id_in_map=" << thread_id_in_map;
1416
6.33k
        } else {
1417
6.33k
            LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, reason=" << reason
1418
6.33k
                      << ", thread_id_in_map=" << thread_id_in_map;
1419
6.33k
            shard.tablets_under_transition.erase(tablet_id);
1420
6.33k
        }
1421
6.33k
    }
1422
6.33k
}
1423
1424
void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId tablet_id,
1425
                                                  SchemaHash schema_hash,
1426
                                                  const string& schema_hash_path,
1427
538
                                                  int16_t shard_id) {
1428
    // acquire the read lock, so that there is no creating tablet or load tablet from meta tasks
1429
    // create tablet and load tablet task should check whether the dir exists
1430
538
    tablets_shard& shard = _get_tablets_shard(tablet_id);
1431
538
    std::shared_lock rdlock(shard.lock);
1432
1433
    // check if meta already exists
1434
538
    TabletMetaSharedPtr tablet_meta(new TabletMeta());
1435
538
    Status check_st = TabletMetaManager::get_meta(data_dir, tablet_id, schema_hash, tablet_meta);
1436
538
    if (check_st.ok() && tablet_meta->shard_id() == shard_id) {
1437
528
        return;
1438
528
    }
1439
1440
538
    LOG(INFO) << "tablet meta not exists, try delete tablet path " << schema_hash_path;
1441
1442
10
    bool succ = register_transition_tablet(tablet_id, "path gc");
1443
10
    if (!succ) {
1444
0
        return;
1445
0
    }
1446
10
    Defer defer {[&]() { unregister_transition_tablet(tablet_id, "path gc"); }};
1447
1448
10
    TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id);
1449
10
    if (tablet != nullptr && tablet->tablet_path() == schema_hash_path) {
1450
0
        LOG(INFO) << "tablet exists, skip delete the path " << schema_hash_path;
1451
0
        return;
1452
0
    }
1453
1454
    // TODO(ygl): may do other checks in the future
1455
10
    bool exists = false;
1456
10
    Status exists_st = io::global_local_filesystem()->exists(schema_hash_path, &exists);
1457
10
    if (exists_st && exists) {
1458
10
        LOG(INFO) << "start to move tablet to trash. tablet_path = " << schema_hash_path;
1459
10
        Status rm_st = data_dir->move_to_trash(schema_hash_path);
1460
10
        if (!rm_st.ok()) {
1461
0
            LOG(WARNING) << "fail to move dir to trash. dir=" << schema_hash_path;
1462
10
        } else {
1463
10
            LOG(INFO) << "move path " << schema_hash_path << " to trash successfully";
1464
10
        }
1465
10
    }
1466
10
}
1467
1468
void TabletManager::update_root_path_info(std::map<string, DataDirInfo>* path_map,
1469
193
                                          size_t* tablet_count) {
1470
193
    DCHECK(tablet_count);
1471
193
    *tablet_count = 0;
1472
3.13M
    auto filter = [path_map, tablet_count](Tablet* t) -> bool {
1473
3.13M
        ++(*tablet_count);
1474
3.13M
        auto iter = path_map->find(t->data_dir()->path());
1475
3.13M
        return iter != path_map->end() && iter->second.is_used;
1476
3.13M
    };
1477
1478
3.13M
    auto handler = [&](const TabletSharedPtr& tablet) {
1479
3.13M
        auto& data_dir_info = (*path_map)[tablet->data_dir()->path()];
1480
3.13M
        data_dir_info.local_used_capacity += tablet->tablet_local_size();
1481
3.13M
        data_dir_info.remote_used_capacity += tablet->tablet_remote_size();
1482
3.13M
    };
1483
1484
193
    for_each_tablet(handler, filter);
1485
193
}
1486
1487
void TabletManager::get_partition_related_tablets(int64_t partition_id,
1488
5.40k
                                                  std::set<TabletInfo>* tablet_infos) {
1489
5.40k
    std::shared_lock rdlock(_partitions_lock);
1490
5.40k
    auto it = _partitions.find(partition_id);
1491
5.40k
    if (it != _partitions.end()) {
1492
5.40k
        *tablet_infos = it->second.tablets;
1493
5.40k
    }
1494
5.40k
}
1495
1496
78
void TabletManager::get_partitions_visible_version(std::map<int64_t, int64_t>* partitions_version) {
1497
78
    std::shared_lock rdlock(_partitions_lock);
1498
154k
    for (const auto& [partition_id, partition] : _partitions) {
1499
154k
        partitions_version->insert(
1500
154k
                {partition_id, partition.visible_version->version.load(std::memory_order_relaxed)});
1501
154k
    }
1502
78
}
1503
1504
void TabletManager::update_partitions_visible_version(
1505
2.69k
        const std::map<int64_t, int64_t>& partitions_version) {
1506
2.69k
    std::shared_lock rdlock(_partitions_lock);
1507
36.9k
    for (auto [partition_id, version] : partitions_version) {
1508
36.9k
        auto it = _partitions.find(partition_id);
1509
36.9k
        if (it != _partitions.end()) {
1510
36.9k
            it->second.visible_version->update_version_monoto(version);
1511
36.9k
        }
1512
36.9k
    }
1513
2.69k
}
1514
1515
15
void TabletManager::do_tablet_meta_checkpoint(DataDir* data_dir) {
1516
540k
    auto filter = [data_dir](Tablet* tablet) -> bool {
1517
540k
        return tablet->tablet_state() == TABLET_RUNNING &&
1518
554k
               tablet->data_dir()->path_hash() == data_dir->path_hash() && tablet->is_used() &&
1519
540k
               tablet->init_succeeded();
1520
540k
    };
1521
1522
15
    std::vector<TabletSharedPtr> related_tablets = get_all_tablet(filter);
1523
15
    int counter = 0;
1524
15
    MonotonicStopWatch watch;
1525
15
    watch.start();
1526
289k
    for (TabletSharedPtr tablet : related_tablets) {
1527
289k
        if (tablet->do_tablet_meta_checkpoint()) {
1528
4.01k
            ++counter;
1529
4.01k
        }
1530
289k
    }
1531
15
    int64_t cost = watch.elapsed_time() / 1000 / 1000;
1532
15
    LOG(INFO) << "finish to do meta checkpoint on dir: " << data_dir->path()
1533
15
              << ", number: " << counter << ", cost(ms): " << cost;
1534
15
}
1535
1536
Status TabletManager::_create_tablet_meta_unlocked(const TCreateTabletReq& request, DataDir* store,
1537
                                                   const bool is_schema_change,
1538
                                                   const Tablet* base_tablet,
1539
4.02k
                                                   TabletMetaSharedPtr* tablet_meta) {
1540
4.02k
    uint32_t next_unique_id = 0;
1541
4.02k
    std::unordered_map<uint32_t, uint32_t> col_idx_to_unique_id;
1542
4.02k
    if (!is_schema_change) {
1543
28.8k
        for (uint32_t col_idx = 0; col_idx < request.tablet_schema.columns.size(); ++col_idx) {
1544
24.7k
            col_idx_to_unique_id[col_idx] = col_idx;
1545
24.7k
        }
1546
4.02k
        next_unique_id = cast_set<int32_t>(request.tablet_schema.columns.size());
1547
4.02k
    } else {
1548
1
        next_unique_id = cast_set<int32_t>(base_tablet->next_unique_id());
1549
1
        auto& new_columns = request.tablet_schema.columns;
1550
1
        for (uint32_t new_col_idx = 0; new_col_idx < new_columns.size(); ++new_col_idx) {
1551
0
            const TColumn& column = new_columns[new_col_idx];
1552
            // For schema change, compare old_tablet and new_tablet:
1553
            // 1. if column exist in both new_tablet and old_tablet, choose the column's
1554
            //    unique_id in old_tablet to be the column's ordinal number in new_tablet
1555
            // 2. if column exists only in new_tablet, assign next_unique_id of old_tablet
1556
            //    to the new column
1557
0
            int32_t old_col_idx = base_tablet->tablet_schema()->field_index(column.column_name);
1558
0
            if (old_col_idx != -1) {
1559
0
                uint32_t old_unique_id =
1560
0
                        base_tablet->tablet_schema()->column(old_col_idx).unique_id();
1561
0
                col_idx_to_unique_id[new_col_idx] = old_unique_id;
1562
0
            } else {
1563
                // Not exist in old tablet, it is a new added column
1564
0
                col_idx_to_unique_id[new_col_idx] = next_unique_id++;
1565
0
            }
1566
0
        }
1567
1
    }
1568
4.02k
    VLOG_NOTICE << "creating tablet meta. next_unique_id=" << next_unique_id;
1569
1570
    // We generate a new tablet_uid for this new tablet.
1571
4.02k
    uint64_t shard_id = store->get_shard();
1572
4.02k
    *tablet_meta = TabletMeta::create(request, TabletUid::gen_uid(), shard_id, next_unique_id,
1573
4.02k
                                      col_idx_to_unique_id);
1574
4.02k
    if (request.__isset.storage_format) {
1575
3.88k
        if (request.storage_format == TStorageFormat::DEFAULT) {
1576
0
            (*tablet_meta)->set_preferred_rowset_type(_engine.default_rowset_type());
1577
3.88k
        } else if (request.storage_format == TStorageFormat::V1) {
1578
0
            (*tablet_meta)->set_preferred_rowset_type(ALPHA_ROWSET);
1579
3.88k
        } else if (request.storage_format == TStorageFormat::V2 ||
1580
3.88k
                   request.storage_format == TStorageFormat::V3) {
1581
3.88k
            (*tablet_meta)->set_preferred_rowset_type(BETA_ROWSET);
1582
3.88k
        } else {
1583
0
            return Status::Error<CE_CMD_PARAMS_ERROR>("invalid TStorageFormat: {}",
1584
0
                                                      request.storage_format);
1585
0
        }
1586
3.88k
    }
1587
4.02k
    return Status::OK();
1588
4.02k
}
1589
1590
1.29M
TabletSharedPtr TabletManager::_get_tablet_unlocked(TTabletId tablet_id) {
1591
1.29M
    VLOG_NOTICE << "begin to get tablet. tablet_id=" << tablet_id;
1592
1.29M
    tablet_map_t& tablet_map = _get_tablet_map(tablet_id);
1593
1.29M
    const auto& iter = tablet_map.find(tablet_id);
1594
1.29M
    if (iter != tablet_map.end()) {
1595
1.28M
        return iter->second;
1596
1.28M
    }
1597
9.45k
    return nullptr;
1598
1.29M
}
1599
1600
286k
void TabletManager::_add_tablet_to_partition(const TabletSharedPtr& tablet) {
1601
286k
    std::lock_guard<std::shared_mutex> wrlock(_partitions_lock);
1602
286k
    auto& partition = _partitions[tablet->partition_id()];
1603
286k
    partition.tablets.insert(tablet->get_tablet_info());
1604
286k
    tablet->set_visible_version(
1605
286k
            std::static_pointer_cast<const VersionWithTime>(partition.visible_version));
1606
286k
}
1607
1608
3.21k
void TabletManager::_remove_tablet_from_partition(const TabletSharedPtr& tablet) {
1609
3.21k
    tablet->set_visible_version(nullptr);
1610
3.21k
    std::lock_guard<std::shared_mutex> wrlock(_partitions_lock);
1611
3.21k
    auto it = _partitions.find(tablet->partition_id());
1612
3.21k
    if (it == _partitions.end()) {
1613
0
        return;
1614
0
    }
1615
1616
3.21k
    auto& tablets = it->second.tablets;
1617
3.21k
    tablets.erase(tablet->get_tablet_info());
1618
3.21k
    if (tablets.empty()) {
1619
676
        _partitions.erase(it);
1620
676
    }
1621
3.21k
}
1622
1623
void TabletManager::obtain_specific_quantity_tablets(vector<TabletInfo>& tablets_info,
1624
0
                                                     int64_t num) {
1625
0
    for (const auto& tablets_shard : _tablets_shards) {
1626
0
        std::shared_lock rdlock(tablets_shard.lock);
1627
0
        for (const auto& item : tablets_shard.tablet_map) {
1628
0
            TabletSharedPtr tablet = item.second;
1629
0
            if (tablets_info.size() >= num) {
1630
0
                return;
1631
0
            }
1632
0
            if (tablet == nullptr) {
1633
0
                continue;
1634
0
            }
1635
0
            tablets_info.push_back(tablet->get_tablet_info());
1636
0
        }
1637
0
    }
1638
0
}
1639
1640
1.56M
std::shared_mutex& TabletManager::_get_tablets_shard_lock(TTabletId tabletId) {
1641
1.56M
    return _get_tablets_shard(tabletId).lock;
1642
1.56M
}
1643
1644
1.86M
TabletManager::tablet_map_t& TabletManager::_get_tablet_map(TTabletId tabletId) {
1645
1.86M
    return _get_tablets_shard(tabletId).tablet_map;
1646
1.86M
}
1647
1648
3.44M
TabletManager::tablets_shard& TabletManager::_get_tablets_shard(TTabletId tabletId) {
1649
3.44M
    return _tablets_shards[tabletId & _tablets_shards_mask];
1650
3.44M
}
1651
1652
void TabletManager::get_tablets_distribution_on_different_disks(
1653
        std::map<int64_t, std::map<DataDir*, int64_t>>& tablets_num_on_disk,
1654
0
        std::map<int64_t, std::map<DataDir*, std::vector<TabletSize>>>& tablets_info_on_disk) {
1655
0
    std::vector<DataDir*> data_dirs = _engine.get_stores();
1656
0
    std::map<int64_t, Partition> partitions;
1657
0
    {
1658
        // When drop tablet, '_partitions_lock' is locked in 'tablet_shard_lock'.
1659
        // To avoid locking 'tablet_shard_lock' in '_partitions_lock', we lock and
1660
        // copy _partitions here.
1661
0
        std::shared_lock rdlock(_partitions_lock);
1662
0
        partitions = _partitions;
1663
0
    }
1664
1665
0
    for (const auto& [partition_id, partition] : partitions) {
1666
0
        std::map<DataDir*, int64_t> tablets_num;
1667
0
        std::map<DataDir*, std::vector<TabletSize>> tablets_info;
1668
0
        for (auto* data_dir : data_dirs) {
1669
0
            tablets_num[data_dir] = 0;
1670
0
        }
1671
1672
0
        for (const auto& tablet_info : partition.tablets) {
1673
            // get_tablet() will hold 'tablet_shard_lock'
1674
0
            TabletSharedPtr tablet = get_tablet(tablet_info.tablet_id);
1675
0
            if (tablet == nullptr) {
1676
0
                continue;
1677
0
            }
1678
0
            DataDir* data_dir = tablet->data_dir();
1679
0
            size_t tablet_footprint = tablet->tablet_footprint();
1680
0
            tablets_num[data_dir]++;
1681
0
            TabletSize tablet_size(tablet_info.tablet_id, tablet_footprint);
1682
0
            tablets_info[data_dir].push_back(tablet_size);
1683
0
        }
1684
0
        tablets_num_on_disk[partition_id] = tablets_num;
1685
0
        tablets_info_on_disk[partition_id] = tablets_info;
1686
0
    }
1687
0
}
1688
1689
struct SortCtx {
1690
    SortCtx(TabletSharedPtr tablet, RowsetSharedPtr rowset, int64_t cooldown_timestamp,
1691
            int64_t file_size)
1692
0
            : tablet(tablet), cooldown_timestamp(cooldown_timestamp), file_size(file_size) {}
1693
    TabletSharedPtr tablet;
1694
    RowsetSharedPtr rowset;
1695
    // to ensure the tablet with -1 would always be greater than other
1696
    uint64_t cooldown_timestamp;
1697
    int64_t file_size;
1698
0
    bool operator<(const SortCtx& other) const {
1699
0
        if (this->cooldown_timestamp == other.cooldown_timestamp) {
1700
0
            return this->file_size > other.file_size;
1701
0
        }
1702
0
        return this->cooldown_timestamp < other.cooldown_timestamp;
1703
0
    }
1704
};
1705
1706
void TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* tablets,
1707
                                         std::vector<RowsetSharedPtr>* rowsets,
1708
247
                                         std::function<bool(const TabletSharedPtr&)> skip_tablet) {
1709
247
    std::vector<SortCtx> sort_ctx_vec;
1710
247
    std::vector<std::weak_ptr<Tablet>> candidates;
1711
3.75M
    for_each_tablet([&](const TabletSharedPtr& tablet) { candidates.emplace_back(tablet); },
1712
247
                    filter_all_tablets);
1713
3.75M
    auto get_cooldown_tablet = [&sort_ctx_vec, &skip_tablet](std::weak_ptr<Tablet>& t) {
1714
3.75M
        const TabletSharedPtr& tablet = t.lock();
1715
3.75M
        RowsetSharedPtr rowset = nullptr;
1716
3.75M
        if (UNLIKELY(nullptr == tablet)) {
1717
0
            return;
1718
0
        }
1719
3.75M
        int64_t cooldown_timestamp = -1;
1720
3.75M
        size_t file_size = -1;
1721
3.75M
        if (!skip_tablet(tablet) &&
1722
3.75M
            (rowset = tablet->need_cooldown(&cooldown_timestamp, &file_size))) {
1723
0
            sort_ctx_vec.emplace_back(tablet, rowset, cooldown_timestamp, file_size);
1724
0
        }
1725
3.75M
    };
1726
247
    std::for_each(candidates.begin(), candidates.end(), get_cooldown_tablet);
1727
1728
247
    std::sort(sort_ctx_vec.begin(), sort_ctx_vec.end());
1729
1730
247
    for (SortCtx& ctx : sort_ctx_vec) {
1731
0
        VLOG_DEBUG << "get cooldown tablet: " << ctx.tablet->tablet_id();
1732
0
        tablets->push_back(std::move(ctx.tablet));
1733
0
        rowsets->push_back(std::move(ctx.rowset));
1734
0
    }
1735
247
}
1736
1737
0
void TabletManager::get_all_tablets_storage_format(TCheckStorageFormatResult* result) {
1738
0
    DCHECK(result != nullptr);
1739
0
    auto handler = [result](const TabletSharedPtr& tablet) {
1740
0
        if (tablet->all_beta()) {
1741
0
            result->v2_tablets.push_back(tablet->tablet_id());
1742
0
        } else {
1743
0
            result->v1_tablets.push_back(tablet->tablet_id());
1744
0
        }
1745
0
    };
1746
1747
0
    for_each_tablet(handler, filter_all_tablets);
1748
0
    result->__isset.v1_tablets = true;
1749
0
    result->__isset.v2_tablets = true;
1750
0
}
1751
1752
0
std::set<int64_t> TabletManager::check_all_tablet_segment(bool repair) {
1753
0
    std::set<int64_t> bad_tablets;
1754
0
    std::map<int64_t, std::vector<int64_t>> repair_shard_bad_tablets;
1755
0
    auto handler = [&](const TabletSharedPtr& tablet) {
1756
0
        if (!tablet->check_all_rowset_segment()) {
1757
0
            int64_t tablet_id = tablet->tablet_id();
1758
0
            bad_tablets.insert(tablet_id);
1759
0
            if (repair) {
1760
0
                repair_shard_bad_tablets[tablet_id & _tablets_shards_mask].push_back(tablet_id);
1761
0
            }
1762
0
        }
1763
0
    };
1764
0
    for_each_tablet(handler, filter_all_tablets);
1765
1766
0
    for (const auto& [shard_index, shard_tablets] : repair_shard_bad_tablets) {
1767
0
        auto& tablets_shard = _tablets_shards[shard_index];
1768
0
        auto& tablet_map = tablets_shard.tablet_map;
1769
0
        std::lock_guard<std::shared_mutex> wrlock(tablets_shard.lock);
1770
0
        for (auto tablet_id : shard_tablets) {
1771
0
            auto it = tablet_map.find(tablet_id);
1772
0
            if (it == tablet_map.end()) {
1773
0
                bad_tablets.erase(tablet_id);
1774
0
                LOG(WARNING) << "Bad tablet has be removed. tablet_id=" << tablet_id;
1775
0
            } else {
1776
0
                const auto& tablet = it->second;
1777
0
                static_cast<void>(tablet->set_tablet_state(TABLET_SHUTDOWN));
1778
0
                tablet->save_meta();
1779
0
                {
1780
0
                    std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock(
1781
0
                            _shutdown_tablets_lock);
1782
0
                    _shutdown_tablets.push_back(tablet);
1783
0
                }
1784
0
                LOG(WARNING) << "There are some segments lost, set tablet to shutdown state."
1785
0
                             << "tablet_id=" << tablet->tablet_id()
1786
0
                             << ", tablet_path=" << tablet->tablet_path();
1787
0
            }
1788
0
        }
1789
0
    }
1790
1791
0
    return bad_tablets;
1792
0
}
1793
1794
bool TabletManager::update_tablet_partition_id(::doris::TPartitionId partition_id,
1795
0
                                               ::doris::TTabletId tablet_id) {
1796
0
    std::shared_lock rdlock(_get_tablets_shard_lock(tablet_id));
1797
0
    TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id);
1798
0
    if (tablet == nullptr) {
1799
0
        LOG(WARNING) << "get tablet err partition_id: " << partition_id
1800
0
                     << " tablet_id:" << tablet_id;
1801
0
        return false;
1802
0
    }
1803
0
    _remove_tablet_from_partition(tablet);
1804
0
    auto st = tablet->tablet_meta()->set_partition_id(partition_id);
1805
0
    if (!st.ok()) {
1806
0
        LOG(WARNING) << "set partition id err partition_id: " << partition_id
1807
0
                     << " tablet_id:" << tablet_id;
1808
0
        return false;
1809
0
    }
1810
0
    _add_tablet_to_partition(tablet);
1811
0
    return true;
1812
0
}
1813
1814
void TabletManager::get_topn_tablet_delete_bitmap_score(
1815
13
        uint64_t* max_delete_bitmap_score, uint64_t* max_base_rowset_delete_bitmap_score) {
1816
13
    int64_t max_delete_bitmap_score_tablet_id = 0;
1817
13
    int64_t max_base_rowset_delete_bitmap_score_tablet_id = 0;
1818
13
    OlapStopWatch watch;
1819
13
    uint64_t total_delete_map_count = 0;
1820
13
    int n = config::check_tablet_delete_bitmap_score_top_n;
1821
13
    std::vector<std::pair<std::shared_ptr<Tablet>, int64_t>> buf;
1822
13
    buf.reserve(n + 1);
1823
18.8k
    auto handler = [&](const TabletSharedPtr& tablet) {
1824
18.8k
        uint64_t delete_bitmap_count =
1825
18.8k
                tablet->tablet_meta()->delete_bitmap().get_delete_bitmap_count();
1826
18.8k
        total_delete_map_count += delete_bitmap_count;
1827
18.8k
        if (delete_bitmap_count > *max_delete_bitmap_score) {
1828
33
            max_delete_bitmap_score_tablet_id = tablet->tablet_id();
1829
33
            *max_delete_bitmap_score = delete_bitmap_count;
1830
33
        }
1831
18.8k
        buf.emplace_back(std::move(tablet), delete_bitmap_count);
1832
377k
        std::sort(buf.begin(), buf.end(), [](auto& a, auto& b) { return a.second > b.second; });
1833
18.8k
        if (buf.size() > n) {
1834
18.7k
            buf.pop_back();
1835
18.7k
        }
1836
18.8k
    };
1837
13
    for_each_tablet(handler, filter_all_tablets);
1838
130
    for (auto& [t, _] : buf) {
1839
130
        t->get_base_rowset_delete_bitmap_count(max_base_rowset_delete_bitmap_score,
1840
130
                                               &max_base_rowset_delete_bitmap_score_tablet_id);
1841
130
    }
1842
13
    std::stringstream ss;
1843
130
    for (auto& i : buf) {
1844
130
        ss << i.first->tablet_id() << ": " << i.second << ", ";
1845
130
    }
1846
    LOG(INFO) << "get_topn_tablet_delete_bitmap_score, n=" << n
1847
13
              << ", tablet size=" << _tablets_shards.size()
1848
13
              << ", total_delete_map_count=" << total_delete_map_count
1849
13
              << ", cost(us)=" << watch.get_elapse_time_us()
1850
13
              << ", max_delete_bitmap_score=" << *max_delete_bitmap_score
1851
13
              << ", max_delete_bitmap_score_tablet_id=" << max_delete_bitmap_score_tablet_id
1852
13
              << ", max_base_rowset_delete_bitmap_score=" << *max_base_rowset_delete_bitmap_score
1853
13
              << ", max_base_rowset_delete_bitmap_score_tablet_id="
1854
13
              << max_base_rowset_delete_bitmap_score_tablet_id << ", tablets=[" << ss.str() << "]";
1855
13
}
1856
1857
} // end namespace doris