Coverage Report

Created: 2025-07-31 14:33

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <deque>
34
#include <initializer_list>
35
#include <numeric>
36
#include <string>
37
#include <string_view>
38
#include <utility>
39
40
#include "common/defer.h"
41
#include "common/stopwatch.h"
42
#include "meta-service/meta_service.h"
43
#include "meta-service/meta_service_helper.h"
44
#include "meta-service/meta_service_schema.h"
45
#include "meta-store/blob_message.h"
46
#include "meta-store/txn_kv.h"
47
#include "meta-store/txn_kv_error.h"
48
#include "meta-store/versioned_value.h"
49
#include "recycler/checker.h"
50
#include "recycler/hdfs_accessor.h"
51
#include "recycler/s3_accessor.h"
52
#include "recycler/storage_vault_accessor.h"
53
#ifdef UNIT_TEST
54
#include "../test/mock_accessor.h"
55
#endif
56
#include "common/bvars.h"
57
#include "common/config.h"
58
#include "common/encryption_util.h"
59
#include "common/logging.h"
60
#include "common/simple_thread_pool.h"
61
#include "common/util.h"
62
#include "cpp/sync_point.h"
63
#include "meta-store/keys.h"
64
#include "recycler/recycler_service.h"
65
#include "recycler/sync_executor.h"
66
#include "recycler/util.h"
67
68
namespace doris::cloud {
69
70
using namespace std::chrono;
71
72
RecyclerMetricsContext tablet_metrics_context_("global_recycler", "recycle_tablet");
73
RecyclerMetricsContext segment_metrics_context_("global_recycler", "recycle_segment");
74
75
// return 0 for success get a key, 1 for key not found, negative for error
76
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
77
0
    std::unique_ptr<Transaction> txn;
78
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
79
0
    if (err != TxnErrorCode::TXN_OK) {
80
0
        return -1;
81
0
    }
82
0
    switch (txn->get(key, &val, true)) {
83
0
    case TxnErrorCode::TXN_OK:
84
0
        return 0;
85
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
86
0
        return 1;
87
0
    default:
88
0
        return -1;
89
0
    };
90
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
91
92
// 0 for success, negative for error
93
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
94
304
                   std::unique_ptr<RangeGetIterator>& it) {
95
304
    std::unique_ptr<Transaction> txn;
96
304
    TxnErrorCode err = txn_kv->create_txn(&txn);
97
304
    if (err != TxnErrorCode::TXN_OK) {
98
0
        return -1;
99
0
    }
100
304
    switch (txn->get(begin, end, &it, true)) {
101
304
    case TxnErrorCode::TXN_OK:
102
304
        return 0;
103
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
104
0
        return 1;
105
0
    default:
106
0
        return -1;
107
304
    };
108
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
94
285
                   std::unique_ptr<RangeGetIterator>& it) {
95
285
    std::unique_ptr<Transaction> txn;
96
285
    TxnErrorCode err = txn_kv->create_txn(&txn);
97
285
    if (err != TxnErrorCode::TXN_OK) {
98
0
        return -1;
99
0
    }
100
285
    switch (txn->get(begin, end, &it, true)) {
101
285
    case TxnErrorCode::TXN_OK:
102
285
        return 0;
103
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
104
0
        return 1;
105
0
    default:
106
0
        return -1;
107
285
    };
108
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
94
19
                   std::unique_ptr<RangeGetIterator>& it) {
95
19
    std::unique_ptr<Transaction> txn;
96
19
    TxnErrorCode err = txn_kv->create_txn(&txn);
97
19
    if (err != TxnErrorCode::TXN_OK) {
98
0
        return -1;
99
0
    }
100
19
    switch (txn->get(begin, end, &it, true)) {
101
19
    case TxnErrorCode::TXN_OK:
102
19
        return 0;
103
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
104
0
        return 1;
105
0
    default:
106
0
        return -1;
107
19
    };
108
0
}
109
110
// return 0 for success otherwise error
111
10
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
112
10
    std::unique_ptr<Transaction> txn;
113
10
    TxnErrorCode err = txn_kv->create_txn(&txn);
114
10
    if (err != TxnErrorCode::TXN_OK) {
115
0
        return -1;
116
0
    }
117
3.04k
    for (auto k : keys) {
118
3.04k
        txn->remove(k);
119
3.04k
    }
120
10
    switch (txn->commit()) {
121
10
    case TxnErrorCode::TXN_OK:
122
10
        return 0;
123
0
    case TxnErrorCode::TXN_CONFLICT:
124
0
        return -1;
125
0
    default:
126
0
        return -1;
127
10
    }
128
10
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
111
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
112
6
    std::unique_ptr<Transaction> txn;
113
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
114
6
    if (err != TxnErrorCode::TXN_OK) {
115
0
        return -1;
116
0
    }
117
3.02k
    for (auto k : keys) {
118
3.02k
        txn->remove(k);
119
3.02k
    }
120
6
    switch (txn->commit()) {
121
6
    case TxnErrorCode::TXN_OK:
122
6
        return 0;
123
0
    case TxnErrorCode::TXN_CONFLICT:
124
0
        return -1;
125
0
    default:
126
0
        return -1;
127
6
    }
128
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
111
4
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
112
4
    std::unique_ptr<Transaction> txn;
113
4
    TxnErrorCode err = txn_kv->create_txn(&txn);
114
4
    if (err != TxnErrorCode::TXN_OK) {
115
0
        return -1;
116
0
    }
117
21
    for (auto k : keys) {
118
21
        txn->remove(k);
119
21
    }
120
4
    switch (txn->commit()) {
121
4
    case TxnErrorCode::TXN_OK:
122
4
        return 0;
123
0
    case TxnErrorCode::TXN_CONFLICT:
124
0
        return -1;
125
0
    default:
126
0
        return -1;
127
4
    }
128
4
}
129
130
// return 0 for success otherwise error
131
37
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
132
37
    std::unique_ptr<Transaction> txn;
133
37
    TxnErrorCode err = txn_kv->create_txn(&txn);
134
37
    if (err != TxnErrorCode::TXN_OK) {
135
0
        return -1;
136
0
    }
137
7.02k
    for (auto& k : keys) {
138
7.02k
        txn->remove(k);
139
7.02k
    }
140
37
    switch (txn->commit()) {
141
37
    case TxnErrorCode::TXN_OK:
142
37
        return 0;
143
0
    case TxnErrorCode::TXN_CONFLICT:
144
0
        return -1;
145
0
    default:
146
0
        return -1;
147
37
    }
148
37
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
131
34
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
132
34
    std::unique_ptr<Transaction> txn;
133
34
    TxnErrorCode err = txn_kv->create_txn(&txn);
134
34
    if (err != TxnErrorCode::TXN_OK) {
135
0
        return -1;
136
0
    }
137
7.02k
    for (auto& k : keys) {
138
7.02k
        txn->remove(k);
139
7.02k
    }
140
34
    switch (txn->commit()) {
141
34
    case TxnErrorCode::TXN_OK:
142
34
        return 0;
143
0
    case TxnErrorCode::TXN_CONFLICT:
144
0
        return -1;
145
0
    default:
146
0
        return -1;
147
34
    }
148
34
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
131
3
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
132
3
    std::unique_ptr<Transaction> txn;
133
3
    TxnErrorCode err = txn_kv->create_txn(&txn);
134
3
    if (err != TxnErrorCode::TXN_OK) {
135
0
        return -1;
136
0
    }
137
3
    for (auto& k : keys) {
138
0
        txn->remove(k);
139
0
    }
140
3
    switch (txn->commit()) {
141
3
    case TxnErrorCode::TXN_OK:
142
3
        return 0;
143
0
    case TxnErrorCode::TXN_CONFLICT:
144
0
        return -1;
145
0
    default:
146
0
        return -1;
147
3
    }
148
3
}
149
150
// return 0 for success otherwise error
151
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
152
0
                                       std::string_view end) {
153
0
    std::unique_ptr<Transaction> txn;
154
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
155
0
    if (err != TxnErrorCode::TXN_OK) {
156
0
        return -1;
157
0
    }
158
0
    txn->remove(begin, end);
159
0
    switch (txn->commit()) {
160
0
    case TxnErrorCode::TXN_OK:
161
0
        return 0;
162
0
    case TxnErrorCode::TXN_CONFLICT:
163
0
        return -1;
164
0
    default:
165
0
        return -1;
166
0
    }
167
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
168
169
void scan_restore_job_rowset(
170
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
171
        std::string& msg,
172
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
173
174
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
175
                                      int64_t num_scanned, int64_t num_recycled,
176
49
                                      int64_t start_time) {
177
49
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
178
0
        int64_t cost =
179
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
180
0
        if (cost > config::recycle_task_threshold_seconds) {
181
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
182
0
                    .tag("instance_id", instance_id)
183
0
                    .tag("task", task_name)
184
0
                    .tag("num_scanned", num_scanned)
185
0
                    .tag("num_recycled", num_recycled);
186
0
        }
187
0
    }
188
49
    return;
189
49
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
176
47
                                      int64_t start_time) {
177
47
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
178
0
        int64_t cost =
179
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
180
0
        if (cost > config::recycle_task_threshold_seconds) {
181
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
182
0
                    .tag("instance_id", instance_id)
183
0
                    .tag("task", task_name)
184
0
                    .tag("num_scanned", num_scanned)
185
0
                    .tag("num_recycled", num_recycled);
186
0
        }
187
0
    }
188
47
    return;
189
47
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
176
2
                                      int64_t start_time) {
177
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
178
0
        int64_t cost =
179
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
180
0
        if (cost > config::recycle_task_threshold_seconds) {
181
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
182
0
                    .tag("instance_id", instance_id)
183
0
                    .tag("task", task_name)
184
0
                    .tag("num_scanned", num_scanned)
185
0
                    .tag("num_recycled", num_recycled);
186
0
        }
187
0
    }
188
2
    return;
189
2
}
190
191
4
// Builds the recycler: records this process's "ip:port" identity, creates and starts the
// three shared worker pools (each sized by config::recycle_pool_parallelism), and creates
// the lazy txn committer on top of the same TxnKv.
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);

    // Creates a pool with the configured parallelism and starts it before handing it out.
    auto make_started_pool = [](const char* pool_name) {
        auto pool =
                std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, pool_name);
        pool->start();
        return pool;
    };

    auto s3_producer_pool = make_started_pool("s3_producer_pool");
    auto recycle_tablet_pool = make_started_pool("recycle_tablet_pool");
    auto group_recycle_function_pool = make_started_pool("group_recycle_function_pool");

    _thread_pool_group =
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
                                    std::move(group_recycle_function_pool));

    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_);
}
209
210
4
// Makes sure the background workers are shut down before the object goes away;
// stop() is skipped when the recycler has already been stopped.
Recycler::~Recycler() {
    if (stopped()) {
        return;
    }
    stop();
}
215
216
4
void Recycler::instance_scanner_callback() {
217
    // sleep 60 seconds before scheduling for the launch procedure to complete:
218
    // some bad hdfs connection may cause some log to stdout stderr
219
    // which may pollute .out file and affect the script to check success
220
4
    std::this_thread::sleep_for(
221
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
222
8
    while (!stopped()) {
223
4
        std::vector<InstanceInfoPB> instances;
224
4
        get_all_instances(txn_kv_.get(), instances);
225
        // TODO(plat1ko): delete job recycle kv of non-existent instances
226
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
227
4
            std::stringstream ss;
228
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
229
4
            return ss.str();
230
4
        }();
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
226
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
227
4
            std::stringstream ss;
228
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
229
4
            return ss.str();
230
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
231
4
        if (!instances.empty()) {
232
            // enqueue instances
233
3
            std::lock_guard lock(mtx_);
234
30
            for (auto& instance : instances) {
235
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
236
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
237
                // skip instance already in pending queue
238
30
                if (success) {
239
30
                    pending_instance_queue_.push_back(std::move(instance));
240
30
                }
241
30
            }
242
3
            pending_instance_cond_.notify_all();
243
3
        }
244
4
        {
245
4
            std::unique_lock lock(mtx_);
246
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
247
7
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
247
7
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
248
4
        }
249
4
    }
250
4
}
251
252
8
void Recycler::recycle_callback() {
253
38
    while (!stopped()) {
254
37
        InstanceInfoPB instance;
255
37
        {
256
37
            std::unique_lock lock(mtx_);
257
37
            pending_instance_cond_.wait(
258
50
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
258
50
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
259
37
            if (stopped()) {
260
7
                return;
261
7
            }
262
30
            instance = std::move(pending_instance_queue_.front());
263
30
            pending_instance_queue_.pop_front();
264
30
            pending_instance_set_.erase(instance.instance_id());
265
30
        }
266
0
        auto& instance_id = instance.instance_id();
267
30
        {
268
30
            std::lock_guard lock(mtx_);
269
            // skip instance in recycling
270
30
            if (recycling_instance_map_.count(instance_id)) continue;
271
30
        }
272
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
273
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
274
275
30
        if (int r = instance_recycler->init(); r != 0) {
276
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
277
0
                         << " ret=" << r;
278
0
            continue;
279
0
        }
280
30
        std::string recycle_job_key;
281
30
        job_recycle_key({instance_id}, &recycle_job_key);
282
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
283
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
284
30
        if (ret != 0) { // Prepare failed
285
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
286
20
                         << " ret=" << ret;
287
20
            continue;
288
20
        } else {
289
10
            std::lock_guard lock(mtx_);
290
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
291
10
        }
292
10
        if (stopped()) return;
293
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
294
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
295
10
        g_bvar_recycler_instance_recycle_task_concurrency << 1;
296
10
        g_bvar_recycler_instance_running_counter << 1;
297
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
298
10
        tablet_metrics_context_.reset();
299
10
        segment_metrics_context_.reset();
300
10
        ret = instance_recycler->do_recycle();
301
10
        tablet_metrics_context_.finish_report();
302
10
        segment_metrics_context_.finish_report();
303
10
        g_bvar_recycler_instance_recycle_task_concurrency << -1;
304
10
        g_bvar_recycler_instance_running_counter << -1;
305
        // If instance recycler has been aborted, don't finish this job
306
10
        if (!instance_recycler->stopped()) {
307
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
308
10
                                        ret == 0, ctime_ms);
309
10
        }
310
10
        {
311
10
            std::lock_guard lock(mtx_);
312
10
            recycling_instance_map_.erase(instance_id);
313
10
        }
314
315
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
316
10
        auto elpased_ms = now - ctime_ms;
317
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
318
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
319
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
320
10
                                             now + config::recycle_interval_seconds * 1000);
321
10
        LOG(INFO) << "recycle instance done, "
322
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
323
10
                  << " now: " << now;
324
325
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
326
327
10
        LOG_WARNING("finish recycle instance")
328
10
                .tag("instance_id", instance_id)
329
10
                .tag("cost_ms", elpased_ms);
330
10
    }
331
8
}
332
333
4
void Recycler::lease_recycle_jobs() {
334
54
    while (!stopped()) {
335
50
        std::vector<std::string> instances;
336
50
        instances.reserve(recycling_instance_map_.size());
337
50
        {
338
50
            std::lock_guard lock(mtx_);
339
50
            for (auto& [id, _] : recycling_instance_map_) {
340
30
                instances.push_back(id);
341
30
            }
342
50
        }
343
50
        for (auto& i : instances) {
344
30
            std::string recycle_job_key;
345
30
            job_recycle_key({i}, &recycle_job_key);
346
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
347
30
            if (ret == 1) {
348
0
                std::lock_guard lock(mtx_);
349
0
                if (auto it = recycling_instance_map_.find(i);
350
0
                    it != recycling_instance_map_.end()) {
351
0
                    it->second->stop();
352
0
                }
353
0
            }
354
30
        }
355
50
        {
356
50
            std::unique_lock lock(mtx_);
357
50
            notifier_.wait_for(lock,
358
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
359
100
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
359
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
360
50
        }
361
50
    }
362
4
}
363
364
4
void Recycler::check_recycle_tasks() {
365
7
    while (!stopped()) {
366
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
367
3
        {
368
3
            std::lock_guard lock(mtx_);
369
3
            recycling_instance_map = recycling_instance_map_;
370
3
        }
371
3
        for (auto& entry : recycling_instance_map) {
372
0
            entry.second->check_recycle_tasks();
373
0
        }
374
375
3
        std::unique_lock lock(mtx_);
376
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
377
6
                           [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
377
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
378
3
    }
379
4
}
380
381
4
// Starts the recycler.
// Applies the whitelist/blacklist filter, optionally starts the checker
// (config::enable_checker), registers the recycler RPC service on `server` when one is
// given, and spawns the background threads: one instance scanner,
// config::recycle_concurrency recycle workers, one lease renewer and one task watchdog.
// Returns 0 on success, or the checker's non-zero start code if the checker fails to start
// (in that case no thread is spawned).
int Recycler::start(brpc::Server* server) {
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
    S3Environment::getInstance();

    if (config::enable_checker) {
        checker_ = std::make_unique<Checker>(txn_kv_);
        if (int ret = checker_->start(); ret != 0) {
            const std::string msg = "failed to start checker";
            LOG(ERROR) << msg;
            std::cerr << msg << std::endl;
            return ret;
        }
        const std::string msg = "checker started";
        LOG(INFO) << msg;
        std::cout << msg << std::endl;
    }

    if (server) {
        // Add service
        // Ownership of the service object is passed to brpc via SERVER_OWNS_SERVICE.
        server->AddService(
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_),
                brpc::SERVER_OWNS_SERVICE);
    }

    workers_.emplace_back([this] { instance_scanner_callback(); });
    for (int i = 0; i < config::recycle_concurrency; ++i) {
        workers_.emplace_back([this] { recycle_callback(); });
    }
    workers_.emplace_back([this] { lease_recycle_jobs(); });
    workers_.emplace_back([this] { check_recycle_tasks(); });
    return 0;
}
417
418
4
void Recycler::stop() {
419
4
    stopped_ = true;
420
4
    notifier_.notify_all();
421
4
    pending_instance_cond_.notify_all();
422
4
    {
423
4
        std::lock_guard lock(mtx_);
424
4
        for (auto& [_, recycler] : recycling_instance_map_) {
425
0
            recycler->stop();
426
0
        }
427
4
    }
428
20
    for (auto& w : workers_) {
429
20
        if (w.joinable()) w.join();
430
20
    }
431
4
    if (checker_) {
432
0
        checker_->stop();
433
0
    }
434
4
}
435
436
// Cache of inverted-index information per <index_id, schema_version>.
// Lookups first consult two in-memory tables guarded by `mtx_` (schemas known to have no
// inverted index, and schemas with their inverted index info); on a miss the schema is
// read from the KV store, parsed, and the result is cached.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Looks up the inverted index info of schema <index_id, schema_version>.
    // On success returns 0; `res` is filled when the schema has inverted indexes and is left
    // as passed in when the schema is cached as having none.
    // Returns -1 if the txn cannot be created or the schema value is malformed. If reading
    // the schema fails, the TxnErrorCode is returned cast to int.
    // NOTE(review): the original contract says "1 if schema kv not found"; that holds only
    // if TXN_KEY_NOT_FOUND has the numeric value 1 -- confirm against TxnErrorCode.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // Schemas that do not record a storage format are treated as V1.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            for (auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Caches `index_info` for <index_id, schema_version>.
    // An empty `index_info.second` means this schema has no inverted index; such schemas are
    // remembered in a separate set. An existing map entry is kept (try_emplace).
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    // Guards both cache tables below.
    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        // Mixes BOTH components of the key. Previously the hash of `index_id` was
        // overwritten by the hash of `schema_version`, so all keys with the same schema
        // version collided.
        size_t operator()(const Key& key) const {
            size_t seed = std::hash<int64_t> {}(key.first);
            seed ^= std::hash<int32_t> {}(key.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
523
524
// Builds a recycler bound to one instance: keeps a copy of the instance info, shares the
// TxnKv, thread pool group and lazy committer with the owning Recycler, and creates a
// private inverted-index cache for this instance.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          // Uses the members initialized above (not the moved-from parameter).
          // NOTE(review): relies on txn_kv_ and instance_id_ being declared before
          // inverted_index_id_cache_ in the class -- confirm in the header.
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)) {}
533
534
89
// Defaulted destructor, defined in this translation unit.
// NOTE(review): presumably kept out of the header so that the unique_ptr to
// InvertedIndexIdCache is destroyed where that class is a complete type -- confirm.
InstanceRecycler::~InstanceRecycler() = default;
535
536
89
int InstanceRecycler::init_obj_store_accessors() {
537
89
    for (const auto& obj_info : instance_info_.obj_info()) {
538
68
#ifdef UNIT_TEST
539
68
        auto accessor = std::make_shared<MockAccessor>();
540
#else
541
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
542
        if (!s3_conf) {
543
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
544
            return -1;
545
        }
546
547
        std::shared_ptr<S3Accessor> accessor;
548
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
549
        if (ret != 0) {
550
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
551
                         << " resource_id=" << obj_info.id();
552
            return ret;
553
        }
554
#endif
555
68
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
556
68
    }
557
558
89
    return 0;
559
89
}
560
561
89
int InstanceRecycler::init_storage_vault_accessors() {
562
89
    if (instance_info_.resource_ids().empty()) {
563
82
        return 0;
564
82
    }
565
566
7
    FullRangeGetOptions opts(txn_kv_);
567
7
    opts.prefetch = true;
568
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
569
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
570
571
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
572
18
        auto [k, v] = *kv;
573
18
        StorageVaultPB vault;
574
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
575
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
576
0
            return -1;
577
0
        }
578
18
        std::string recycler_storage_vault_white_list = accumulate(
579
18
                config::recycler_storage_vault_white_list.begin(),
580
18
                config::recycler_storage_vault_white_list.end(), std::string(),
581
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
581
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
582
18
        LOG_INFO("config::recycler_storage_vault_white_list")
583
18
                .tag("", recycler_storage_vault_white_list);
584
18
        if (!config::recycler_storage_vault_white_list.empty()) {
585
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
586
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
587
8
                it == config::recycler_storage_vault_white_list.end()) {
588
2
                LOG_WARNING(
589
2
                        "failed to init accessor for vault because this vault is not in "
590
2
                        "config::recycler_storage_vault_white_list. ")
591
2
                        .tag(" vault name:", vault.name())
592
2
                        .tag(" config::recycler_storage_vault_white_list:",
593
2
                             recycler_storage_vault_white_list);
594
2
                continue;
595
2
            }
596
8
        }
597
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
598
16
                                 &accessor_map_, &vault);
599
16
        if (vault.has_hdfs_info()) {
600
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
601
9
            int ret = accessor->init();
602
9
            if (ret != 0) {
603
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
604
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
605
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
606
4
                continue;
607
4
            }
608
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
609
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
610
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
611
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
612
7
        } else if (vault.has_obj_info()) {
613
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
614
7
            if (!s3_conf) {
615
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
616
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
617
1
                continue;
618
1
            }
619
620
6
            std::shared_ptr<S3Accessor> accessor;
621
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
622
6
            if (ret != 0) {
623
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
624
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
625
0
                             << " ret=" << ret
626
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
627
0
                continue;
628
0
            }
629
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
630
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
631
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
632
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
633
6
        }
634
16
    }
635
636
7
    if (!it->is_valid()) {
637
0
        LOG_WARNING("failed to get storage vault kv");
638
0
        return -1;
639
0
    }
640
641
7
    if (accessor_map_.empty()) {
642
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
643
1
        return -2;
644
1
    }
645
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
646
6
             instance_id_);
647
648
6
    return 0;
649
7
}
650
651
89
int InstanceRecycler::init() {
652
89
    int ret = init_obj_store_accessors();
653
89
    if (ret != 0) {
654
0
        return ret;
655
0
    }
656
657
89
    return init_storage_vault_accessors();
658
89
}
659
660
template <typename... Func>
661
100
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
100
    return [funcs...]() {
663
100
        return [](std::initializer_list<int> ret_vals) {
664
100
            int i = 0;
665
120
            for (int ret : ret_vals) {
666
120
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
120
            }
670
100
            return i;
671
100
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
20
            for (int ret : ret_vals) {
666
20
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
20
            }
670
10
            return i;
671
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
20
            for (int ret : ret_vals) {
666
20
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
20
            }
670
10
            return i;
671
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
0
                    i = ret;
668
0
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
672
100
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv
673
100
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
661
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
662
10
    return [funcs...]() {
663
10
        return [](std::initializer_list<int> ret_vals) {
664
10
            int i = 0;
665
10
            for (int ret : ret_vals) {
666
10
                if (ret != 0) {
667
10
                    i = ret;
668
10
                }
669
10
            }
670
10
            return i;
671
10
        }({funcs()...});
672
10
    };
673
10
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
674
675
10
int InstanceRecycler::do_recycle() {
676
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
677
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
678
0
        return recycle_deleted_instance();
679
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
680
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
681
10
                                        fmt::format("instance id {}", instance_id_),
682
100
                                        [](int r) { return r != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi
Line
Count
Source
682
100
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi
683
10
        sync_executor
684
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
685
                                   // becase they may both recycle the same set of tablets
686
                        // recycle dropped table or idexes(mv, rollup)
687
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv
Line
Count
Source
687
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv
688
                        // recycle dropped partitions
689
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
689
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
690
10
                .add(task_wrapper(
691
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
691
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
692
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
692
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
693
10
                .add(task_wrapper(
694
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
694
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
695
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
695
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
696
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
696
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
697
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
697
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
698
10
                .add(task_wrapper(
699
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
699
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
700
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
700
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
701
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
701
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
702
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
702
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
703
10
        bool finished = true;
704
10
        std::vector<int> rets = sync_executor.when_all(&finished);
705
100
        for (int ret : rets) {
706
100
            if (ret != 0) {
707
0
                return ret;
708
0
            }
709
100
        }
710
10
        return finished ? 0 : -1;
711
10
    } else {
712
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
713
0
                     << " instance_id=" << instance_id_;
714
0
        return -1;
715
0
    }
716
10
}
717
718
/**
719
 * 1. delete all remote data
720
 * 2. delete all kv
721
 * 3. remove instance kv
722
 */
723
1
int InstanceRecycler::recycle_deleted_instance() {
724
1
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
725
726
1
    int ret = 0;
727
1
    auto start_time = steady_clock::now();
728
729
1
    DORIS_CLOUD_DEFER {
730
1
        auto cost = duration<float>(steady_clock::now() - start_time).count();
731
1
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
732
1
                     << " recycle deleted instance, cost=" << cost
733
1
                     << "s, instance_id=" << instance_id_;
734
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
729
1
    DORIS_CLOUD_DEFER {
730
1
        auto cost = duration<float>(steady_clock::now() - start_time).count();
731
1
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
732
1
                     << " recycle deleted instance, cost=" << cost
733
1
                     << "s, instance_id=" << instance_id_;
734
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
735
736
    // delete all remote data
737
2
    for (auto& [_, accessor] : accessor_map_) {
738
2
        if (stopped()) {
739
0
            return ret;
740
0
        }
741
742
2
        LOG(INFO) << "begin to delete all objects in " << accessor->uri();
743
2
        int del_ret = accessor->delete_all();
744
2
        if (del_ret == 0) {
745
2
            LOG(INFO) << "successfully delete all objects in " << accessor->uri();
746
2
        } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
747
            // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
748
            // so the recycling has been successful.
749
0
            ret = -1;
750
0
        }
751
2
    }
752
753
1
    if (ret != 0) {
754
0
        LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
755
0
        return ret;
756
0
    }
757
758
    // delete all kv
759
1
    std::unique_ptr<Transaction> txn;
760
1
    TxnErrorCode err = txn_kv_->create_txn(&txn);
761
1
    if (err != TxnErrorCode::TXN_OK) {
762
0
        LOG(WARNING) << "failed to create txn";
763
0
        ret = -1;
764
0
        return -1;
765
0
    }
766
1
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
767
    // delete kv before deleting objects to prevent the checker from misjudging data loss
768
1
    std::string start_txn_key = txn_key_prefix(instance_id_);
769
1
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
770
1
    txn->remove(start_txn_key, end_txn_key);
771
1
    std::string start_version_key = version_key_prefix(instance_id_);
772
1
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
773
1
    txn->remove(start_version_key, end_version_key);
774
1
    std::string start_meta_key = meta_key_prefix(instance_id_);
775
1
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
776
1
    txn->remove(start_meta_key, end_meta_key);
777
1
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
778
1
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
779
1
    txn->remove(start_recycle_key, end_recycle_key);
780
1
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
781
1
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
782
1
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
783
1
    std::string start_copy_key = copy_key_prefix(instance_id_);
784
1
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
785
1
    txn->remove(start_copy_key, end_copy_key);
786
    // should not remove job key range, because we need to reserve job recycle kv
787
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
788
1
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
789
1
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
790
1
    txn->remove(start_job_tablet_key, end_job_tablet_key);
791
1
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
792
1
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
793
1
    std::string start_vault_key = storage_vault_key(key_info0);
794
1
    std::string end_vault_key = storage_vault_key(key_info1);
795
1
    txn->remove(start_vault_key, end_vault_key);
796
1
    err = txn->commit();
797
1
    if (err != TxnErrorCode::TXN_OK) {
798
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
799
0
        ret = -1;
800
0
    }
801
802
1
    if (ret == 0) {
803
        // remove instance kv
804
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
805
1
        err = txn_kv_->create_txn(&txn);
806
1
        if (err != TxnErrorCode::TXN_OK) {
807
0
            LOG(WARNING) << "failed to create txn";
808
0
            ret = -1;
809
0
            return ret;
810
0
        }
811
1
        std::string key;
812
1
        instance_key({instance_id_}, &key);
813
1
        txn->remove(key);
814
1
        err = txn->commit();
815
1
        if (err != TxnErrorCode::TXN_OK) {
816
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
817
0
                         << " err=" << err;
818
0
            ret = -1;
819
0
        }
820
1
    }
821
1
    return ret;
822
1
}
823
824
bool is_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id,
825
3.05k
                     int64_t txn_id) {
826
3.05k
    std::unique_ptr<Transaction> txn;
827
3.05k
    TxnErrorCode err = txn_kv->create_txn(&txn);
828
3.05k
    if (err != TxnErrorCode::TXN_OK) {
829
0
        LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id;
830
0
        return false;
831
0
    }
832
833
3.05k
    std::string index_val;
834
3.05k
    const std::string index_key = txn_index_key({instance_id, txn_id});
835
3.05k
    err = txn->get(index_key, &index_val);
836
3.05k
    if (err != TxnErrorCode::TXN_OK) {
837
3.03k
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
838
3.03k
            TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_recycled");
839
            // txn has been recycled;
840
3.03k
            LOG(INFO) << "txn index key has been recycled, txn_id=" << txn_id
841
3.03k
                      << " instance_id=" << instance_id;
842
3.03k
            return true;
843
3.03k
        }
844
0
        LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id
845
0
                     << " instance_id=" << instance_id << " key=" << hex(index_key)
846
0
                     << " err=" << err;
847
0
        return false;
848
3.03k
    }
849
850
20
    TxnIndexPB index_pb;
851
20
    if (!index_pb.ParseFromString(index_val)) {
852
0
        LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id
853
0
                     << " instance_id=" << instance_id;
854
0
        return false;
855
0
    }
856
857
20
    DCHECK(index_pb.has_tablet_index() == true);
858
20
    if (!index_pb.tablet_index().has_db_id()) {
859
        // In the previous version, the db_id was not set in the index_pb.
860
        // If updating to the version which enable txn lazy commit, the db_id will be set.
861
0
        LOG(INFO) << "txn index has no db_id, txn_id=" << txn_id << " instance_id=" << instance_id
862
0
                  << " index=" << index_pb.ShortDebugString();
863
0
        return true;
864
0
    }
865
866
20
    int64_t db_id = index_pb.tablet_index().db_id();
867
20
    DCHECK_GT(db_id, 0) << "db_id=" << db_id << " txn_id=" << txn_id
868
0
                        << " instance_id=" << instance_id;
869
870
20
    std::string info_val;
871
20
    const std::string info_key = txn_info_key({instance_id, db_id, txn_id});
872
20
    err = txn->get(info_key, &info_val);
873
20
    if (err != TxnErrorCode::TXN_OK) {
874
0
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
875
            // txn info has been recycled;
876
0
            LOG(INFO) << "txn info key has been recycled, db_id=" << db_id << " txn_id=" << txn_id
877
0
                      << " instance_id=" << instance_id;
878
0
            return true;
879
0
        }
880
881
0
        DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND);
882
0
        LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id
883
0
                     << " instance_id=" << instance_id << " key=" << hex(info_key)
884
0
                     << " err=" << err;
885
0
        return false;
886
0
    }
887
888
20
    TxnInfoPB txn_info;
889
20
    if (!txn_info.ParseFromString(info_val)) {
890
0
        LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id
891
0
                     << " instance_id=" << instance_id;
892
0
        return false;
893
0
    }
894
895
20
    DCHECK(txn_info.txn_id() == txn_id) << "txn_id=" << txn_id << " instance_id=" << instance_id
896
0
                                        << " txn_info=" << txn_info.ShortDebugString();
897
898
20
    if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status() ||
899
20
        TxnStatusPB::TXN_STATUS_VISIBLE == txn_info.status()) {
900
10
        TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_aborted", &txn_info);
901
10
        return true;
902
10
    }
903
904
10
    TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_not_finished", &txn_info);
905
10
    return false;
906
20
}
907
908
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
909
4.00k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
910
4.00k
    if (config::force_immediate_recycle) {
911
0
        return 0L;
912
0
    }
913
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
914
4.00k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
915
4.00k
    int64_t retention_seconds = config::retention_seconds;
916
4.00k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
917
3.10k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
918
3.10k
    }
919
4.00k
    int64_t final_expiration = expiration + retention_seconds;
920
4.00k
    if (*earlest_ts > final_expiration) {
921
3
        *earlest_ts = final_expiration;
922
3
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
923
3
    }
924
4.00k
    return final_expiration;
925
4.00k
}
926
927
int64_t calculate_partition_expired_time(
928
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
929
8
        int64_t* earlest_ts /* partition earliest expiration ts */) {
930
8
    if (config::force_immediate_recycle) {
931
2
        return 0L;
932
2
    }
933
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
934
6
                                                            : partition_meta_pb.creation_time();
935
6
    int64_t retention_seconds = config::retention_seconds;
936
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
937
6
        retention_seconds =
938
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
939
6
    }
940
6
    int64_t final_expiration = expiration + retention_seconds;
941
6
    if (*earlest_ts > final_expiration) {
942
2
        *earlest_ts = final_expiration;
943
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
944
2
    }
945
6
    return final_expiration;
946
8
}
947
948
int64_t calculate_index_expired_time(const std::string& instance_id_,
949
                                     const RecycleIndexPB& index_meta_pb,
950
8
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
951
8
    if (config::force_immediate_recycle) {
952
2
        return 0L;
953
2
    }
954
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
955
6
                                                        : index_meta_pb.creation_time();
956
6
    int64_t retention_seconds = config::retention_seconds;
957
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
958
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
959
6
    }
960
6
    int64_t final_expiration = expiration + retention_seconds;
961
6
    if (*earlest_ts > final_expiration) {
962
2
        *earlest_ts = final_expiration;
963
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
964
2
    }
965
6
    return final_expiration;
966
8
}
967
968
int64_t calculate_tmp_rowset_expired_time(
969
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
970
3.05k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
971
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
972
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
973
    //  duration or timeout always < `retention_time` in practice.
974
3.05k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
975
3.05k
                                 ? tmp_rowset_meta_pb.txn_expiration()
976
3.05k
                                 : tmp_rowset_meta_pb.creation_time();
977
3.05k
    expiration = config::force_immediate_recycle ? 0 : expiration;
978
3.05k
    int64_t final_expiration = expiration + config::retention_seconds;
979
3.05k
    if (*earlest_ts > final_expiration) {
980
6
        *earlest_ts = final_expiration;
981
6
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
982
6
    }
983
3.05k
    return final_expiration;
984
3.05k
}
985
986
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
987
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
988
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
989
30.0k
    if (*earlest_ts > final_expiration / 1000) {
990
6
        *earlest_ts = final_expiration / 1000;
991
6
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
992
6
    }
993
30.0k
    return final_expiration;
994
30.0k
}
995
996
int64_t calculate_restore_job_expired_time(
997
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
998
20
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
999
20
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED) {
1000
20
        return 0L;
1001
20
    }
1002
0
    int64_t expiration = restore_job.expiration() > 0
1003
0
                                 ? restore_job.creation_time() + restore_job.expiration()
1004
0
                                 : restore_job.creation_time();
1005
0
    int64_t final_expiration = expiration + config::retention_seconds;
1006
0
    if (*earlest_ts > final_expiration) {
1007
0
        *earlest_ts = final_expiration;
1008
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1009
0
    }
1010
0
    return final_expiration;
1011
20
}
1012
1013
14
int InstanceRecycler::recycle_indexes() {
1014
14
    const std::string task_name = "recycle_indexes";
1015
14
    int64_t num_scanned = 0;
1016
14
    int64_t num_expired = 0;
1017
14
    int64_t num_recycled = 0;
1018
14
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1019
1020
14
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
1021
14
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
1022
14
    std::string index_key0;
1023
14
    std::string index_key1;
1024
14
    recycle_index_key(index_key_info0, &index_key0);
1025
14
    recycle_index_key(index_key_info1, &index_key1);
1026
1027
14
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
1028
1029
14
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1030
14
    register_recycle_task(task_name, start_time);
1031
1032
14
    DORIS_CLOUD_DEFER {
1033
14
        unregister_recycle_task(task_name);
1034
14
        int64_t cost =
1035
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1036
14
        metrics_context.finish_report();
1037
14
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1038
14
                .tag("instance_id", instance_id_)
1039
14
                .tag("num_scanned", num_scanned)
1040
14
                .tag("num_expired", num_expired)
1041
14
                .tag("num_recycled", num_recycled);
1042
14
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1032
12
    DORIS_CLOUD_DEFER {
1033
12
        unregister_recycle_task(task_name);
1034
12
        int64_t cost =
1035
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1036
12
        metrics_context.finish_report();
1037
12
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1038
12
                .tag("instance_id", instance_id_)
1039
12
                .tag("num_scanned", num_scanned)
1040
12
                .tag("num_expired", num_expired)
1041
12
                .tag("num_recycled", num_recycled);
1042
12
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1032
2
    DORIS_CLOUD_DEFER {
1033
2
        unregister_recycle_task(task_name);
1034
2
        int64_t cost =
1035
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1036
2
        metrics_context.finish_report();
1037
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1038
2
                .tag("instance_id", instance_id_)
1039
2
                .tag("num_scanned", num_scanned)
1040
2
                .tag("num_expired", num_expired)
1041
2
                .tag("num_recycled", num_recycled);
1042
2
    };
1043
1044
14
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1045
1046
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
1047
14
    std::vector<std::string_view> index_keys;
1048
14
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1049
8
        ++num_scanned;
1050
8
        RecycleIndexPB index_pb;
1051
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1052
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1053
0
            return -1;
1054
0
        }
1055
8
        int64_t current_time = ::time(nullptr);
1056
8
        if (current_time <
1057
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1058
0
            return 0;
1059
0
        }
1060
8
        ++num_expired;
1061
        // decode index_id
1062
8
        auto k1 = k;
1063
8
        k1.remove_prefix(1);
1064
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1065
8
        decode_key(&k1, &out);
1066
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1067
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1068
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1069
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1070
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1071
        // Change state to RECYCLING
1072
8
        std::unique_ptr<Transaction> txn;
1073
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1074
8
        if (err != TxnErrorCode::TXN_OK) {
1075
0
            LOG_WARNING("failed to create txn").tag("err", err);
1076
0
            return -1;
1077
0
        }
1078
8
        std::string val;
1079
8
        err = txn->get(k, &val);
1080
8
        if (err ==
1081
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1082
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1083
0
            return 0;
1084
0
        }
1085
8
        if (err != TxnErrorCode::TXN_OK) {
1086
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1087
0
            return -1;
1088
0
        }
1089
8
        index_pb.Clear();
1090
8
        if (!index_pb.ParseFromString(val)) {
1091
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1092
0
            return -1;
1093
0
        }
1094
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1095
7
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1096
7
            txn->put(k, index_pb.SerializeAsString());
1097
7
            err = txn->commit();
1098
7
            if (err != TxnErrorCode::TXN_OK) {
1099
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1100
0
                return -1;
1101
0
            }
1102
7
        }
1103
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1104
1
            LOG_WARNING("failed to recycle tablets under index")
1105
1
                    .tag("table_id", index_pb.table_id())
1106
1
                    .tag("instance_id", instance_id_)
1107
1
                    .tag("index_id", index_id);
1108
1
            return -1;
1109
1
        }
1110
7
        metrics_context.total_recycled_num = ++num_recycled;
1111
7
        metrics_context.report();
1112
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1113
7
        index_keys.push_back(k);
1114
7
        return 0;
1115
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1048
6
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1049
6
        ++num_scanned;
1050
6
        RecycleIndexPB index_pb;
1051
6
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1052
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1053
0
            return -1;
1054
0
        }
1055
6
        int64_t current_time = ::time(nullptr);
1056
6
        if (current_time <
1057
6
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1058
0
            return 0;
1059
0
        }
1060
6
        ++num_expired;
1061
        // decode index_id
1062
6
        auto k1 = k;
1063
6
        k1.remove_prefix(1);
1064
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1065
6
        decode_key(&k1, &out);
1066
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1067
6
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1068
6
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1069
6
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1070
6
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1071
        // Change state to RECYCLING
1072
6
        std::unique_ptr<Transaction> txn;
1073
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1074
6
        if (err != TxnErrorCode::TXN_OK) {
1075
0
            LOG_WARNING("failed to create txn").tag("err", err);
1076
0
            return -1;
1077
0
        }
1078
6
        std::string val;
1079
6
        err = txn->get(k, &val);
1080
6
        if (err ==
1081
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1082
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1083
0
            return 0;
1084
0
        }
1085
6
        if (err != TxnErrorCode::TXN_OK) {
1086
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1087
0
            return -1;
1088
0
        }
1089
6
        index_pb.Clear();
1090
6
        if (!index_pb.ParseFromString(val)) {
1091
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1092
0
            return -1;
1093
0
        }
1094
6
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1095
6
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1096
6
            txn->put(k, index_pb.SerializeAsString());
1097
6
            err = txn->commit();
1098
6
            if (err != TxnErrorCode::TXN_OK) {
1099
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1100
0
                return -1;
1101
0
            }
1102
6
        }
1103
6
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1104
0
            LOG_WARNING("failed to recycle tablets under index")
1105
0
                    .tag("table_id", index_pb.table_id())
1106
0
                    .tag("instance_id", instance_id_)
1107
0
                    .tag("index_id", index_id);
1108
0
            return -1;
1109
0
        }
1110
6
        metrics_context.total_recycled_num = ++num_recycled;
1111
6
        metrics_context.report();
1112
6
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1113
6
        index_keys.push_back(k);
1114
6
        return 0;
1115
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1048
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1049
2
        ++num_scanned;
1050
2
        RecycleIndexPB index_pb;
1051
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1052
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1053
0
            return -1;
1054
0
        }
1055
2
        int64_t current_time = ::time(nullptr);
1056
2
        if (current_time <
1057
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1058
0
            return 0;
1059
0
        }
1060
2
        ++num_expired;
1061
        // decode index_id
1062
2
        auto k1 = k;
1063
2
        k1.remove_prefix(1);
1064
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1065
2
        decode_key(&k1, &out);
1066
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1067
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1068
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1069
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1070
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1071
        // Change state to RECYCLING
1072
2
        std::unique_ptr<Transaction> txn;
1073
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1074
2
        if (err != TxnErrorCode::TXN_OK) {
1075
0
            LOG_WARNING("failed to create txn").tag("err", err);
1076
0
            return -1;
1077
0
        }
1078
2
        std::string val;
1079
2
        err = txn->get(k, &val);
1080
2
        if (err ==
1081
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1082
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1083
0
            return 0;
1084
0
        }
1085
2
        if (err != TxnErrorCode::TXN_OK) {
1086
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1087
0
            return -1;
1088
0
        }
1089
2
        index_pb.Clear();
1090
2
        if (!index_pb.ParseFromString(val)) {
1091
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1092
0
            return -1;
1093
0
        }
1094
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1095
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1096
1
            txn->put(k, index_pb.SerializeAsString());
1097
1
            err = txn->commit();
1098
1
            if (err != TxnErrorCode::TXN_OK) {
1099
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1100
0
                return -1;
1101
0
            }
1102
1
        }
1103
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1104
1
            LOG_WARNING("failed to recycle tablets under index")
1105
1
                    .tag("table_id", index_pb.table_id())
1106
1
                    .tag("instance_id", instance_id_)
1107
1
                    .tag("index_id", index_id);
1108
1
            return -1;
1109
1
        }
1110
1
        metrics_context.total_recycled_num = ++num_recycled;
1111
1
        metrics_context.report();
1112
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1113
1
        index_keys.push_back(k);
1114
1
        return 0;
1115
2
    };
1116
1117
14
    auto loop_done = [&index_keys, this]() -> int {
1118
4
        if (index_keys.empty()) return 0;
1119
3
        DORIS_CLOUD_DEFER {
1120
3
            index_keys.clear();
1121
3
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1119
2
        DORIS_CLOUD_DEFER {
1120
2
            index_keys.clear();
1121
2
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1119
1
        DORIS_CLOUD_DEFER {
1120
1
            index_keys.clear();
1121
1
        };
1122
3
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1123
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1124
0
            return -1;
1125
0
        }
1126
3
        return 0;
1127
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1117
2
    auto loop_done = [&index_keys, this]() -> int {
1118
2
        if (index_keys.empty()) return 0;
1119
2
        DORIS_CLOUD_DEFER {
1120
2
            index_keys.clear();
1121
2
        };
1122
2
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1123
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1124
0
            return -1;
1125
0
        }
1126
2
        return 0;
1127
2
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1117
2
    auto loop_done = [&index_keys, this]() -> int {
1118
2
        if (index_keys.empty()) return 0;
1119
1
        DORIS_CLOUD_DEFER {
1120
1
            index_keys.clear();
1121
1
        };
1122
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1123
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1124
0
            return -1;
1125
0
        }
1126
1
        return 0;
1127
1
    };
1128
1129
14
    if (config::enable_recycler_stats_metrics) {
1130
0
        scan_and_statistics_indexes();
1131
0
    }
1132
    // recycle_func and loop_done for scan and recycle
1133
14
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
1134
14
}
1135
1136
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
1137
332
                             int64_t tablet_id) {
1138
332
    std::unique_ptr<Transaction> txn;
1139
332
    TxnErrorCode err = txn_kv->create_txn(&txn);
1140
332
    if (err != TxnErrorCode::TXN_OK) {
1141
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
1142
0
                     << " tablet_id=" << tablet_id << " err=" << err;
1143
0
        return false;
1144
0
    }
1145
1146
332
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
1147
332
    std::string tablet_idx_val;
1148
332
    err = txn->get(tablet_idx_key, &tablet_idx_val);
1149
332
    if (TxnErrorCode::TXN_OK != err) {
1150
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
1151
0
                     << " tablet_id=" << tablet_id << " err=" << err
1152
0
                     << " key=" << hex(tablet_idx_key);
1153
0
        return false;
1154
0
    }
1155
1156
332
    TabletIndexPB tablet_idx_pb;
1157
332
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
1158
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
1159
0
                     << " tablet_id=" << tablet_id;
1160
0
        return false;
1161
0
    }
1162
1163
332
    if (!tablet_idx_pb.has_db_id()) {
1164
        // In the previous version, the db_id was not set in the index_pb.
1165
        // If updating to the version which enable txn lazy commit, the db_id will be set.
1166
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
1167
0
                  << " instance_id=" << instance_id
1168
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
1169
0
        return true;
1170
0
    }
1171
1172
332
    std::string ver_val;
1173
332
    std::string ver_key =
1174
332
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
1175
332
                                   tablet_idx_pb.partition_id()});
1176
332
    err = txn->get(ver_key, &ver_val);
1177
1178
332
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
1179
202
        LOG(INFO) << ""
1180
202
                     "partition version not found, instance_id="
1181
202
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
1182
202
                  << " table_id=" << tablet_idx_pb.table_id()
1183
202
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
1184
202
                  << " key=" << hex(ver_key);
1185
202
        return true;
1186
202
    }
1187
1188
130
    if (TxnErrorCode::TXN_OK != err) {
1189
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
1190
0
                     << " db_id=" << tablet_idx_pb.db_id()
1191
0
                     << " table_id=" << tablet_idx_pb.table_id()
1192
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1193
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
1194
0
        return false;
1195
0
    }
1196
1197
130
    VersionPB version_pb;
1198
130
    if (!version_pb.ParseFromString(ver_val)) {
1199
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
1200
0
                     << " db_id=" << tablet_idx_pb.db_id()
1201
0
                     << " table_id=" << tablet_idx_pb.table_id()
1202
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1203
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
1204
0
        return false;
1205
0
    }
1206
1207
130
    if (version_pb.pending_txn_ids_size() > 0) {
1208
20
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
1209
20
        DCHECK(version_pb.pending_txn_ids_size() == 1);
1210
20
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
1211
20
                     << " db_id=" << tablet_idx_pb.db_id()
1212
20
                     << " table_id=" << tablet_idx_pb.table_id()
1213
20
                     << " partition_id=" << tablet_idx_pb.partition_id()
1214
20
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
1215
20
                     << " key=" << hex(ver_key);
1216
20
        return false;
1217
20
    }
1218
110
    return true;
1219
130
}
1220
1221
14
int InstanceRecycler::recycle_partitions() {
1222
14
    const std::string task_name = "recycle_partitions";
1223
14
    int64_t num_scanned = 0;
1224
14
    int64_t num_expired = 0;
1225
14
    int64_t num_recycled = 0;
1226
14
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1227
1228
14
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
1229
14
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
1230
14
    std::string part_key0;
1231
14
    std::string part_key1;
1232
14
    recycle_partition_key(part_key_info0, &part_key0);
1233
14
    recycle_partition_key(part_key_info1, &part_key1);
1234
1235
14
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
1236
1237
14
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1238
14
    register_recycle_task(task_name, start_time);
1239
1240
14
    DORIS_CLOUD_DEFER {
1241
14
        unregister_recycle_task(task_name);
1242
14
        int64_t cost =
1243
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1244
14
        metrics_context.finish_report();
1245
14
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1246
14
                .tag("instance_id", instance_id_)
1247
14
                .tag("num_scanned", num_scanned)
1248
14
                .tag("num_expired", num_expired)
1249
14
                .tag("num_recycled", num_recycled);
1250
14
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1240
12
    DORIS_CLOUD_DEFER {
1241
12
        unregister_recycle_task(task_name);
1242
12
        int64_t cost =
1243
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1244
12
        metrics_context.finish_report();
1245
12
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1246
12
                .tag("instance_id", instance_id_)
1247
12
                .tag("num_scanned", num_scanned)
1248
12
                .tag("num_expired", num_expired)
1249
12
                .tag("num_recycled", num_recycled);
1250
12
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1240
2
    DORIS_CLOUD_DEFER {
1241
2
        unregister_recycle_task(task_name);
1242
2
        int64_t cost =
1243
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1244
2
        metrics_context.finish_report();
1245
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1246
2
                .tag("instance_id", instance_id_)
1247
2
                .tag("num_scanned", num_scanned)
1248
2
                .tag("num_expired", num_expired)
1249
2
                .tag("num_recycled", num_recycled);
1250
2
    };
1251
1252
14
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1253
1254
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
1255
14
    std::vector<std::string_view> partition_keys;
1256
14
    std::vector<std::string> partition_version_keys;
1257
14
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1258
8
        ++num_scanned;
1259
8
        RecyclePartitionPB part_pb;
1260
8
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1261
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1262
0
            return -1;
1263
0
        }
1264
8
        int64_t current_time = ::time(nullptr);
1265
8
        if (current_time <
1266
8
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1267
0
            return 0;
1268
0
        }
1269
8
        ++num_expired;
1270
        // decode partition_id
1271
8
        auto k1 = k;
1272
8
        k1.remove_prefix(1);
1273
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1274
8
        decode_key(&k1, &out);
1275
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1276
8
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1277
8
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1278
8
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1279
8
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1280
        // Change state to RECYCLING
1281
8
        std::unique_ptr<Transaction> txn;
1282
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1283
8
        if (err != TxnErrorCode::TXN_OK) {
1284
0
            LOG_WARNING("failed to create txn").tag("err", err);
1285
0
            return -1;
1286
0
        }
1287
8
        std::string val;
1288
8
        err = txn->get(k, &val);
1289
8
        if (err ==
1290
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1291
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1292
0
            return 0;
1293
0
        }
1294
8
        if (err != TxnErrorCode::TXN_OK) {
1295
0
            LOG_WARNING("failed to get kv");
1296
0
            return -1;
1297
0
        }
1298
8
        part_pb.Clear();
1299
8
        if (!part_pb.ParseFromString(val)) {
1300
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1301
0
            return -1;
1302
0
        }
1303
        // Partitions with PREPARED state MUST have no data
1304
8
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1305
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1306
7
            txn->put(k, part_pb.SerializeAsString());
1307
7
            err = txn->commit();
1308
7
            if (err != TxnErrorCode::TXN_OK) {
1309
0
                LOG_WARNING("failed to commit txn: {}", err);
1310
0
                return -1;
1311
0
            }
1312
7
        }
1313
1314
8
        int ret = 0;
1315
32
        for (int64_t index_id : part_pb.index_id()) {
1316
32
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1317
1
                LOG_WARNING("failed to recycle tablets under partition")
1318
1
                        .tag("table_id", part_pb.table_id())
1319
1
                        .tag("instance_id", instance_id_)
1320
1
                        .tag("index_id", index_id)
1321
1
                        .tag("partition_id", partition_id);
1322
1
                ret = -1;
1323
1
            }
1324
32
        }
1325
8
        if (ret == 0 && part_pb.has_db_id()) {
1326
            // Recycle the versioned keys
1327
7
            std::unique_ptr<Transaction> txn;
1328
7
            err = txn_kv_->create_txn(&txn);
1329
7
            if (err != TxnErrorCode::TXN_OK) {
1330
0
                LOG_WARNING("failed to create txn").tag("err", err);
1331
0
                return -1;
1332
0
            }
1333
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1334
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1335
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1336
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1337
7
            versioned_remove_all(txn.get(), meta_key);
1338
7
            txn->remove(index_key);
1339
7
            txn->remove(inverted_index_key);
1340
7
            err = txn->commit();
1341
7
            if (err != TxnErrorCode::TXN_OK) {
1342
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1343
0
                return -1;
1344
0
            }
1345
7
        }
1346
1347
8
        if (ret == 0) {
1348
7
            ++num_recycled;
1349
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1350
7
            partition_keys.push_back(k);
1351
7
            if (part_pb.db_id() > 0) {
1352
7
                partition_version_keys.push_back(partition_version_key(
1353
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1354
7
            }
1355
7
            metrics_context.total_recycled_num = num_recycled;
1356
7
            metrics_context.report();
1357
7
        }
1358
8
        return ret;
1359
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1257
6
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1258
6
        ++num_scanned;
1259
6
        RecyclePartitionPB part_pb;
1260
6
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1261
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1262
0
            return -1;
1263
0
        }
1264
6
        int64_t current_time = ::time(nullptr);
1265
6
        if (current_time <
1266
6
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1267
0
            return 0;
1268
0
        }
1269
6
        ++num_expired;
1270
        // decode partition_id
1271
6
        auto k1 = k;
1272
6
        k1.remove_prefix(1);
1273
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1274
6
        decode_key(&k1, &out);
1275
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1276
6
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1277
6
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1278
6
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1279
6
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1280
        // Change state to RECYCLING
1281
6
        std::unique_ptr<Transaction> txn;
1282
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1283
6
        if (err != TxnErrorCode::TXN_OK) {
1284
0
            LOG_WARNING("failed to create txn").tag("err", err);
1285
0
            return -1;
1286
0
        }
1287
6
        std::string val;
1288
6
        err = txn->get(k, &val);
1289
6
        if (err ==
1290
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1291
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1292
0
            return 0;
1293
0
        }
1294
6
        if (err != TxnErrorCode::TXN_OK) {
1295
0
            LOG_WARNING("failed to get kv");
1296
0
            return -1;
1297
0
        }
1298
6
        part_pb.Clear();
1299
6
        if (!part_pb.ParseFromString(val)) {
1300
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1301
0
            return -1;
1302
0
        }
1303
        // Partitions with PREPARED state MUST have no data
1304
6
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1305
6
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1306
6
            txn->put(k, part_pb.SerializeAsString());
1307
6
            err = txn->commit();
1308
6
            if (err != TxnErrorCode::TXN_OK) {
1309
0
                LOG_WARNING("failed to commit txn: {}", err);
1310
0
                return -1;
1311
0
            }
1312
6
        }
1313
1314
6
        int ret = 0;
1315
30
        for (int64_t index_id : part_pb.index_id()) {
1316
30
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1317
0
                LOG_WARNING("failed to recycle tablets under partition")
1318
0
                        .tag("table_id", part_pb.table_id())
1319
0
                        .tag("instance_id", instance_id_)
1320
0
                        .tag("index_id", index_id)
1321
0
                        .tag("partition_id", partition_id);
1322
0
                ret = -1;
1323
0
            }
1324
30
        }
1325
6
        if (ret == 0 && part_pb.has_db_id()) {
1326
            // Recycle the versioned keys
1327
6
            std::unique_ptr<Transaction> txn;
1328
6
            err = txn_kv_->create_txn(&txn);
1329
6
            if (err != TxnErrorCode::TXN_OK) {
1330
0
                LOG_WARNING("failed to create txn").tag("err", err);
1331
0
                return -1;
1332
0
            }
1333
6
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1334
6
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1335
6
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1336
6
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1337
6
            versioned_remove_all(txn.get(), meta_key);
1338
6
            txn->remove(index_key);
1339
6
            txn->remove(inverted_index_key);
1340
6
            err = txn->commit();
1341
6
            if (err != TxnErrorCode::TXN_OK) {
1342
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1343
0
                return -1;
1344
0
            }
1345
6
        }
1346
1347
6
        if (ret == 0) {
1348
6
            ++num_recycled;
1349
6
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1350
6
            partition_keys.push_back(k);
1351
6
            if (part_pb.db_id() > 0) {
1352
6
                partition_version_keys.push_back(partition_version_key(
1353
6
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1354
6
            }
1355
6
            metrics_context.total_recycled_num = num_recycled;
1356
6
            metrics_context.report();
1357
6
        }
1358
6
        return ret;
1359
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1257
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1258
2
        ++num_scanned;
1259
2
        RecyclePartitionPB part_pb;
1260
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1261
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1262
0
            return -1;
1263
0
        }
1264
2
        int64_t current_time = ::time(nullptr);
1265
2
        if (current_time <
1266
2
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1267
0
            return 0;
1268
0
        }
1269
2
        ++num_expired;
1270
        // decode partition_id
1271
2
        auto k1 = k;
1272
2
        k1.remove_prefix(1);
1273
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1274
2
        decode_key(&k1, &out);
1275
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1276
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1277
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1278
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1279
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1280
        // Change state to RECYCLING
1281
2
        std::unique_ptr<Transaction> txn;
1282
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1283
2
        if (err != TxnErrorCode::TXN_OK) {
1284
0
            LOG_WARNING("failed to create txn").tag("err", err);
1285
0
            return -1;
1286
0
        }
1287
2
        std::string val;
1288
2
        err = txn->get(k, &val);
1289
2
        if (err ==
1290
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1291
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1292
0
            return 0;
1293
0
        }
1294
2
        if (err != TxnErrorCode::TXN_OK) {
1295
0
            LOG_WARNING("failed to get kv");
1296
0
            return -1;
1297
0
        }
1298
2
        part_pb.Clear();
1299
2
        if (!part_pb.ParseFromString(val)) {
1300
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1301
0
            return -1;
1302
0
        }
1303
        // Partitions with PREPARED state MUST have no data
1304
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1305
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1306
1
            txn->put(k, part_pb.SerializeAsString());
1307
1
            err = txn->commit();
1308
1
            if (err != TxnErrorCode::TXN_OK) {
1309
0
                LOG_WARNING("failed to commit txn: {}", err);
1310
0
                return -1;
1311
0
            }
1312
1
        }
1313
1314
2
        int ret = 0;
1315
2
        for (int64_t index_id : part_pb.index_id()) {
1316
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1317
1
                LOG_WARNING("failed to recycle tablets under partition")
1318
1
                        .tag("table_id", part_pb.table_id())
1319
1
                        .tag("instance_id", instance_id_)
1320
1
                        .tag("index_id", index_id)
1321
1
                        .tag("partition_id", partition_id);
1322
1
                ret = -1;
1323
1
            }
1324
2
        }
1325
2
        if (ret == 0 && part_pb.has_db_id()) {
1326
            // Recycle the versioned keys
1327
1
            std::unique_ptr<Transaction> txn;
1328
1
            err = txn_kv_->create_txn(&txn);
1329
1
            if (err != TxnErrorCode::TXN_OK) {
1330
0
                LOG_WARNING("failed to create txn").tag("err", err);
1331
0
                return -1;
1332
0
            }
1333
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1334
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1335
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1336
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1337
1
            versioned_remove_all(txn.get(), meta_key);
1338
1
            txn->remove(index_key);
1339
1
            txn->remove(inverted_index_key);
1340
1
            err = txn->commit();
1341
1
            if (err != TxnErrorCode::TXN_OK) {
1342
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1343
0
                return -1;
1344
0
            }
1345
1
        }
1346
1347
2
        if (ret == 0) {
1348
1
            ++num_recycled;
1349
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1350
1
            partition_keys.push_back(k);
1351
1
            if (part_pb.db_id() > 0) {
1352
1
                partition_version_keys.push_back(partition_version_key(
1353
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1354
1
            }
1355
1
            metrics_context.total_recycled_num = num_recycled;
1356
1
            metrics_context.report();
1357
1
        }
1358
2
        return ret;
1359
2
    };
1360
1361
14
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1362
4
        if (partition_keys.empty()) return 0;
1363
3
        DORIS_CLOUD_DEFER {
1364
3
            partition_keys.clear();
1365
3
            partition_version_keys.clear();
1366
3
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1363
2
        DORIS_CLOUD_DEFER {
1364
2
            partition_keys.clear();
1365
2
            partition_version_keys.clear();
1366
2
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1363
1
        DORIS_CLOUD_DEFER {
1364
1
            partition_keys.clear();
1365
1
            partition_version_keys.clear();
1366
1
        };
1367
3
        std::unique_ptr<Transaction> txn;
1368
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1369
3
        if (err != TxnErrorCode::TXN_OK) {
1370
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1371
0
            return -1;
1372
0
        }
1373
7
        for (auto& k : partition_keys) {
1374
7
            txn->remove(k);
1375
7
        }
1376
7
        for (auto& k : partition_version_keys) {
1377
7
            txn->remove(k);
1378
7
        }
1379
3
        err = txn->commit();
1380
3
        if (err != TxnErrorCode::TXN_OK) {
1381
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1382
0
                         << " err=" << err;
1383
0
            return -1;
1384
0
        }
1385
3
        return 0;
1386
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1361
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1362
2
        if (partition_keys.empty()) return 0;
1363
2
        DORIS_CLOUD_DEFER {
1364
2
            partition_keys.clear();
1365
2
            partition_version_keys.clear();
1366
2
        };
1367
2
        std::unique_ptr<Transaction> txn;
1368
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1369
2
        if (err != TxnErrorCode::TXN_OK) {
1370
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1371
0
            return -1;
1372
0
        }
1373
6
        for (auto& k : partition_keys) {
1374
6
            txn->remove(k);
1375
6
        }
1376
6
        for (auto& k : partition_version_keys) {
1377
6
            txn->remove(k);
1378
6
        }
1379
2
        err = txn->commit();
1380
2
        if (err != TxnErrorCode::TXN_OK) {
1381
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1382
0
                         << " err=" << err;
1383
0
            return -1;
1384
0
        }
1385
2
        return 0;
1386
2
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1361
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1362
2
        if (partition_keys.empty()) return 0;
1363
1
        DORIS_CLOUD_DEFER {
1364
1
            partition_keys.clear();
1365
1
            partition_version_keys.clear();
1366
1
        };
1367
1
        std::unique_ptr<Transaction> txn;
1368
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1369
1
        if (err != TxnErrorCode::TXN_OK) {
1370
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1371
0
            return -1;
1372
0
        }
1373
1
        for (auto& k : partition_keys) {
1374
1
            txn->remove(k);
1375
1
        }
1376
1
        for (auto& k : partition_version_keys) {
1377
1
            txn->remove(k);
1378
1
        }
1379
1
        err = txn->commit();
1380
1
        if (err != TxnErrorCode::TXN_OK) {
1381
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1382
0
                         << " err=" << err;
1383
0
            return -1;
1384
0
        }
1385
1
        return 0;
1386
1
    };
1387
1388
14
    if (config::enable_recycler_stats_metrics) {
1389
0
        scan_and_statistics_partitions();
1390
0
    }
1391
    // recycle_func and loop_done for scan and recycle
1392
14
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
1393
14
}
1394
1395
12
int InstanceRecycler::recycle_versions() {
1396
12
    int64_t num_scanned = 0;
1397
12
    int64_t num_recycled = 0;
1398
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
1399
1400
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
1401
1402
12
    auto start_time = steady_clock::now();
1403
1404
12
    DORIS_CLOUD_DEFER {
1405
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1406
12
        metrics_context.finish_report();
1407
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
1408
12
                .tag("instance_id", instance_id_)
1409
12
                .tag("num_scanned", num_scanned)
1410
12
                .tag("num_recycled", num_recycled);
1411
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
1404
12
    DORIS_CLOUD_DEFER {
1405
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1406
12
        metrics_context.finish_report();
1407
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
1408
12
                .tag("instance_id", instance_id_)
1409
12
                .tag("num_scanned", num_scanned)
1410
12
                .tag("num_recycled", num_recycled);
1411
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
1412
1413
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
1414
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
1415
12
    int64_t last_scanned_table_id = 0;
1416
12
    bool is_recycled = false; // Is last scanned kv recycled
1417
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
1418
12
                         &metrics_context, this](std::string_view k, std::string_view) {
1419
2
        ++num_scanned;
1420
2
        auto k1 = k;
1421
2
        k1.remove_prefix(1);
1422
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1423
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1424
2
        decode_key(&k1, &out);
1425
2
        DCHECK_EQ(out.size(), 6) << k;
1426
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1427
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1428
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1429
0
            return 0;
1430
0
        }
1431
2
        last_scanned_table_id = table_id;
1432
2
        is_recycled = false;
1433
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1434
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1435
2
        std::unique_ptr<Transaction> txn;
1436
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1437
2
        if (err != TxnErrorCode::TXN_OK) {
1438
0
            return -1;
1439
0
        }
1440
2
        std::unique_ptr<RangeGetIterator> iter;
1441
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1442
2
        if (err != TxnErrorCode::TXN_OK) {
1443
0
            return -1;
1444
0
        }
1445
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1446
1
            return 0;
1447
1
        }
1448
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1449
        // 1. Remove all partition version kvs of this table
1450
1
        auto partition_version_key_begin =
1451
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1452
1
        auto partition_version_key_end =
1453
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1454
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1455
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1456
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1457
1
                     << " table_id=" << table_id;
1458
        // 2. Remove the table version kv of this table
1459
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1460
1
        txn->remove(tbl_version_key);
1461
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1462
        // 3. Remove mow delete bitmap update lock and tablet job lock
1463
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1464
1
        txn->remove(lock_key);
1465
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1466
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1467
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1468
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
1469
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1470
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1471
1
                     << " table_id=" << table_id;
1472
1
        err = txn->commit();
1473
1
        if (err != TxnErrorCode::TXN_OK) {
1474
0
            return -1;
1475
0
        }
1476
1
        metrics_context.total_recycled_num = ++num_recycled;
1477
1
        metrics_context.report();
1478
1
        is_recycled = true;
1479
1
        return 0;
1480
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1418
2
                         &metrics_context, this](std::string_view k, std::string_view) {
1419
2
        ++num_scanned;
1420
2
        auto k1 = k;
1421
2
        k1.remove_prefix(1);
1422
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1423
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1424
2
        decode_key(&k1, &out);
1425
2
        DCHECK_EQ(out.size(), 6) << k;
1426
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1427
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1428
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1429
0
            return 0;
1430
0
        }
1431
2
        last_scanned_table_id = table_id;
1432
2
        is_recycled = false;
1433
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1434
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1435
2
        std::unique_ptr<Transaction> txn;
1436
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1437
2
        if (err != TxnErrorCode::TXN_OK) {
1438
0
            return -1;
1439
0
        }
1440
2
        std::unique_ptr<RangeGetIterator> iter;
1441
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1442
2
        if (err != TxnErrorCode::TXN_OK) {
1443
0
            return -1;
1444
0
        }
1445
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1446
1
            return 0;
1447
1
        }
1448
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1449
        // 1. Remove all partition version kvs of this table
1450
1
        auto partition_version_key_begin =
1451
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1452
1
        auto partition_version_key_end =
1453
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1454
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1455
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1456
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1457
1
                     << " table_id=" << table_id;
1458
        // 2. Remove the table version kv of this table
1459
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1460
1
        txn->remove(tbl_version_key);
1461
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1462
        // 3. Remove mow delete bitmap update lock and tablet job lock
1463
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1464
1
        txn->remove(lock_key);
1465
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1466
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1467
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1468
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
1469
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1470
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1471
1
                     << " table_id=" << table_id;
1472
1
        err = txn->commit();
1473
1
        if (err != TxnErrorCode::TXN_OK) {
1474
0
            return -1;
1475
0
        }
1476
1
        metrics_context.total_recycled_num = ++num_recycled;
1477
1
        metrics_context.report();
1478
1
        is_recycled = true;
1479
1
        return 0;
1480
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
1481
1482
12
    if (config::enable_recycler_stats_metrics) {
1483
0
        scan_and_statistics_versions();
1484
0
    }
1485
    // recycle_func and loop_done for scan and recycle
1486
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
1487
12
}
1488
1489
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
1490
                                      RecyclerMetricsContext& metrics_context,
1491
62
                                      int64_t partition_id) {
1492
62
    int64_t num_scanned = 0;
1493
62
    std::atomic_long num_recycled = 0;
1494
1495
62
    std::string tablet_key_begin, tablet_key_end;
1496
62
    std::string stats_key_begin, stats_key_end;
1497
62
    std::string job_key_begin, job_key_end;
1498
1499
62
    std::string tablet_belongs;
1500
62
    if (partition_id > 0) {
1501
        // recycle tablets in a partition belonging to the index
1502
32
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
1503
32
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
1504
32
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
1505
32
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
1506
32
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
1507
32
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
1508
32
        tablet_belongs = "partition";
1509
32
    } else {
1510
        // recycle tablets in the index
1511
30
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
1512
30
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
1513
30
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
1514
30
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
1515
30
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
1516
30
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
1517
30
        tablet_belongs = "index";
1518
30
    }
1519
1520
62
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
1521
62
            .tag("table_id", table_id)
1522
62
            .tag("index_id", index_id)
1523
62
            .tag("partition_id", partition_id);
1524
1525
62
    auto start_time = steady_clock::now();
1526
1527
62
    DORIS_CLOUD_DEFER {
1528
62
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1529
62
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1530
62
                .tag("instance_id", instance_id_)
1531
62
                .tag("table_id", table_id)
1532
62
                .tag("index_id", index_id)
1533
62
                .tag("partition_id", partition_id)
1534
62
                .tag("num_scanned", num_scanned)
1535
62
                .tag("num_recycled", num_recycled);
1536
62
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
1527
58
    DORIS_CLOUD_DEFER {
1528
58
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1529
58
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1530
58
                .tag("instance_id", instance_id_)
1531
58
                .tag("table_id", table_id)
1532
58
                .tag("index_id", index_id)
1533
58
                .tag("partition_id", partition_id)
1534
58
                .tag("num_scanned", num_scanned)
1535
58
                .tag("num_recycled", num_recycled);
1536
58
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
1527
4
    DORIS_CLOUD_DEFER {
1528
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1529
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1530
4
                .tag("instance_id", instance_id_)
1531
4
                .tag("table_id", table_id)
1532
4
                .tag("index_id", index_id)
1533
4
                .tag("partition_id", partition_id)
1534
4
                .tag("num_scanned", num_scanned)
1535
4
                .tag("num_recycled", num_recycled);
1536
4
    };
1537
1538
    // The first string_view represents the tablet key which has been recycled
1539
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
1540
62
    using TabletKeyPair = std::pair<std::string_view, bool>;
1541
62
    SyncExecutor<TabletKeyPair> sync_executor(
1542
62
            _thread_pool_group.recycle_tablet_pool,
1543
62
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
1544
62
                        index_id, partition_id),
1545
312
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1545
292
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1545
20
            [](const TabletKeyPair& k) { return k.first.empty(); });
1546
1547
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
1548
62
    std::vector<std::string> tablet_idx_keys;
1549
62
    std::vector<std::string> restore_job_keys;
1550
62
    std::vector<std::string> init_rs_keys;
1551
332
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1552
332
        bool use_range_remove = true;
1553
332
        ++num_scanned;
1554
332
        doris::TabletMetaCloudPB tablet_meta_pb;
1555
332
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1556
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1557
0
            use_range_remove = false;
1558
0
            return -1;
1559
0
        }
1560
332
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1561
1562
332
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1563
20
            LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id();
1564
20
            return -1;
1565
20
        }
1566
1567
312
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1568
312
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
1569
312
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1570
312
                           &metrics_context, k]() mutable -> TabletKeyPair {
1571
312
            if (recycle_tablet(tid, metrics_context) != 0) {
1572
0
                LOG_WARNING("failed to recycle tablet")
1573
0
                        .tag("instance_id", instance_id_)
1574
0
                        .tag("tablet_id", tid);
1575
0
                range_move = false;
1576
0
                return {std::string_view(), range_move};
1577
0
            }
1578
312
            ++num_recycled;
1579
312
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1580
312
            return {k, range_move};
1581
312
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
1570
292
                           &metrics_context, k]() mutable -> TabletKeyPair {
1571
292
            if (recycle_tablet(tid, metrics_context) != 0) {
1572
0
                LOG_WARNING("failed to recycle tablet")
1573
0
                        .tag("instance_id", instance_id_)
1574
0
                        .tag("tablet_id", tid);
1575
0
                range_move = false;
1576
0
                return {std::string_view(), range_move};
1577
0
            }
1578
292
            ++num_recycled;
1579
292
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1580
292
            return {k, range_move};
1581
292
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
1570
20
                           &metrics_context, k]() mutable -> TabletKeyPair {
1571
20
            if (recycle_tablet(tid, metrics_context) != 0) {
1572
0
                LOG_WARNING("failed to recycle tablet")
1573
0
                        .tag("instance_id", instance_id_)
1574
0
                        .tag("tablet_id", tid);
1575
0
                range_move = false;
1576
0
                return {std::string_view(), range_move};
1577
0
            }
1578
20
            ++num_recycled;
1579
20
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1580
20
            return {k, range_move};
1581
20
        });
1582
312
        return 0;
1583
332
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
1551
292
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1552
292
        bool use_range_remove = true;
1553
292
        ++num_scanned;
1554
292
        doris::TabletMetaCloudPB tablet_meta_pb;
1555
292
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1556
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1557
0
            use_range_remove = false;
1558
0
            return -1;
1559
0
        }
1560
292
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1561
1562
292
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1563
0
            LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id();
1564
0
            return -1;
1565
0
        }
1566
1567
292
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1568
292
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
1569
292
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1570
292
                           &metrics_context, k]() mutable -> TabletKeyPair {
1571
292
            if (recycle_tablet(tid, metrics_context) != 0) {
1572
292
                LOG_WARNING("failed to recycle tablet")
1573
292
                        .tag("instance_id", instance_id_)
1574
292
                        .tag("tablet_id", tid);
1575
292
                range_move = false;
1576
292
                return {std::string_view(), range_move};
1577
292
            }
1578
292
            ++num_recycled;
1579
292
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1580
292
            return {k, range_move};
1581
292
        });
1582
292
        return 0;
1583
292
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
1551
40
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1552
40
        bool use_range_remove = true;
1553
40
        ++num_scanned;
1554
40
        doris::TabletMetaCloudPB tablet_meta_pb;
1555
40
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1556
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1557
0
            use_range_remove = false;
1558
0
            return -1;
1559
0
        }
1560
40
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1561
1562
40
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1563
20
            LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id();
1564
20
            return -1;
1565
20
        }
1566
1567
20
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1568
20
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
1569
20
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1570
20
                           &metrics_context, k]() mutable -> TabletKeyPair {
1571
20
            if (recycle_tablet(tid, metrics_context) != 0) {
1572
20
                LOG_WARNING("failed to recycle tablet")
1573
20
                        .tag("instance_id", instance_id_)
1574
20
                        .tag("tablet_id", tid);
1575
20
                range_move = false;
1576
20
                return {std::string_view(), range_move};
1577
20
            }
1578
20
            ++num_recycled;
1579
20
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1580
20
            return {k, range_move};
1581
20
        });
1582
20
        return 0;
1583
40
    };
1584
1585
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
1586
62
    auto loop_done = [&, this]() -> int {
1587
52
        bool finished = true;
1588
52
        auto tablet_keys = sync_executor.when_all(&finished);
1589
52
        if (!finished) {
1590
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1591
0
            return -1;
1592
0
        }
1593
52
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1594
        // sort the vector using key's order
1595
50
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1596
1.08k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
1596
1.04k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
1596
36
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1597
50
        bool use_range_remove = true;
1598
312
        for (auto& [_, remove] : tablet_keys) {
1599
312
            if (!remove) {
1600
0
                use_range_remove = remove;
1601
0
                break;
1602
0
            }
1603
312
        }
1604
50
        DORIS_CLOUD_DEFER {
1605
50
            tablet_idx_keys.clear();
1606
50
            restore_job_keys.clear();
1607
50
            init_rs_keys.clear();
1608
50
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1604
48
        DORIS_CLOUD_DEFER {
1605
48
            tablet_idx_keys.clear();
1606
48
            restore_job_keys.clear();
1607
48
            init_rs_keys.clear();
1608
48
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1604
2
        DORIS_CLOUD_DEFER {
1605
2
            tablet_idx_keys.clear();
1606
2
            restore_job_keys.clear();
1607
2
            init_rs_keys.clear();
1608
2
        };
1609
50
        std::unique_ptr<Transaction> txn;
1610
50
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1611
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1612
0
            return -1;
1613
0
        }
1614
50
        std::string tablet_key_end;
1615
50
        if (!tablet_keys.empty()) {
1616
50
            if (use_range_remove) {
1617
50
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1618
50
                txn->remove(tablet_keys.front().first, tablet_key_end);
1619
50
            } else {
1620
0
                for (auto& [k, _] : tablet_keys) {
1621
0
                    txn->remove(k);
1622
0
                }
1623
0
            }
1624
50
        }
1625
312
        for (auto& k : tablet_idx_keys) {
1626
312
            txn->remove(k);
1627
312
        }
1628
312
        for (auto& k : restore_job_keys) {
1629
312
            txn->remove(k);
1630
312
        }
1631
50
        for (auto& k : init_rs_keys) {
1632
0
            txn->remove(k);
1633
0
        }
1634
50
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1635
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1636
0
                         << ", err=" << err;
1637
0
            return -1;
1638
0
        }
1639
50
        return 0;
1640
50
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
1586
48
    auto loop_done = [&, this]() -> int {
1587
48
        bool finished = true;
1588
48
        auto tablet_keys = sync_executor.when_all(&finished);
1589
48
        if (!finished) {
1590
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1591
0
            return -1;
1592
0
        }
1593
48
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1594
        // sort the vector using key's order
1595
48
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1596
48
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1597
48
        bool use_range_remove = true;
1598
292
        for (auto& [_, remove] : tablet_keys) {
1599
292
            if (!remove) {
1600
0
                use_range_remove = remove;
1601
0
                break;
1602
0
            }
1603
292
        }
1604
48
        DORIS_CLOUD_DEFER {
1605
48
            tablet_idx_keys.clear();
1606
48
            restore_job_keys.clear();
1607
48
            init_rs_keys.clear();
1608
48
        };
1609
48
        std::unique_ptr<Transaction> txn;
1610
48
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1611
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1612
0
            return -1;
1613
0
        }
1614
48
        std::string tablet_key_end;
1615
48
        if (!tablet_keys.empty()) {
1616
48
            if (use_range_remove) {
1617
48
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1618
48
                txn->remove(tablet_keys.front().first, tablet_key_end);
1619
48
            } else {
1620
0
                for (auto& [k, _] : tablet_keys) {
1621
0
                    txn->remove(k);
1622
0
                }
1623
0
            }
1624
48
        }
1625
292
        for (auto& k : tablet_idx_keys) {
1626
292
            txn->remove(k);
1627
292
        }
1628
292
        for (auto& k : restore_job_keys) {
1629
292
            txn->remove(k);
1630
292
        }
1631
48
        for (auto& k : init_rs_keys) {
1632
0
            txn->remove(k);
1633
0
        }
1634
48
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1635
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1636
0
                         << ", err=" << err;
1637
0
            return -1;
1638
0
        }
1639
48
        return 0;
1640
48
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
1586
4
    auto loop_done = [&, this]() -> int {
1587
4
        bool finished = true;
1588
4
        auto tablet_keys = sync_executor.when_all(&finished);
1589
4
        if (!finished) {
1590
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1591
0
            return -1;
1592
0
        }
1593
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1594
        // sort the vector using key's order
1595
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1596
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1597
2
        bool use_range_remove = true;
1598
20
        for (auto& [_, remove] : tablet_keys) {
1599
20
            if (!remove) {
1600
0
                use_range_remove = remove;
1601
0
                break;
1602
0
            }
1603
20
        }
1604
2
        DORIS_CLOUD_DEFER {
1605
2
            tablet_idx_keys.clear();
1606
2
            restore_job_keys.clear();
1607
2
            init_rs_keys.clear();
1608
2
        };
1609
2
        std::unique_ptr<Transaction> txn;
1610
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1611
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1612
0
            return -1;
1613
0
        }
1614
2
        std::string tablet_key_end;
1615
2
        if (!tablet_keys.empty()) {
1616
2
            if (use_range_remove) {
1617
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1618
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
1619
2
            } else {
1620
0
                for (auto& [k, _] : tablet_keys) {
1621
0
                    txn->remove(k);
1622
0
                }
1623
0
            }
1624
2
        }
1625
20
        for (auto& k : tablet_idx_keys) {
1626
20
            txn->remove(k);
1627
20
        }
1628
20
        for (auto& k : restore_job_keys) {
1629
20
            txn->remove(k);
1630
20
        }
1631
2
        for (auto& k : init_rs_keys) {
1632
0
            txn->remove(k);
1633
0
        }
1634
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1635
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1636
0
                         << ", err=" << err;
1637
0
            return -1;
1638
0
        }
1639
2
        return 0;
1640
2
    };
1641
1642
62
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
1643
62
                               std::move(loop_done));
1644
62
    if (ret != 0) {
1645
2
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
1646
2
        return ret;
1647
2
    }
1648
1649
    // directly remove tablet stats and tablet jobs of these dropped index or partition
1650
60
    std::unique_ptr<Transaction> txn;
1651
60
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1652
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
1653
0
        return -1;
1654
0
    }
1655
60
    txn->remove(stats_key_begin, stats_key_end);
1656
60
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
1657
60
                 << " end=" << hex(stats_key_end);
1658
60
    txn->remove(job_key_begin, job_key_end);
1659
60
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
1660
60
    std::string schema_key_begin, schema_key_end;
1661
60
    std::string schema_dict_key;
1662
60
    if (partition_id <= 0) {
1663
        // Delete schema kv of this index
1664
29
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
1665
29
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
1666
29
        txn->remove(schema_key_begin, schema_key_end);
1667
29
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
1668
29
                     << " end=" << hex(schema_key_end);
1669
29
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
1670
29
        txn->remove(schema_dict_key);
1671
29
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
1672
29
    }
1673
1674
60
    TxnErrorCode err = txn->commit();
1675
60
    if (err != TxnErrorCode::TXN_OK) {
1676
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
1677
0
                     << " err=" << err;
1678
0
        return -1;
1679
0
    }
1680
1681
60
    return ret;
1682
60
}
1683
1684
4.01k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
1685
4.01k
    int64_t num_segments = rs_meta_pb.num_segments();
1686
4.01k
    if (num_segments <= 0) return 0;
1687
1688
    // Process inverted indexes
1689
4.01k
    std::vector<std::pair<int64_t, std::string>> index_ids;
1690
    // default format as v1.
1691
4.01k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
1692
4.01k
    bool delete_rowset_data_by_prefix = false;
1693
4.01k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
1694
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
1695
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
1696
0
        delete_rowset_data_by_prefix = true;
1697
4.01k
    } else if (rs_meta_pb.has_tablet_schema()) {
1698
8.00k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
1699
8.00k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
1700
8.00k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
1701
8.00k
            }
1702
8.00k
        }
1703
4.00k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
1704
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
1705
2.00k
        }
1706
4.00k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
1707
        // schema version and index id are not found, delete rowset data by prefix directly.
1708
10
        delete_rowset_data_by_prefix = true;
1709
10
    } else {
1710
        // otherwise, try to get schema kv
1711
1
        InvertedIndexInfo index_info;
1712
1
        int inverted_index_get_ret = inverted_index_id_cache_->get(
1713
1
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
1714
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
1715
1
                                 &inverted_index_get_ret);
1716
1
        if (inverted_index_get_ret == 0) {
1717
1
            index_format = index_info.first;
1718
1
            index_ids = index_info.second;
1719
1
        } else if (inverted_index_get_ret == 1) {
1720
            // 1. Schema kv not found means tablet has been recycled
1721
            // Maybe some tablet recycle failed by some bugs
1722
            // We need to delete again to double check
1723
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
1724
            // because we are uncertain about the inverted index information.
1725
            // If there are inverted indexes, some data might not be deleted,
1726
            // but this is acceptable as we have made our best effort to delete the data.
1727
0
            LOG_INFO(
1728
0
                    "delete rowset data schema kv not found, need to delete again to double "
1729
0
                    "check")
1730
0
                    .tag("instance_id", instance_id_)
1731
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
1732
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
1733
            // Currently index_ids is guaranteed to be empty,
1734
            // but we clear it again here as a safeguard against future code changes
1735
            // that might cause index_ids to no longer be empty
1736
0
            index_format = InvertedIndexStorageFormatPB::V2;
1737
0
            index_ids.clear();
1738
0
        } else {
1739
            // failed to get schema kv, delete rowset data by prefix directly.
1740
0
            delete_rowset_data_by_prefix = true;
1741
0
        }
1742
1
    }
1743
1744
4.01k
    if (delete_rowset_data_by_prefix) {
1745
10
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
1746
10
                                  rs_meta_pb.rowset_id_v2());
1747
10
    }
1748
1749
4.00k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
1750
4.00k
    if (it == accessor_map_.end()) {
1751
0
        LOG_WARNING("instance has no such resource id")
1752
0
                .tag("instance_id", instance_id_)
1753
0
                .tag("resource_id", rs_meta_pb.resource_id());
1754
0
        return -1;
1755
0
    }
1756
4.00k
    auto& accessor = it->second;
1757
4.00k
    int64_t tablet_id = rs_meta_pb.tablet_id();
1758
4.00k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
1759
4.00k
    std::vector<std::string> file_paths;
1760
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
1761
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
1762
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
1763
40.0k
            for (const auto& index_id : index_ids) {
1764
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
1765
40.0k
                                                            index_id.second));
1766
40.0k
            }
1767
20.0k
        } else if (!index_ids.empty()) {
1768
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
1769
0
        }
1770
20.0k
    }
1771
1772
    // TODO(AlexYue): seems could do do batch
1773
4.00k
    return accessor->delete_files(file_paths);
1774
4.00k
}
1775
1776
int InstanceRecycler::delete_rowset_data(
1777
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
1778
32
        RecyclerMetricsContext& metrics_context) {
1779
32
    int ret = 0;
1780
    // resource_id -> file_paths
1781
32
    std::map<std::string, std::vector<std::string>> resource_file_paths;
1782
    // (resource_id, tablet_id, rowset_id)
1783
32
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
1784
32
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
1785
1786
6.14k
    for (const auto& [_, rs] : rowsets) {
1787
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
1788
        // due to aborted schema change.
1789
6.14k
        if (is_formal_rowset) {
1790
3.12k
            std::lock_guard lock(recycled_tablets_mtx_);
1791
3.12k
            if (recycled_tablets_.count(rs.tablet_id())) {
1792
0
                continue; // Rowset data has already been deleted
1793
0
            }
1794
3.12k
        }
1795
1796
6.14k
        auto it = accessor_map_.find(rs.resource_id());
1797
        // possible if the accessor is not initilized correctly
1798
6.14k
        if (it == accessor_map_.end()) [[unlikely]] {
1799
1
            LOG_WARNING("instance has no such resource id")
1800
1
                    .tag("instance_id", instance_id_)
1801
1
                    .tag("resource_id", rs.resource_id());
1802
1
            ret = -1;
1803
1
            continue;
1804
1
        }
1805
1806
6.14k
        auto& file_paths = resource_file_paths[rs.resource_id()];
1807
6.14k
        const auto& rowset_id = rs.rowset_id_v2();
1808
6.14k
        int64_t tablet_id = rs.tablet_id();
1809
6.14k
        int64_t num_segments = rs.num_segments();
1810
6.14k
        if (num_segments <= 0) {
1811
0
            metrics_context.total_recycled_num++;
1812
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
1813
0
            continue;
1814
0
        }
1815
1816
        // Process inverted indexes
1817
6.14k
        std::vector<std::pair<int64_t, std::string>> index_ids;
1818
        // default format as v1.
1819
6.14k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
1820
6.14k
        int inverted_index_get_ret = 0;
1821
6.14k
        if (rs.has_tablet_schema()) {
1822
5.54k
            for (const auto& index : rs.tablet_schema().index()) {
1823
5.54k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
1824
5.54k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
1825
5.54k
                }
1826
5.54k
            }
1827
2.59k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
1828
2.56k
                index_format = rs.tablet_schema().inverted_index_storage_format();
1829
2.56k
            }
1830
3.55k
        } else {
1831
3.55k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
1832
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
1833
0
                                "instance_id="
1834
0
                             << instance_id_ << " tablet_id=" << tablet_id
1835
0
                             << " rowset_id=" << rowset_id;
1836
0
                ret = -1;
1837
0
                continue;
1838
0
            }
1839
3.55k
            InvertedIndexInfo index_info;
1840
3.55k
            inverted_index_get_ret =
1841
3.55k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
1842
3.55k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
1843
3.55k
                                     &inverted_index_get_ret);
1844
3.55k
            if (inverted_index_get_ret == 0) {
1845
3.05k
                index_format = index_info.first;
1846
3.05k
                index_ids = index_info.second;
1847
3.05k
            } else if (inverted_index_get_ret == 1) {
1848
                // 1. Schema kv not found means tablet has been recycled
1849
                // Maybe some tablet recycle failed by some bugs
1850
                // We need to delete again to double check
1851
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
1852
                // because we are uncertain about the inverted index information.
1853
                // If there are inverted indexes, some data might not be deleted,
1854
                // but this is acceptable as we have made our best effort to delete the data.
1855
503
                LOG_INFO(
1856
503
                        "delete rowset data schema kv not found, need to delete again to double "
1857
503
                        "check")
1858
503
                        .tag("instance_id", instance_id_)
1859
503
                        .tag("tablet_id", tablet_id)
1860
503
                        .tag("rowset", rs.ShortDebugString());
1861
                // Currently index_ids is guaranteed to be empty,
1862
                // but we clear it again here as a safeguard against future code changes
1863
                // that might cause index_ids to no longer be empty
1864
503
                index_format = InvertedIndexStorageFormatPB::V2;
1865
503
                index_ids.clear();
1866
503
            } else {
1867
0
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
1868
0
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
1869
0
                ret = -1;
1870
0
                continue;
1871
0
            }
1872
3.55k
        }
1873
6.14k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
1874
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
1875
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
1876
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
1877
5
            continue;
1878
5
        }
1879
36.8k
        for (int64_t i = 0; i < num_segments; ++i) {
1880
30.6k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
1881
30.6k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
1882
59.2k
                for (const auto& index_id : index_ids) {
1883
59.2k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
1884
59.2k
                                                                index_id.first, index_id.second));
1885
59.2k
                }
1886
28.1k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
1887
                // try to recycle inverted index v2 when get_ret == 1
1888
                // we treat schema not found as if it has a v2 format inverted index
1889
                // to reduce chance of data leakage
1890
2.50k
                if (inverted_index_get_ret == 1) {
1891
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
1892
2.50k
                            .tag("instance_id", instance_id_)
1893
2.50k
                            .tag("inverted index v2 path",
1894
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
1895
2.50k
                }
1896
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
1897
2.50k
            }
1898
30.6k
        }
1899
6.13k
    }
1900
1901
32
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
1902
32
                                                 "delete_rowset_data",
1903
34
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
1903
34
                                                 [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
1904
32
    for (auto& [resource_id, file_paths] : resource_file_paths) {
1905
29
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
1906
29
            DCHECK(accessor_map_.count(*rid))
1907
0
                    << "uninitilized accessor, instance_id=" << instance_id_
1908
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
1909
29
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
1910
29
                                     &accessor_map_);
1911
29
            if (!accessor_map_.contains(*rid)) {
1912
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
1913
0
                        .tag("resource_id", resource_id)
1914
0
                        .tag("instance_id", instance_id_);
1915
0
                return -1;
1916
0
            }
1917
29
            auto& accessor = accessor_map_[*rid];
1918
29
            int ret = accessor->delete_files(*paths);
1919
29
            if (!ret) {
1920
                // deduplication of different files with the same rowset id
1921
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
1922
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
1923
29
                std::set<std::string> deleted_rowset_id;
1924
1925
29
                std::for_each(
1926
29
                        paths->begin(), paths->end(),
1927
92.3k
                        [&metrics_context, &rowsets, &deleted_rowset_id](const std::string& path) {
1928
92.3k
                            std::vector<std::string> str;
1929
92.3k
                            butil::SplitString(path, '/', &str);
1930
92.3k
                            std::string rowset_id;
1931
92.3k
                            if (auto pos = str.back().find('_'); pos != std::string::npos) {
1932
92.3k
                                rowset_id = str.back().substr(0, pos);
1933
92.3k
                            } else {
1934
4
                                LOG(WARNING) << "failed to parse rowset_id, path=" << path;
1935
4
                                return;
1936
4
                            }
1937
92.3k
                            auto rs_meta = rowsets.find(rowset_id);
1938
92.3k
                            if (rs_meta != rowsets.end() &&
1939
92.3k
                                !deleted_rowset_id.contains(rowset_id)) {
1940
6.13k
                                deleted_rowset_id.emplace(rowset_id);
1941
6.13k
                                metrics_context.total_recycled_data_size +=
1942
6.13k
                                        rs_meta->second.total_disk_size();
1943
6.13k
                                segment_metrics_context_.total_recycled_num +=
1944
6.13k
                                        rs_meta->second.num_segments();
1945
6.13k
                                segment_metrics_context_.total_recycled_data_size +=
1946
6.13k
                                        rs_meta->second.total_disk_size();
1947
6.13k
                                metrics_context.total_recycled_num++;
1948
6.13k
                            }
1949
92.3k
                        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
1927
92.3k
                        [&metrics_context, &rowsets, &deleted_rowset_id](const std::string& path) {
1928
92.3k
                            std::vector<std::string> str;
1929
92.3k
                            butil::SplitString(path, '/', &str);
1930
92.3k
                            std::string rowset_id;
1931
92.3k
                            if (auto pos = str.back().find('_'); pos != std::string::npos) {
1932
92.3k
                                rowset_id = str.back().substr(0, pos);
1933
92.3k
                            } else {
1934
4
                                LOG(WARNING) << "failed to parse rowset_id, path=" << path;
1935
4
                                return;
1936
4
                            }
1937
92.3k
                            auto rs_meta = rowsets.find(rowset_id);
1938
92.3k
                            if (rs_meta != rowsets.end() &&
1939
92.3k
                                !deleted_rowset_id.contains(rowset_id)) {
1940
6.13k
                                deleted_rowset_id.emplace(rowset_id);
1941
6.13k
                                metrics_context.total_recycled_data_size +=
1942
6.13k
                                        rs_meta->second.total_disk_size();
1943
6.13k
                                segment_metrics_context_.total_recycled_num +=
1944
6.13k
                                        rs_meta->second.num_segments();
1945
6.13k
                                segment_metrics_context_.total_recycled_data_size +=
1946
6.13k
                                        rs_meta->second.total_disk_size();
1947
6.13k
                                metrics_context.total_recycled_num++;
1948
6.13k
                            }
1949
92.3k
                        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
1950
29
                segment_metrics_context_.report();
1951
29
                metrics_context.report();
1952
29
            }
1953
29
            return ret;
1954
29
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
1905
29
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
1906
29
            DCHECK(accessor_map_.count(*rid))
1907
0
                    << "uninitilized accessor, instance_id=" << instance_id_
1908
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
1909
29
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
1910
29
                                     &accessor_map_);
1911
29
            if (!accessor_map_.contains(*rid)) {
1912
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
1913
0
                        .tag("resource_id", resource_id)
1914
0
                        .tag("instance_id", instance_id_);
1915
0
                return -1;
1916
0
            }
1917
29
            auto& accessor = accessor_map_[*rid];
1918
29
            int ret = accessor->delete_files(*paths);
1919
29
            if (!ret) {
1920
                // deduplication of different files with the same rowset id
1921
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
1922
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
1923
29
                std::set<std::string> deleted_rowset_id;
1924
1925
29
                std::for_each(
1926
29
                        paths->begin(), paths->end(),
1927
29
                        [&metrics_context, &rowsets, &deleted_rowset_id](const std::string& path) {
1928
29
                            std::vector<std::string> str;
1929
29
                            butil::SplitString(path, '/', &str);
1930
29
                            std::string rowset_id;
1931
29
                            if (auto pos = str.back().find('_'); pos != std::string::npos) {
1932
29
                                rowset_id = str.back().substr(0, pos);
1933
29
                            } else {
1934
29
                                LOG(WARNING) << "failed to parse rowset_id, path=" << path;
1935
29
                                return;
1936
29
                            }
1937
29
                            auto rs_meta = rowsets.find(rowset_id);
1938
29
                            if (rs_meta != rowsets.end() &&
1939
29
                                !deleted_rowset_id.contains(rowset_id)) {
1940
29
                                deleted_rowset_id.emplace(rowset_id);
1941
29
                                metrics_context.total_recycled_data_size +=
1942
29
                                        rs_meta->second.total_disk_size();
1943
29
                                segment_metrics_context_.total_recycled_num +=
1944
29
                                        rs_meta->second.num_segments();
1945
29
                                segment_metrics_context_.total_recycled_data_size +=
1946
29
                                        rs_meta->second.total_disk_size();
1947
29
                                metrics_context.total_recycled_num++;
1948
29
                            }
1949
29
                        });
1950
29
                segment_metrics_context_.report();
1951
29
                metrics_context.report();
1952
29
            }
1953
29
            return ret;
1954
29
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
1955
29
    }
1956
32
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
1957
5
        LOG_INFO(
1958
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
1959
5
                "resource_id={}, tablet_id={}, instance_id={}",
1960
5
                rowset_id, resource_id, tablet_id, instance_id_);
1961
5
        concurrent_delete_executor.add([&]() -> int {
1962
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
1963
5
            if (!ret) {
1964
5
                auto rs = rowsets.at(rowset_id);
1965
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
1966
5
                metrics_context.total_recycled_num++;
1967
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
1968
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
1969
5
                metrics_context.report();
1970
5
                segment_metrics_context_.report();
1971
5
            }
1972
5
            return ret;
1973
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
1961
5
        concurrent_delete_executor.add([&]() -> int {
1962
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
1963
5
            if (!ret) {
1964
5
                auto rs = rowsets.at(rowset_id);
1965
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
1966
5
                metrics_context.total_recycled_num++;
1967
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
1968
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
1969
5
                metrics_context.report();
1970
5
                segment_metrics_context_.report();
1971
5
            }
1972
5
            return ret;
1973
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
1974
5
    }
1975
1976
32
    bool finished = true;
1977
32
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
1978
34
    for (int r : rets) {
1979
34
        if (r != 0) {
1980
0
            ret = -1;
1981
0
            break;
1982
0
        }
1983
34
    }
1984
32
    ret = finished ? ret : -1;
1985
32
    return ret;
1986
32
}
1987
1988
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
1989
2.91k
                                         const std::string& rowset_id) {
1990
2.91k
    auto it = accessor_map_.find(resource_id);
1991
2.91k
    if (it == accessor_map_.end()) {
1992
0
        LOG_WARNING("instance has no such resource id")
1993
0
                .tag("instance_id", instance_id_)
1994
0
                .tag("resource_id", resource_id)
1995
0
                .tag("tablet_id", tablet_id)
1996
0
                .tag("rowset_id", rowset_id);
1997
0
        return -1;
1998
0
    }
1999
2.91k
    auto& accessor = it->second;
2000
2.91k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
2001
2.91k
}
2002
2003
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
2004
                                                  RecyclerMetricsContext& metrics_context,
2005
0
                                                  int64_t partition_id, bool is_empty_tablet) {
2006
0
    std::string tablet_key_begin, tablet_key_end;
2007
2008
0
    if (partition_id > 0) {
2009
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2010
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2011
0
    } else {
2012
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2013
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2014
0
    }
2015
    // for calculate the total num or bytes of recyled objects
2016
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
2017
0
                                                          std::string_view v) -> int {
2018
0
        doris::TabletMetaCloudPB tablet_meta_pb;
2019
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2020
0
            return 0;
2021
0
        }
2022
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2023
2024
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2025
0
            return 0;
2026
0
        }
2027
2028
0
        if (!is_empty_tablet) {
2029
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
2030
0
                return 0;
2031
0
            }
2032
0
            tablet_metrics_context_.total_need_recycle_num++;
2033
0
        }
2034
0
        return 0;
2035
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
2036
0
    return scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics),
2037
0
                            [&metrics_context]() -> int {
2038
0
                                metrics_context.report();
2039
0
                                tablet_metrics_context_.report();
2040
0
                                return 0;
2041
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEv
2042
0
}
2043
2044
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
2045
0
                                                 RecyclerMetricsContext& metrics_context) {
2046
0
    int ret = 0;
2047
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
2048
0
    std::unique_ptr<Transaction> txn;
2049
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2050
0
        LOG_WARNING("failed to recycle tablet ")
2051
0
                .tag("tablet id", tablet_id)
2052
0
                .tag("instance_id", instance_id_)
2053
0
                .tag("reason", "failed to create txn");
2054
0
        ret = -1;
2055
0
    }
2056
0
    GetRowsetResponse resp;
2057
0
    std::string msg;
2058
0
    MetaServiceCode code = MetaServiceCode::OK;
2059
    // get rowsets in tablet
2060
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
2061
0
                        tablet_id, code, msg, &resp);
2062
0
    if (code != MetaServiceCode::OK) {
2063
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2064
0
                .tag("tablet id", tablet_id)
2065
0
                .tag("msg", msg)
2066
0
                .tag("code", code)
2067
0
                .tag("instance id", instance_id_);
2068
0
        ret = -1;
2069
0
    }
2070
0
    for (const auto& rs_meta : resp.rowset_meta()) {
2071
        /*
2072
        * For compatibility, we skip the loop for [0-1] here. 
2073
        * The purpose of this loop is to delete object files,
2074
        * and since [0-1] only has meta and doesn't have object files, 
2075
        * skipping it doesn't affect system correctness. 
2076
        *
2077
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below 
2078
        * would return error -1 directly, causing the recycle operation to fail.
2079
        *
2080
        * [0-1] doesn't have resource id is a bug.
2081
        * In the future, we will fix this problem, after that,
2082
        * we can remove this if statement.
2083
        *
2084
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
2085
        */
2086
2087
0
        if (rs_meta.end_version() == 1) {
2088
            // Assert that [0-1] has no resource_id to make sure
2089
            // this if statement will not be forgetted to remove
2090
            // when the resource id bug is fixed
2091
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2092
0
            continue;
2093
0
        }
2094
0
        if (!rs_meta.has_resource_id()) {
2095
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2096
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
2097
0
                    .tag("instance_id", instance_id_)
2098
0
                    .tag("tablet_id", tablet_id);
2099
0
            continue;
2100
0
        }
2101
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2102
0
        auto it = accessor_map_.find(rs_meta.resource_id());
2103
        // possible if the accessor is not initilized correctly
2104
0
        if (it == accessor_map_.end()) [[unlikely]] {
2105
0
            LOG_WARNING(
2106
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2107
0
                    "recycle process")
2108
0
                    .tag("tablet id", tablet_id)
2109
0
                    .tag("instance_id", instance_id_)
2110
0
                    .tag("resource_id", rs_meta.resource_id())
2111
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2112
0
            continue;
2113
0
        }
2114
2115
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
2116
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
2117
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
2118
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
2119
0
    }
2120
0
    return ret;
2121
0
}
2122
2123
335
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
2124
335
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
2125
335
            .tag("instance_id", instance_id_)
2126
335
            .tag("tablet_id", tablet_id);
2127
2128
335
    if (instance_info_.has_multi_version_status() &&
2129
335
        instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED) {
2130
61
        return recycle_versioned_tablet(tablet_id, metrics_context);
2131
61
    }
2132
2133
274
    int ret = 0;
2134
274
    auto start_time = steady_clock::now();
2135
2136
274
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
2137
2138
    // collect resource ids
2139
254
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
2140
254
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
2141
254
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
2142
254
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
2143
254
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
2144
254
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
2145
2146
254
    std::set<std::string> resource_ids;
2147
254
    int64_t recycle_rowsets_number = 0;
2148
254
    int64_t recycle_segments_number = 0;
2149
254
    int64_t recycle_rowsets_data_size = 0;
2150
254
    int64_t recycle_rowsets_index_size = 0;
2151
254
    int64_t recycle_restore_job_rowsets_number = 0;
2152
254
    int64_t recycle_restore_job_segments_number = 0;
2153
254
    int64_t recycle_restore_job_rowsets_data_size = 0;
2154
254
    int64_t recycle_restore_job_rowsets_index_size = 0;
2155
254
    int64_t max_rowset_version = 0;
2156
254
    int64_t min_rowset_creation_time = INT64_MAX;
2157
254
    int64_t max_rowset_creation_time = 0;
2158
254
    int64_t min_rowset_expiration_time = INT64_MAX;
2159
254
    int64_t max_rowset_expiration_time = 0;
2160
2161
254
    DORIS_CLOUD_DEFER {
2162
254
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2163
254
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2164
254
                .tag("instance_id", instance_id_)
2165
254
                .tag("tablet_id", tablet_id)
2166
254
                .tag("recycle rowsets number", recycle_rowsets_number)
2167
254
                .tag("recycle segments number", recycle_segments_number)
2168
254
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2169
254
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2170
254
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
2171
254
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
2172
254
                .tag("all restore job rowsets recycle data size",
2173
254
                     recycle_restore_job_rowsets_data_size)
2174
254
                .tag("all restore job rowsets recycle index size",
2175
254
                     recycle_restore_job_rowsets_index_size)
2176
254
                .tag("max rowset version", max_rowset_version)
2177
254
                .tag("min rowset creation time", min_rowset_creation_time)
2178
254
                .tag("max rowset creation time", max_rowset_creation_time)
2179
254
                .tag("min rowset expiration time", min_rowset_expiration_time)
2180
254
                .tag("max rowset expiration time", max_rowset_expiration_time)
2181
254
                .tag("ret", ret);
2182
254
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
2161
254
    DORIS_CLOUD_DEFER {
2162
254
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2163
254
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2164
254
                .tag("instance_id", instance_id_)
2165
254
                .tag("tablet_id", tablet_id)
2166
254
                .tag("recycle rowsets number", recycle_rowsets_number)
2167
254
                .tag("recycle segments number", recycle_segments_number)
2168
254
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2169
254
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2170
254
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
2171
254
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
2172
254
                .tag("all restore job rowsets recycle data size",
2173
254
                     recycle_restore_job_rowsets_data_size)
2174
254
                .tag("all restore job rowsets recycle index size",
2175
254
                     recycle_restore_job_rowsets_index_size)
2176
254
                .tag("max rowset version", max_rowset_version)
2177
254
                .tag("min rowset creation time", min_rowset_creation_time)
2178
254
                .tag("max rowset creation time", max_rowset_creation_time)
2179
254
                .tag("min rowset expiration time", min_rowset_expiration_time)
2180
254
                .tag("max rowset expiration time", max_rowset_expiration_time)
2181
254
                .tag("ret", ret);
2182
254
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
2183
2184
254
    std::unique_ptr<Transaction> txn;
2185
254
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2186
0
        LOG_WARNING("failed to recycle tablet ")
2187
0
                .tag("tablet id", tablet_id)
2188
0
                .tag("instance_id", instance_id_)
2189
0
                .tag("reason", "failed to create txn");
2190
0
        ret = -1;
2191
0
    }
2192
254
    GetRowsetResponse resp;
2193
254
    std::string msg;
2194
254
    MetaServiceCode code = MetaServiceCode::OK;
2195
    // get rowsets in tablet
2196
254
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
2197
254
                        tablet_id, code, msg, &resp);
2198
254
    if (code != MetaServiceCode::OK) {
2199
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2200
0
                .tag("tablet id", tablet_id)
2201
0
                .tag("msg", msg)
2202
0
                .tag("code", code)
2203
0
                .tag("instance id", instance_id_);
2204
0
        ret = -1;
2205
0
    }
2206
254
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
2207
2208
2.50k
    for (const auto& rs_meta : resp.rowset_meta()) {
2209
        /*
2210
        * For compatibility, we skip the loop for [0-1] here. 
2211
        * The purpose of this loop is to delete object files,
2212
        * and since [0-1] only has meta and doesn't have object files, 
2213
        * skipping it doesn't affect system correctness. 
2214
        *
2215
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below 
2216
        * would return error -1 directly, causing the recycle operation to fail.
2217
        *
2218
        * [0-1] doesn't have resource id is a bug.
2219
        * In the future, we will fix this problem, after that,
2220
        * we can remove this if statement.
2221
        *
2222
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
2223
        */
2224
2225
2.50k
        if (rs_meta.end_version() == 1) {
2226
            // Assert that [0-1] has no resource_id to make sure
2227
            // this if statement will not be forgetted to remove
2228
            // when the resource id bug is fixed
2229
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2230
0
            recycle_rowsets_number += 1;
2231
0
            continue;
2232
0
        }
2233
2.50k
        if (!rs_meta.has_resource_id()) {
2234
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2235
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
2236
1
                    .tag("instance_id", instance_id_)
2237
1
                    .tag("tablet_id", tablet_id);
2238
1
            return -1;
2239
1
        }
2240
2.50k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2241
2.50k
        auto it = accessor_map_.find(rs_meta.resource_id());
2242
        // possible if the accessor is not initilized correctly
2243
2.50k
        if (it == accessor_map_.end()) [[unlikely]] {
2244
1
            LOG_WARNING(
2245
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2246
1
                    "recycle process")
2247
1
                    .tag("tablet id", tablet_id)
2248
1
                    .tag("instance_id", instance_id_)
2249
1
                    .tag("resource_id", rs_meta.resource_id())
2250
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2251
1
            return -1;
2252
1
        }
2253
2.50k
        recycle_rowsets_number += 1;
2254
2.50k
        recycle_segments_number += rs_meta.num_segments();
2255
2.50k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
2256
2.50k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
2257
2.50k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
2258
2.50k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
2259
2.50k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
2260
2.50k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
2261
2.50k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
2262
2.50k
        resource_ids.emplace(rs_meta.resource_id());
2263
2.50k
    }
2264
2265
    // get restore job rowset in tablet
2266
252
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
2267
252
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
2268
252
    if (code != MetaServiceCode::OK) {
2269
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
2270
0
                .tag("tablet id", tablet_id)
2271
0
                .tag("msg", msg)
2272
0
                .tag("code", code)
2273
0
                .tag("instance id", instance_id_);
2274
0
        return -1;
2275
0
    }
2276
2277
252
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
2278
100
        if (!rs_meta.has_resource_id()) {
2279
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2280
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
2281
0
                    .tag("instance_id", instance_id_)
2282
0
                    .tag("tablet_id", tablet_id);
2283
0
            return -1;
2284
0
        }
2285
2286
100
        auto it = accessor_map_.find(rs_meta.resource_id());
2287
        // possible if the accessor is not initilized correctly
2288
100
        if (it == accessor_map_.end()) [[unlikely]] {
2289
0
            LOG_WARNING(
2290
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2291
0
                    "recycle process")
2292
0
                    .tag("tablet id", tablet_id)
2293
0
                    .tag("instance_id", instance_id_)
2294
0
                    .tag("resource_id", rs_meta.resource_id())
2295
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2296
0
            return -1;
2297
0
        }
2298
100
        recycle_restore_job_rowsets_number += 1;
2299
100
        recycle_restore_job_segments_number += rs_meta.num_segments();
2300
100
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
2301
100
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
2302
100
        resource_ids.emplace(rs_meta.resource_id());
2303
100
    }
2304
2305
252
    LOG_INFO("recycle tablet start to delete object")
2306
252
            .tag("instance id", instance_id_)
2307
252
            .tag("tablet id", tablet_id)
2308
252
            .tag("recycle tablet resource ids are",
2309
252
                 std::accumulate(resource_ids.begin(), resource_ids.begin(), std::string(),
2310
252
                                 [](std::string rs_id, const auto& it) {
2311
0
                                     return rs_id.empty() ? it : rs_id + ", " + it;
2312
0
                                 }));
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
2313
2314
252
    SyncExecutor<int> concurrent_delete_executor(
2315
252
            _thread_pool_group.s3_producer_pool,
2316
252
            fmt::format("delete tablet {} s3 rowset", tablet_id),
2317
252
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKi
Line
Count
Source
2317
223
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKi
2318
2319
    // delete all rowset data in this tablet
2320
    // ATTN: there may be data leak if not all accessor initilized successfully
2321
    //       partial data deleted if the tablet is stored cross-storage vault
2322
    //       vault id is not attached to TabletMeta...
2323
252
    for (const auto& resource_id : resource_ids) {
2324
223
        concurrent_delete_executor.add([&, rs_id = resource_id,
2325
223
                                        accessor_ptr = accessor_map_[resource_id]]() {
2326
223
            std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2327
223
                g_bvar_recycler_vault_recycle_task_concurrency.put(
2328
223
                        {instance_id_, metrics_context.operation_type, rs_id}, -1);
2329
223
                metrics_context.report();
2330
223
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEvENKUlPiE_clES5_
Line
Count
Source
2326
223
            std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2327
223
                g_bvar_recycler_vault_recycle_task_concurrency.put(
2328
223
                        {instance_id_, metrics_context.operation_type, rs_id}, -1);
2329
223
                metrics_context.report();
2330
223
            });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEvENKUlPiE_clES5_
2331
223
            g_bvar_recycler_vault_recycle_task_concurrency.put(
2332
223
                    {instance_id_, metrics_context.operation_type, rs_id}, 1);
2333
223
            int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
2334
223
            if (res != 0) {
2335
1
                LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
2336
1
                             << " path=" << accessor_ptr->uri();
2337
1
                g_bvar_recycler_vault_recycle_status.put({instance_id_, rs_id, "abnormal"}, 1);
2338
1
                return -1;
2339
1
            }
2340
222
            g_bvar_recycler_vault_recycle_status.put({instance_id_, rs_id, "normal"}, 1);
2341
222
            return 0;
2342
223
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv
Line
Count
Source
2325
223
                                        accessor_ptr = accessor_map_[resource_id]]() {
2326
223
            std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2327
223
                g_bvar_recycler_vault_recycle_task_concurrency.put(
2328
223
                        {instance_id_, metrics_context.operation_type, rs_id}, -1);
2329
223
                metrics_context.report();
2330
223
            });
2331
223
            g_bvar_recycler_vault_recycle_task_concurrency.put(
2332
223
                    {instance_id_, metrics_context.operation_type, rs_id}, 1);
2333
223
            int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
2334
223
            if (res != 0) {
2335
1
                LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
2336
1
                             << " path=" << accessor_ptr->uri();
2337
1
                g_bvar_recycler_vault_recycle_status.put({instance_id_, rs_id, "abnormal"}, 1);
2338
1
                return -1;
2339
1
            }
2340
222
            g_bvar_recycler_vault_recycle_status.put({instance_id_, rs_id, "normal"}, 1);
2341
222
            return 0;
2342
223
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv
2343
223
    }
2344
2345
252
    bool finished = true;
2346
252
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2347
252
    for (int r : rets) {
2348
223
        if (r != 0) {
2349
1
            ret = -1;
2350
1
        }
2351
223
    }
2352
2353
252
    ret = finished ? ret : -1;
2354
2355
252
    if (ret != 0) { // failed recycle tablet data
2356
1
        LOG_WARNING("ret!=0")
2357
1
                .tag("finished", finished)
2358
1
                .tag("ret", ret)
2359
1
                .tag("instance_id", instance_id_)
2360
1
                .tag("tablet_id", tablet_id);
2361
1
        return ret;
2362
1
    }
2363
2364
251
    tablet_metrics_context_.total_recycled_data_size +=
2365
251
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2366
251
    tablet_metrics_context_.total_recycled_num += 1;
2367
251
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
2368
251
    segment_metrics_context_.total_recycled_data_size +=
2369
251
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2370
251
    metrics_context.total_recycled_data_size +=
2371
251
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2372
251
    tablet_metrics_context_.report();
2373
251
    segment_metrics_context_.report();
2374
251
    metrics_context.report();
2375
2376
251
    txn.reset();
2377
251
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2378
0
        LOG_WARNING("failed to recycle tablet ")
2379
0
                .tag("tablet id", tablet_id)
2380
0
                .tag("instance_id", instance_id_)
2381
0
                .tag("reason", "failed to create txn");
2382
0
        ret = -1;
2383
0
    }
2384
    // delete all rowset kv in this tablet
2385
251
    txn->remove(rs_key0, rs_key1);
2386
251
    txn->remove(recyc_rs_key0, recyc_rs_key1);
2387
251
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
2388
2389
    // remove delete bitmap for MoW table
2390
251
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
2391
251
    txn->remove(pending_key);
2392
251
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
2393
251
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
2394
251
    txn->remove(delete_bitmap_start, delete_bitmap_end);
2395
2396
251
    TxnErrorCode err = txn->commit();
2397
251
    if (err != TxnErrorCode::TXN_OK) {
2398
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
2399
0
        ret = -1;
2400
0
    }
2401
2402
251
    if (ret == 0) {
2403
        // All object files under tablet have been deleted
2404
251
        std::lock_guard lock(recycled_tablets_mtx_);
2405
251
        recycled_tablets_.insert(tablet_id);
2406
251
    }
2407
2408
251
    return ret;
2409
252
}
2410
2411
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
2412
61
                                               RecyclerMetricsContext& metrics_context) {
2413
61
    int ret = 0;
2414
61
    auto start_time = steady_clock::now();
2415
2416
61
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
2417
2418
    // collect resource ids
2419
61
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
2420
61
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
2421
61
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
2422
61
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
2423
2424
61
    int64_t recycle_rowsets_number = 0;
2425
61
    int64_t recycle_segments_number = 0;
2426
61
    int64_t recycle_rowsets_data_size = 0;
2427
61
    int64_t recycle_rowsets_index_size = 0;
2428
61
    int64_t max_rowset_version = 0;
2429
61
    int64_t min_rowset_creation_time = INT64_MAX;
2430
61
    int64_t max_rowset_creation_time = 0;
2431
61
    int64_t min_rowset_expiration_time = INT64_MAX;
2432
61
    int64_t max_rowset_expiration_time = 0;
2433
2434
61
    DORIS_CLOUD_DEFER {
2435
61
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2436
61
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2437
61
                .tag("instance_id", instance_id_)
2438
61
                .tag("tablet_id", tablet_id)
2439
61
                .tag("recycle rowsets number", recycle_rowsets_number)
2440
61
                .tag("recycle segments number", recycle_segments_number)
2441
61
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2442
61
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2443
61
                .tag("max rowset version", max_rowset_version)
2444
61
                .tag("min rowset creation time", min_rowset_creation_time)
2445
61
                .tag("max rowset creation time", max_rowset_creation_time)
2446
61
                .tag("min rowset expiration time", min_rowset_expiration_time)
2447
61
                .tag("max rowset expiration time", max_rowset_expiration_time)
2448
61
                .tag("ret", ret);
2449
61
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
2434
61
    DORIS_CLOUD_DEFER {
2435
61
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2436
61
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2437
61
                .tag("instance_id", instance_id_)
2438
61
                .tag("tablet_id", tablet_id)
2439
61
                .tag("recycle rowsets number", recycle_rowsets_number)
2440
61
                .tag("recycle segments number", recycle_segments_number)
2441
61
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2442
61
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2443
61
                .tag("max rowset version", max_rowset_version)
2444
61
                .tag("min rowset creation time", min_rowset_creation_time)
2445
61
                .tag("max rowset creation time", max_rowset_creation_time)
2446
61
                .tag("min rowset expiration time", min_rowset_expiration_time)
2447
61
                .tag("max rowset expiration time", max_rowset_expiration_time)
2448
61
                .tag("ret", ret);
2449
61
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
2450
2451
61
    std::unique_ptr<Transaction> txn;
2452
61
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2453
0
        LOG_WARNING("failed to recycle tablet ")
2454
0
                .tag("tablet id", tablet_id)
2455
0
                .tag("instance_id", instance_id_)
2456
0
                .tag("reason", "failed to create txn");
2457
0
        ret = -1;
2458
0
    }
2459
61
    GetRowsetResponse resp;
2460
61
    std::string msg;
2461
61
    MetaServiceCode code = MetaServiceCode::OK;
2462
    // get rowsets in tablet
2463
61
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
2464
61
                        tablet_id, code, msg, &resp);
2465
61
    if (code != MetaServiceCode::OK) {
2466
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2467
0
                .tag("tablet id", tablet_id)
2468
0
                .tag("msg", msg)
2469
0
                .tag("code", code)
2470
0
                .tag("instance id", instance_id_);
2471
0
        ret = -1;
2472
0
    }
2473
61
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
2474
2475
61
    SyncExecutor<int> concurrent_delete_executor(
2476
61
            _thread_pool_group.s3_producer_pool,
2477
61
            fmt::format("delete tablet {} s3 rowset", tablet_id),
2478
62
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
2478
62
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
2479
2480
62
    for (const auto& rs_meta : resp.rowset_meta()) {
2481
62
        recycle_rowsets_number += 1;
2482
62
        recycle_segments_number += rs_meta.num_segments();
2483
62
        recycle_rowsets_data_size += rs_meta.data_disk_size();
2484
62
        recycle_rowsets_index_size += rs_meta.index_disk_size();
2485
62
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
2486
62
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
2487
62
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
2488
62
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
2489
62
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
2490
2491
62
        concurrent_delete_executor.add([tablet_id, rs_meta_pb = rs_meta, this]() {
2492
62
            std::string rowset_key =
2493
62
                    meta_rowset_key({instance_id_, tablet_id, rs_meta_pb.end_version()});
2494
62
            return recycle_rowset_meta_and_data(rowset_key, rs_meta_pb);
2495
62
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
2491
62
        concurrent_delete_executor.add([tablet_id, rs_meta_pb = rs_meta, this]() {
2492
62
            std::string rowset_key =
2493
62
                    meta_rowset_key({instance_id_, tablet_id, rs_meta_pb.end_version()});
2494
62
            return recycle_rowset_meta_and_data(rowset_key, rs_meta_pb);
2495
62
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clEv
2496
62
    }
2497
2498
61
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
2499
0
        RecycleRowsetPB recycle_rowset;
2500
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
2501
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2502
0
            return -1;
2503
0
        }
2504
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
2505
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
2506
                // in old version, keep this key-value pair and it needs to be checked manually
2507
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2508
0
                return -1;
2509
0
            }
2510
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
2511
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2512
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2513
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
2514
0
                return -1;
2515
0
            }
2516
            // decode rowset_id
2517
0
            auto k1 = k;
2518
0
            k1.remove_prefix(1);
2519
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2520
0
            decode_key(&k1, &out);
2521
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2522
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2523
0
            LOG_INFO("delete rowset data")
2524
0
                    .tag("instance_id", instance_id_)
2525
0
                    .tag("tablet_id", tablet_id)
2526
0
                    .tag("rowset_id", rowset_id);
2527
2528
0
            concurrent_delete_executor.add(
2529
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
2530
                        // delete by prefix, the recycle rowset key will be deleted by range later.
2531
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
2532
0
                    });
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
2533
0
        } else {
2534
0
            concurrent_delete_executor.add(
2535
0
                    [k = std::string(k), recycle_rowset = std::move(recycle_rowset), this]() {
2536
0
                        return recycle_rowset_meta_and_data(k, recycle_rowset.rowset_meta());
2537
0
                    });
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE0_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE0_clEv
2538
0
        }
2539
0
        return 0;
2540
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
2541
2542
61
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
2543
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
2544
0
                .tag("tablet id", tablet_id)
2545
0
                .tag("instance_id", instance_id_)
2546
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
2547
0
        ret = -1;
2548
0
    }
2549
2550
61
    bool finished = true;
2551
61
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2552
62
    for (int r : rets) {
2553
62
        if (r != 0) {
2554
0
            ret = -1;
2555
0
        }
2556
62
    }
2557
2558
61
    ret = finished ? ret : -1;
2559
2560
61
    if (ret != 0) { // failed recycle tablet data
2561
0
        LOG_WARNING("ret!=0")
2562
0
                .tag("finished", finished)
2563
0
                .tag("ret", ret)
2564
0
                .tag("instance_id", instance_id_)
2565
0
                .tag("tablet_id", tablet_id);
2566
0
        return ret;
2567
0
    }
2568
2569
61
    tablet_metrics_context_.total_recycled_data_size +=
2570
61
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2571
61
    tablet_metrics_context_.total_recycled_num += 1;
2572
61
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
2573
61
    segment_metrics_context_.total_recycled_data_size +=
2574
61
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2575
61
    metrics_context.total_recycled_data_size +=
2576
61
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2577
61
    tablet_metrics_context_.report();
2578
61
    segment_metrics_context_.report();
2579
61
    metrics_context.report();
2580
2581
61
    txn.reset();
2582
61
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2583
0
        LOG_WARNING("failed to recycle tablet ")
2584
0
                .tag("tablet id", tablet_id)
2585
0
                .tag("instance_id", instance_id_)
2586
0
                .tag("reason", "failed to create txn");
2587
0
        ret = -1;
2588
0
    }
2589
    // delete all rowset kv in this tablet
2590
61
    txn->remove(rs_key0, rs_key1);
2591
61
    txn->remove(recyc_rs_key0, recyc_rs_key1);
2592
2593
    // remove delete bitmap for MoW table
2594
61
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
2595
61
    txn->remove(pending_key);
2596
61
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
2597
61
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
2598
61
    txn->remove(delete_bitmap_start, delete_bitmap_end);
2599
2600
61
    TxnErrorCode err = txn->commit();
2601
61
    if (err != TxnErrorCode::TXN_OK) {
2602
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
2603
0
        ret = -1;
2604
0
    }
2605
2606
61
    if (ret == 0) {
2607
        // All object files under tablet have been deleted
2608
61
        std::lock_guard lock(recycled_tablets_mtx_);
2609
61
        recycled_tablets_.insert(tablet_id);
2610
61
    }
2611
2612
61
    return ret;
2613
61
}
2614
2615
14
int InstanceRecycler::recycle_rowsets() {
2616
14
    if (instance_info_.has_multi_version_status() &&
2617
14
        instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED) {
2618
1
        return recycle_versioned_rowsets();
2619
1
    }
2620
2621
13
    const std::string task_name = "recycle_rowsets";
2622
13
    int64_t num_scanned = 0;
2623
13
    int64_t num_expired = 0;
2624
13
    int64_t num_prepare = 0;
2625
13
    int64_t num_compacted = 0;
2626
13
    int64_t num_empty_rowset = 0;
2627
13
    size_t total_rowset_key_size = 0;
2628
13
    size_t total_rowset_value_size = 0;
2629
13
    size_t expired_rowset_size = 0;
2630
13
    std::atomic_long num_recycled = 0;
2631
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2632
2633
13
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
2634
13
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
2635
13
    std::string recyc_rs_key0;
2636
13
    std::string recyc_rs_key1;
2637
13
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
2638
13
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
2639
2640
13
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
2641
2642
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2643
13
    register_recycle_task(task_name, start_time);
2644
2645
13
    DORIS_CLOUD_DEFER {
2646
13
        unregister_recycle_task(task_name);
2647
13
        int64_t cost =
2648
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2649
13
        metrics_context.finish_report();
2650
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
2651
13
                .tag("instance_id", instance_id_)
2652
13
                .tag("num_scanned", num_scanned)
2653
13
                .tag("num_expired", num_expired)
2654
13
                .tag("num_recycled", num_recycled)
2655
13
                .tag("num_recycled.prepare", num_prepare)
2656
13
                .tag("num_recycled.compacted", num_compacted)
2657
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
2658
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2659
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2660
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
2661
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
2645
13
    DORIS_CLOUD_DEFER {
2646
13
        unregister_recycle_task(task_name);
2647
13
        int64_t cost =
2648
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2649
13
        metrics_context.finish_report();
2650
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
2651
13
                .tag("instance_id", instance_id_)
2652
13
                .tag("num_scanned", num_scanned)
2653
13
                .tag("num_expired", num_expired)
2654
13
                .tag("num_recycled", num_recycled)
2655
13
                .tag("num_recycled.prepare", num_prepare)
2656
13
                .tag("num_recycled.compacted", num_compacted)
2657
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
2658
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2659
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2660
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
2661
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
2662
2663
13
    std::vector<std::string> rowset_keys;
2664
    // rowset_id -> rowset_meta
2665
    // store rowset id and meta for statistics rs size when delete
2666
13
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
2667
2668
    // Store keys of rowset recycled by background workers
2669
13
    std::mutex async_recycled_rowset_keys_mutex;
2670
13
    std::vector<std::string> async_recycled_rowset_keys;
2671
13
    auto worker_pool = std::make_unique<SimpleThreadPool>(
2672
13
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
2673
13
    worker_pool->start();
2674
13
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
2675
900
                                            int64_t tablet_id, const std::string& rowset_id) {
2676
        // Try to delete rowset data in background thread
2677
900
        int ret = worker_pool->submit_with_timeout(
2678
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
2679
780
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2680
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2681
0
                        return;
2682
0
                    }
2683
780
                    std::vector<std::string> keys;
2684
780
                    {
2685
780
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
2686
780
                        async_recycled_rowset_keys.push_back(std::move(key));
2687
780
                        if (async_recycled_rowset_keys.size() > 100) {
2688
7
                            keys.swap(async_recycled_rowset_keys);
2689
7
                        }
2690
780
                    }
2691
780
                    if (keys.empty()) return;
2692
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
2693
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
2694
0
                                     << instance_id_;
2695
7
                    } else {
2696
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
2697
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
2698
7
                                           num_recycled, start_time);
2699
7
                    }
2700
7
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
2678
780
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
2679
780
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2680
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2681
0
                        return;
2682
0
                    }
2683
780
                    std::vector<std::string> keys;
2684
780
                    {
2685
780
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
2686
780
                        async_recycled_rowset_keys.push_back(std::move(key));
2687
780
                        if (async_recycled_rowset_keys.size() > 100) {
2688
7
                            keys.swap(async_recycled_rowset_keys);
2689
7
                        }
2690
780
                    }
2691
780
                    if (keys.empty()) return;
2692
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
2693
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
2694
0
                                     << instance_id_;
2695
7
                    } else {
2696
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
2697
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
2698
7
                                           num_recycled, start_time);
2699
7
                    }
2700
7
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
2701
900
                0);
2702
900
        if (ret == 0) return 0;
2703
        // Submit task failed, delete rowset data in current thread
2704
120
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2705
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2706
0
            return -1;
2707
0
        }
2708
120
        rowset_keys.push_back(std::move(key));
2709
120
        return 0;
2710
120
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
2675
900
                                            int64_t tablet_id, const std::string& rowset_id) {
2676
        // Try to delete rowset data in background thread
2677
900
        int ret = worker_pool->submit_with_timeout(
2678
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
2679
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2680
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2681
900
                        return;
2682
900
                    }
2683
900
                    std::vector<std::string> keys;
2684
900
                    {
2685
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
2686
900
                        async_recycled_rowset_keys.push_back(std::move(key));
2687
900
                        if (async_recycled_rowset_keys.size() > 100) {
2688
900
                            keys.swap(async_recycled_rowset_keys);
2689
900
                        }
2690
900
                    }
2691
900
                    if (keys.empty()) return;
2692
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
2693
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
2694
900
                                     << instance_id_;
2695
900
                    } else {
2696
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
2697
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
2698
900
                                           num_recycled, start_time);
2699
900
                    }
2700
900
                },
2701
900
                0);
2702
900
        if (ret == 0) return 0;
2703
        // Submit task failed, delete rowset data in current thread
2704
120
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2705
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2706
0
            return -1;
2707
0
        }
2708
120
        rowset_keys.push_back(std::move(key));
2709
120
        return 0;
2710
120
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
2711
2712
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2713
2714
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
2715
4.00k
        ++num_scanned;
2716
4.00k
        total_rowset_key_size += k.size();
2717
4.00k
        total_rowset_value_size += v.size();
2718
4.00k
        RecycleRowsetPB rowset;
2719
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2720
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2721
0
            return -1;
2722
0
        }
2723
2724
4.00k
        int64_t current_time = ::time(nullptr);
2725
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
2726
2727
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2728
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2729
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
2730
4.00k
        if (current_time < expiration) { // not expired
2731
0
            return 0;
2732
0
        }
2733
4.00k
        ++num_expired;
2734
4.00k
        expired_rowset_size += v.size();
2735
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
2736
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
2737
                // in old version, keep this key-value pair and it needs to be checked manually
2738
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2739
0
                return -1;
2740
0
            }
2741
250
            if (rowset.resource_id().empty()) [[unlikely]] {
2742
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2743
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2744
0
                          << hex(k) << " value=" << proto_to_json(rowset);
2745
0
                rowset_keys.emplace_back(k);
2746
0
                return -1;
2747
0
            }
2748
            // decode rowset_id
2749
250
            auto k1 = k;
2750
250
            k1.remove_prefix(1);
2751
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2752
250
            decode_key(&k1, &out);
2753
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2754
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2755
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2756
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
2757
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
2758
250
                                             rowset.tablet_id(), rowset_id) != 0) {
2759
0
                return -1;
2760
0
            }
2761
250
            return 0;
2762
250
        }
2763
        // TODO(plat1ko): check rowset not referenced
2764
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
2765
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
2766
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
2767
0
                LOG_INFO("recycle rowset that has empty resource id");
2768
0
            } else {
2769
                // other situations, keep this key-value pair and it needs to be checked manually
2770
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2771
0
                return -1;
2772
0
            }
2773
0
        }
2774
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2775
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
2776
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
2777
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
2778
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
2779
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
2780
3.75k
                  << " rowset_meta_size=" << v.size()
2781
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
2782
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
2783
            // unable to calculate file path, can only be deleted by rowset id prefix
2784
650
            num_prepare += 1;
2785
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
2786
650
                                             rowset_meta->tablet_id(),
2787
650
                                             rowset_meta->rowset_id_v2()) != 0) {
2788
0
                return -1;
2789
0
            }
2790
3.10k
        } else {
2791
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
2792
3.10k
            rowset_keys.emplace_back(k);
2793
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
2794
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
2795
3.10k
                ++num_empty_rowset;
2796
3.10k
            }
2797
3.10k
        }
2798
3.75k
        return 0;
2799
3.75k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2714
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
2715
4.00k
        ++num_scanned;
2716
4.00k
        total_rowset_key_size += k.size();
2717
4.00k
        total_rowset_value_size += v.size();
2718
4.00k
        RecycleRowsetPB rowset;
2719
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2720
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2721
0
            return -1;
2722
0
        }
2723
2724
4.00k
        int64_t current_time = ::time(nullptr);
2725
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
2726
2727
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2728
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2729
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
2730
4.00k
        if (current_time < expiration) { // not expired
2731
0
            return 0;
2732
0
        }
2733
4.00k
        ++num_expired;
2734
4.00k
        expired_rowset_size += v.size();
2735
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
2736
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
2737
                // in old version, keep this key-value pair and it needs to be checked manually
2738
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2739
0
                return -1;
2740
0
            }
2741
250
            if (rowset.resource_id().empty()) [[unlikely]] {
2742
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2743
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2744
0
                          << hex(k) << " value=" << proto_to_json(rowset);
2745
0
                rowset_keys.emplace_back(k);
2746
0
                return -1;
2747
0
            }
2748
            // decode rowset_id
2749
250
            auto k1 = k;
2750
250
            k1.remove_prefix(1);
2751
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2752
250
            decode_key(&k1, &out);
2753
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2754
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2755
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2756
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
2757
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
2758
250
                                             rowset.tablet_id(), rowset_id) != 0) {
2759
0
                return -1;
2760
0
            }
2761
250
            return 0;
2762
250
        }
2763
        // TODO(plat1ko): check rowset not referenced
2764
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
2765
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
2766
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
2767
0
                LOG_INFO("recycle rowset that has empty resource id");
2768
0
            } else {
2769
                // other situations, keep this key-value pair and it needs to be checked manually
2770
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2771
0
                return -1;
2772
0
            }
2773
0
        }
2774
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2775
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
2776
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
2777
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
2778
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
2779
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
2780
3.75k
                  << " rowset_meta_size=" << v.size()
2781
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
2782
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
2783
            // unable to calculate file path, can only be deleted by rowset id prefix
2784
650
            num_prepare += 1;
2785
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
2786
650
                                             rowset_meta->tablet_id(),
2787
650
                                             rowset_meta->rowset_id_v2()) != 0) {
2788
0
                return -1;
2789
0
            }
2790
3.10k
        } else {
2791
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
2792
3.10k
            rowset_keys.emplace_back(k);
2793
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
2794
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
2795
3.10k
                ++num_empty_rowset;
2796
3.10k
            }
2797
3.10k
        }
2798
3.75k
        return 0;
2799
3.75k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
2800
2801
21
    auto loop_done = [&]() -> int {
2802
21
        std::vector<std::string> rowset_keys_to_delete;
2803
        // rowset_id -> rowset_meta
2804
        // store rowset id and meta for statistics rs size when delete
2805
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
2806
21
        rowset_keys_to_delete.swap(rowset_keys);
2807
21
        rowsets_to_delete.swap(rowsets);
2808
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
2809
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2810
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
2811
21
                                   metrics_context) != 0) {
2812
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2813
0
                return;
2814
0
            }
2815
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2816
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2817
0
                return;
2818
0
            }
2819
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2820
21
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
2809
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2810
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
2811
21
                                   metrics_context) != 0) {
2812
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2813
0
                return;
2814
0
            }
2815
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2816
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2817
0
                return;
2818
0
            }
2819
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2820
21
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
2821
21
        return 0;
2822
21
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
2801
21
    auto loop_done = [&]() -> int {
2802
21
        std::vector<std::string> rowset_keys_to_delete;
2803
        // rowset_id -> rowset_meta
2804
        // store rowset id and meta for statistics rs size when delete
2805
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
2806
21
        rowset_keys_to_delete.swap(rowset_keys);
2807
21
        rowsets_to_delete.swap(rowsets);
2808
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
2809
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2810
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
2811
21
                                   metrics_context) != 0) {
2812
21
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2813
21
                return;
2814
21
            }
2815
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2816
21
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2817
21
                return;
2818
21
            }
2819
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2820
21
        });
2821
21
        return 0;
2822
21
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
2823
2824
13
    if (config::enable_recycler_stats_metrics) {
2825
0
        scan_and_statistics_rowsets();
2826
0
    }
2827
    // recycle_func and loop_done for scan and recycle
2828
13
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
2829
13
                               std::move(loop_done));
2830
2831
13
    worker_pool->stop();
2832
2833
13
    if (!async_recycled_rowset_keys.empty()) {
2834
2
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
2835
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2836
0
            return -1;
2837
2
        } else {
2838
2
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
2839
2
        }
2840
2
    }
2841
13
    return ret;
2842
13
}
2843
2844
11
int InstanceRecycler::recycle_restore_jobs() {
2845
11
    const std::string task_name = "recycle_restore_jobs";
2846
11
    int64_t num_scanned = 0;
2847
11
    int64_t num_expired = 0;
2848
11
    int64_t num_recycled = 0;
2849
2850
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2851
2852
11
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
2853
11
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
2854
11
    std::string restore_job_key0;
2855
11
    std::string restore_job_key1;
2856
11
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
2857
11
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
2858
2859
11
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
2860
2861
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2862
11
    register_recycle_task(task_name, start_time);
2863
2864
11
    DORIS_CLOUD_DEFER {
2865
11
        unregister_recycle_task(task_name);
2866
11
        int64_t cost =
2867
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2868
11
        metrics_context.finish_report();
2869
2870
11
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
2871
11
                .tag("instance_id", instance_id_)
2872
11
                .tag("num_scanned", num_scanned)
2873
11
                .tag("num_expired", num_expired)
2874
11
                .tag("num_recycled", num_recycled);
2875
11
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
2864
11
    DORIS_CLOUD_DEFER {
2865
11
        unregister_recycle_task(task_name);
2866
11
        int64_t cost =
2867
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2868
11
        metrics_context.finish_report();
2869
2870
11
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
2871
11
                .tag("instance_id", instance_id_)
2872
11
                .tag("num_scanned", num_scanned)
2873
11
                .tag("num_expired", num_expired)
2874
11
                .tag("num_recycled", num_recycled);
2875
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
2876
2877
11
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2878
2879
11
    std::vector<std::string_view> restore_job_keys;
2880
20
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2881
20
        ++num_scanned;
2882
20
        RestoreJobCloudPB restore_job_pb;
2883
20
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
2884
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2885
0
            return -1;
2886
0
        }
2887
20
        int64_t expiration =
2888
20
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
2889
20
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
2890
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
2891
0
                   << " job expiration=" << restore_job_pb.expiration()
2892
0
                   << " creation_time=" << restore_job_pb.creation_time()
2893
0
                   << " state=" << restore_job_pb.state();
2894
20
        int64_t current_time = ::time(nullptr);
2895
20
        if (current_time < expiration) { // not expired
2896
0
            return 0;
2897
0
        }
2898
20
        ++num_expired;
2899
2900
20
        int64_t tablet_id = restore_job_pb.tablet_id();
2901
20
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
2902
20
                  << " restore_job_pb=" << restore_job_pb.DebugString();
2903
2904
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
2905
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
2906
2907
20
        std::unique_ptr<Transaction> txn;
2908
20
        std::string msg;
2909
20
        MetaServiceCode code = MetaServiceCode::OK;
2910
20
        if (code != MetaServiceCode::OK) {
2911
0
            LOG_WARNING("scan restore job rowsets failed when recycle restore jobs")
2912
0
                    .tag("tablet id", tablet_id)
2913
0
                    .tag("msg", msg)
2914
0
                    .tag("code", code)
2915
0
                    .tag("instance id", instance_id_);
2916
0
            return -1;
2917
0
        }
2918
2919
        // Recycle all data and KV associated with the tablet.
2920
        // This includes rowsets, segments, and related resources.
2921
20
        if (recycle_tablet(tablet_id, metrics_context) != 0) {
2922
0
            LOG_WARNING("failed to recycle tablet")
2923
0
                    .tag("tablet_id", tablet_id)
2924
0
                    .tag("instance_id", instance_id_);
2925
0
            return -1;
2926
20
        } else {
2927
            // Delete restore job rowsets kv only if tablet recycling succeeded
2928
            // to prevent data leak.
2929
20
            TxnErrorCode err = txn_kv_->create_txn(&txn);
2930
20
            if (err != TxnErrorCode::TXN_OK) {
2931
0
                LOG_WARNING("failed to recycle restore job")
2932
0
                        .tag("err", err)
2933
0
                        .tag("tablet id", tablet_id)
2934
0
                        .tag("instance_id", instance_id_)
2935
0
                        .tag("reason", "failed to create txn");
2936
0
                return -1;
2937
0
            }
2938
2939
            // delete all restore job rowset kv
2940
20
            txn->remove(restore_job_rs_key0, restore_job_rs_key1);
2941
2942
20
            err = txn->commit();
2943
20
            if (err != TxnErrorCode::TXN_OK) {
2944
0
                LOG_WARNING("failed to recycle tablet restore job rowset kv")
2945
0
                        .tag("err", err)
2946
0
                        .tag("tablet id", tablet_id)
2947
0
                        .tag("instance_id", instance_id_)
2948
0
                        .tag("reason", "failed to commit txn");
2949
0
                return -1;
2950
0
            }
2951
20
        }
2952
2953
20
        metrics_context.total_recycled_num = ++num_recycled;
2954
20
        metrics_context.report();
2955
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2956
20
        restore_job_keys.push_back(k);
2957
2958
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
2959
20
                  << " tablet_id=" << tablet_id;
2960
20
        return 0;
2961
20
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2880
20
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2881
20
        ++num_scanned;
2882
20
        RestoreJobCloudPB restore_job_pb;
2883
20
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
2884
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2885
0
            return -1;
2886
0
        }
2887
20
        int64_t expiration =
2888
20
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
2889
20
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
2890
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
2891
0
                   << " job expiration=" << restore_job_pb.expiration()
2892
0
                   << " creation_time=" << restore_job_pb.creation_time()
2893
0
                   << " state=" << restore_job_pb.state();
2894
20
        int64_t current_time = ::time(nullptr);
2895
20
        if (current_time < expiration) { // not expired
2896
0
            return 0;
2897
0
        }
2898
20
        ++num_expired;
2899
2900
20
        int64_t tablet_id = restore_job_pb.tablet_id();
2901
20
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
2902
20
                  << " restore_job_pb=" << restore_job_pb.DebugString();
2903
2904
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
2905
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
2906
2907
20
        std::unique_ptr<Transaction> txn;
2908
20
        std::string msg;
2909
20
        MetaServiceCode code = MetaServiceCode::OK;
2910
20
        if (code != MetaServiceCode::OK) {
2911
0
            LOG_WARNING("scan restore job rowsets failed when recycle restore jobs")
2912
0
                    .tag("tablet id", tablet_id)
2913
0
                    .tag("msg", msg)
2914
0
                    .tag("code", code)
2915
0
                    .tag("instance id", instance_id_);
2916
0
            return -1;
2917
0
        }
2918
2919
        // Recycle all data and KV associated with the tablet.
2920
        // This includes rowsets, segments, and related resources.
2921
20
        if (recycle_tablet(tablet_id, metrics_context) != 0) {
2922
0
            LOG_WARNING("failed to recycle tablet")
2923
0
                    .tag("tablet_id", tablet_id)
2924
0
                    .tag("instance_id", instance_id_);
2925
0
            return -1;
2926
20
        } else {
2927
            // Delete restore job rowsets kv only if tablet recycling succeeded
2928
            // to prevent data leak.
2929
20
            TxnErrorCode err = txn_kv_->create_txn(&txn);
2930
20
            if (err != TxnErrorCode::TXN_OK) {
2931
0
                LOG_WARNING("failed to recycle restore job")
2932
0
                        .tag("err", err)
2933
0
                        .tag("tablet id", tablet_id)
2934
0
                        .tag("instance_id", instance_id_)
2935
0
                        .tag("reason", "failed to create txn");
2936
0
                return -1;
2937
0
            }
2938
2939
            // delete all restore job rowset kv
2940
20
            txn->remove(restore_job_rs_key0, restore_job_rs_key1);
2941
2942
20
            err = txn->commit();
2943
20
            if (err != TxnErrorCode::TXN_OK) {
2944
0
                LOG_WARNING("failed to recycle tablet restore job rowset kv")
2945
0
                        .tag("err", err)
2946
0
                        .tag("tablet id", tablet_id)
2947
0
                        .tag("instance_id", instance_id_)
2948
0
                        .tag("reason", "failed to commit txn");
2949
0
                return -1;
2950
0
            }
2951
20
        }
2952
2953
20
        metrics_context.total_recycled_num = ++num_recycled;
2954
20
        metrics_context.report();
2955
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2956
20
        restore_job_keys.push_back(k);
2957
2958
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
2959
20
                  << " tablet_id=" << tablet_id;
2960
20
        return 0;
2961
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
2962
2963
11
    auto loop_done = [&restore_job_keys, this]() -> int {
2964
1
        if (restore_job_keys.empty()) return 0;
2965
1
        DORIS_CLOUD_DEFER {
2966
1
            restore_job_keys.clear();
2967
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2965
1
        DORIS_CLOUD_DEFER {
2966
1
            restore_job_keys.clear();
2967
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
2968
2969
1
        std::unique_ptr<Transaction> txn;
2970
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2971
1
        if (err != TxnErrorCode::TXN_OK) {
2972
0
            LOG_WARNING("failed to recycle restore job")
2973
0
                    .tag("err", err)
2974
0
                    .tag("instance_id", instance_id_)
2975
0
                    .tag("reason", "failed to create txn");
2976
0
            return -1;
2977
0
        }
2978
20
        for (auto& k : restore_job_keys) {
2979
20
            txn->remove(k);
2980
20
        }
2981
1
        err = txn->commit();
2982
1
        if (err != TxnErrorCode::TXN_OK) {
2983
0
            LOG_WARNING("failed to recycle restore job")
2984
0
                    .tag("err", err)
2985
0
                    .tag("instance_id", instance_id_)
2986
0
                    .tag("reason", "failed to commit txn");
2987
0
            return -1;
2988
0
        }
2989
1
        return 0;
2990
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
2963
1
    auto loop_done = [&restore_job_keys, this]() -> int {
2964
1
        if (restore_job_keys.empty()) return 0;
2965
1
        DORIS_CLOUD_DEFER {
2966
1
            restore_job_keys.clear();
2967
1
        };
2968
2969
1
        std::unique_ptr<Transaction> txn;
2970
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2971
1
        if (err != TxnErrorCode::TXN_OK) {
2972
0
            LOG_WARNING("failed to recycle restore job")
2973
0
                    .tag("err", err)
2974
0
                    .tag("instance_id", instance_id_)
2975
0
                    .tag("reason", "failed to create txn");
2976
0
            return -1;
2977
0
        }
2978
20
        for (auto& k : restore_job_keys) {
2979
20
            txn->remove(k);
2980
20
        }
2981
1
        err = txn->commit();
2982
1
        if (err != TxnErrorCode::TXN_OK) {
2983
0
            LOG_WARNING("failed to recycle restore job")
2984
0
                    .tag("err", err)
2985
0
                    .tag("instance_id", instance_id_)
2986
0
                    .tag("reason", "failed to commit txn");
2987
0
            return -1;
2988
0
        }
2989
1
        return 0;
2990
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
2991
2992
11
    if (config::enable_recycler_stats_metrics) {
2993
0
        scan_and_statistics_restore_jobs();
2994
0
    }
2995
2996
11
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
2997
11
                            std::move(loop_done));
2998
11
}
2999
3000
1
int InstanceRecycler::recycle_versioned_rowsets() {
3001
1
    const std::string task_name = "recycle_rowsets";
3002
1
    int64_t num_scanned = 0;
3003
1
    int64_t num_expired = 0;
3004
1
    int64_t num_prepare = 0;
3005
1
    int64_t num_compacted = 0;
3006
1
    int64_t num_empty_rowset = 0;
3007
1
    size_t total_rowset_key_size = 0;
3008
1
    size_t total_rowset_value_size = 0;
3009
1
    size_t expired_rowset_size = 0;
3010
1
    std::atomic_long num_recycled = 0;
3011
1
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3012
3013
1
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
3014
1
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
3015
1
    std::string recyc_rs_key0;
3016
1
    std::string recyc_rs_key1;
3017
1
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
3018
1
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
3019
3020
1
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
3021
3022
1
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3023
1
    register_recycle_task(task_name, start_time);
3024
3025
1
    DORIS_CLOUD_DEFER {
3026
1
        unregister_recycle_task(task_name);
3027
1
        int64_t cost =
3028
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3029
1
        metrics_context.finish_report();
3030
1
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3031
1
                .tag("instance_id", instance_id_)
3032
1
                .tag("num_scanned", num_scanned)
3033
1
                .tag("num_expired", num_expired)
3034
1
                .tag("num_recycled", num_recycled)
3035
1
                .tag("num_recycled.prepare", num_prepare)
3036
1
                .tag("num_recycled.compacted", num_compacted)
3037
1
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3038
1
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3039
1
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3040
1
                .tag("expired_rowset_meta_size", expired_rowset_size);
3041
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
3025
1
    DORIS_CLOUD_DEFER {
3026
1
        unregister_recycle_task(task_name);
3027
1
        int64_t cost =
3028
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3029
1
        metrics_context.finish_report();
3030
1
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3031
1
                .tag("instance_id", instance_id_)
3032
1
                .tag("num_scanned", num_scanned)
3033
1
                .tag("num_expired", num_expired)
3034
1
                .tag("num_recycled", num_recycled)
3035
1
                .tag("num_recycled.prepare", num_prepare)
3036
1
                .tag("num_recycled.compacted", num_compacted)
3037
1
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3038
1
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3039
1
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3040
1
                .tag("expired_rowset_meta_size", expired_rowset_size);
3041
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
3042
3043
1
    std::vector<std::string> orphan_rowset_keys;
3044
3045
    // Store keys of rowset recycled by background workers
3046
1
    std::mutex async_recycled_rowset_keys_mutex;
3047
1
    std::vector<std::string> async_recycled_rowset_keys;
3048
1
    auto worker_pool = std::make_unique<SimpleThreadPool>(
3049
1
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
3050
1
    worker_pool->start();
3051
1
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
3052
1
                                            int64_t tablet_id, const std::string& rowset_id) {
3053
        // Try to delete rowset data in background thread
3054
0
        int ret = worker_pool->submit_with_timeout(
3055
0
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3056
0
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3057
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3058
0
                        return;
3059
0
                    }
3060
                    // The async recycled rowsets are staled format or has not been used,
3061
                    // so we don't need to check the rowset ref count key.
3062
0
                    std::vector<std::string> keys;
3063
0
                    {
3064
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3065
0
                        async_recycled_rowset_keys.push_back(std::move(key));
3066
0
                        if (async_recycled_rowset_keys.size() > 100) {
3067
0
                            keys.swap(async_recycled_rowset_keys);
3068
0
                        }
3069
0
                    }
3070
0
                    if (keys.empty()) return;
3071
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3072
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3073
0
                                     << instance_id_;
3074
0
                    } else {
3075
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3076
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3077
0
                                           num_recycled, start_time);
3078
0
                    }
3079
0
                },
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
3080
0
                0);
3081
0
        if (ret == 0) return 0;
3082
        // Submit task failed, delete rowset data in current thread
3083
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3084
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3085
0
            return -1;
3086
0
        }
3087
0
        orphan_rowset_keys.push_back(std::move(key));
3088
0
        return 0;
3089
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
3090
3091
1
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3092
3093
5
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
3094
5
        ++num_scanned;
3095
5
        total_rowset_key_size += k.size();
3096
5
        total_rowset_value_size += v.size();
3097
5
        RecycleRowsetPB rowset;
3098
5
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3099
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3100
0
            return -1;
3101
0
        }
3102
3103
5
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3104
3105
5
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3106
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
3107
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3108
5
        int64_t current_time = ::time(nullptr);
3109
5
        if (current_time < final_expiration) { // not expired
3110
0
            return 0;
3111
0
        }
3112
5
        ++num_expired;
3113
5
        expired_rowset_size += v.size();
3114
5
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3115
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3116
                // in old version, keep this key-value pair and it needs to be checked manually
3117
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3118
0
                return -1;
3119
0
            }
3120
0
            if (rowset.resource_id().empty()) [[unlikely]] {
3121
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3122
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3123
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3124
0
                orphan_rowset_keys.emplace_back(k);
3125
0
                return -1;
3126
0
            }
3127
            // decode rowset_id
3128
0
            auto k1 = k;
3129
0
            k1.remove_prefix(1);
3130
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3131
0
            decode_key(&k1, &out);
3132
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3133
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3134
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3135
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
3136
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3137
0
                                             rowset.tablet_id(), rowset_id) != 0) {
3138
0
                return -1;
3139
0
            }
3140
0
            return 0;
3141
0
        }
3142
        // TODO(plat1ko): check rowset not referenced
3143
5
        auto rowset_meta = rowset.mutable_rowset_meta();
3144
5
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3145
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3146
0
                LOG_INFO("recycle rowset that has empty resource id");
3147
0
            } else {
3148
                // other situations, keep this key-value pair and it needs to be checked manually
3149
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3150
0
                return -1;
3151
0
            }
3152
0
        }
3153
5
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3154
5
                  << " tablet_id=" << rowset_meta->tablet_id()
3155
5
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3156
5
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3157
5
                  << "] txn_id=" << rowset_meta->txn_id()
3158
5
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3159
5
                  << " rowset_meta_size=" << v.size()
3160
5
                  << " creation_time=" << rowset_meta->creation_time();
3161
5
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3162
            // unable to calculate file path, can only be deleted by rowset id prefix
3163
0
            num_prepare += 1;
3164
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3165
0
                                             rowset_meta->tablet_id(),
3166
0
                                             rowset_meta->rowset_id_v2()) != 0) {
3167
0
                return -1;
3168
0
            }
3169
5
        } else {
3170
5
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
3171
5
            worker_pool->submit(
3172
5
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
3173
5
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
3174
0
                            return;
3175
0
                        }
3176
5
                        num_compacted += is_compacted;
3177
5
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
3178
5
                        if (rowset_meta.num_segments() == 0) {
3179
0
                            ++num_empty_rowset;
3180
0
                        }
3181
5
                    });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
3172
5
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
3173
5
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
3174
0
                            return;
3175
0
                        }
3176
5
                        num_compacted += is_compacted;
3177
5
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
3178
5
                        if (rowset_meta.num_segments() == 0) {
3179
0
                            ++num_empty_rowset;
3180
0
                        }
3181
5
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
3182
5
        }
3183
5
        return 0;
3184
5
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3093
5
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
3094
5
        ++num_scanned;
3095
5
        total_rowset_key_size += k.size();
3096
5
        total_rowset_value_size += v.size();
3097
5
        RecycleRowsetPB rowset;
3098
5
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3099
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3100
0
            return -1;
3101
0
        }
3102
3103
5
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3104
3105
5
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3106
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
3107
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3108
5
        int64_t current_time = ::time(nullptr);
3109
5
        if (current_time < final_expiration) { // not expired
3110
0
            return 0;
3111
0
        }
3112
5
        ++num_expired;
3113
5
        expired_rowset_size += v.size();
3114
5
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3115
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3116
                // in old version, keep this key-value pair and it needs to be checked manually
3117
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3118
0
                return -1;
3119
0
            }
3120
0
            if (rowset.resource_id().empty()) [[unlikely]] {
3121
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3122
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3123
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3124
0
                orphan_rowset_keys.emplace_back(k);
3125
0
                return -1;
3126
0
            }
3127
            // decode rowset_id
3128
0
            auto k1 = k;
3129
0
            k1.remove_prefix(1);
3130
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3131
0
            decode_key(&k1, &out);
3132
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3133
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3134
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3135
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
3136
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3137
0
                                             rowset.tablet_id(), rowset_id) != 0) {
3138
0
                return -1;
3139
0
            }
3140
0
            return 0;
3141
0
        }
3142
        // TODO(plat1ko): check rowset not referenced
3143
5
        auto rowset_meta = rowset.mutable_rowset_meta();
3144
5
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3145
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3146
0
                LOG_INFO("recycle rowset that has empty resource id");
3147
0
            } else {
3148
                // other situations, keep this key-value pair and it needs to be checked manually
3149
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3150
0
                return -1;
3151
0
            }
3152
0
        }
3153
5
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3154
5
                  << " tablet_id=" << rowset_meta->tablet_id()
3155
5
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3156
5
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3157
5
                  << "] txn_id=" << rowset_meta->txn_id()
3158
5
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3159
5
                  << " rowset_meta_size=" << v.size()
3160
5
                  << " creation_time=" << rowset_meta->creation_time();
3161
5
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3162
            // unable to calculate file path, can only be deleted by rowset id prefix
3163
0
            num_prepare += 1;
3164
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3165
0
                                             rowset_meta->tablet_id(),
3166
0
                                             rowset_meta->rowset_id_v2()) != 0) {
3167
0
                return -1;
3168
0
            }
3169
5
        } else {
3170
5
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
3171
5
            worker_pool->submit(
3172
5
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
3173
5
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
3174
5
                            return;
3175
5
                        }
3176
5
                        num_compacted += is_compacted;
3177
5
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
3178
5
                        if (rowset_meta.num_segments() == 0) {
3179
5
                            ++num_empty_rowset;
3180
5
                        }
3181
5
                    });
3182
5
        }
3183
5
        return 0;
3184
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
3185
3186
1
    if (config::enable_recycler_stats_metrics) {
3187
0
        scan_and_statistics_rowsets();
3188
0
    }
3189
3190
1
    auto loop_done = [&]() -> int {
3191
1
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
3192
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3193
0
        }
3194
1
        orphan_rowset_keys.clear();
3195
1
        return 0;
3196
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
3190
1
    auto loop_done = [&]() -> int {
3191
1
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
3192
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3193
0
        }
3194
1
        orphan_rowset_keys.clear();
3195
1
        return 0;
3196
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
3197
3198
    // recycle_func and loop_done for scan and recycle
3199
1
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
3200
1
                               std::move(loop_done));
3201
3202
1
    worker_pool->stop();
3203
3204
1
    if (!async_recycled_rowset_keys.empty()) {
3205
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
3206
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3207
0
            return -1;
3208
0
        } else {
3209
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
3210
0
        }
3211
0
    }
3212
1
    return ret;
3213
1
}
3214
3215
int InstanceRecycler::recycle_rowset_meta_and_data(std::string_view rowset_meta_key,
3216
67
                                                   const RowsetMetaCloudPB& rowset_meta) {
3217
67
    constexpr int MAX_RETRY = 10;
3218
67
    int64_t tablet_id = rowset_meta.tablet_id();
3219
67
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
3220
89
    for (int i = 0; i < MAX_RETRY; ++i) {
3221
89
        std::unique_ptr<Transaction> txn;
3222
89
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3223
89
        if (err != TxnErrorCode::TXN_OK) {
3224
0
            LOG_WARNING("failed to create txn").tag("err", err);
3225
0
            return -1;
3226
0
        }
3227
3228
89
        std::string rowset_ref_count_key =
3229
89
                versioned::data_rowset_ref_count_key({instance_id_, tablet_id, rowset_id});
3230
89
        int64_t ref_count = 0;
3231
89
        {
3232
89
            std::string value;
3233
89
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
3234
89
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3235
                // This is the old version rowset, we could recycle it directly.
3236
3
                ref_count = 1;
3237
86
            } else if (err != TxnErrorCode::TXN_OK) {
3238
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
3239
0
                return -1;
3240
86
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
3241
0
                LOG_WARNING("failed to decode rowset data ref count")
3242
0
                        .tag("key", hex(rowset_meta_key))
3243
0
                        .tag("rowset_id", rowset_id)
3244
0
                        .tag("tablet_id", tablet_id)
3245
0
                        .tag("value", hex(value));
3246
0
                return -1;
3247
0
            }
3248
89
        };
3249
3250
89
        if (ref_count == 1) {
3251
            // It would not be added since it is recycling.
3252
13
            if (delete_rowset_data(rowset_meta) != 0) {
3253
0
                LOG_WARNING("failed to delete rowset data")
3254
0
                        .tag("tablet_id", tablet_id)
3255
0
                        .tag("rowset_id", rowset_id)
3256
0
                        .tag("key", hex(rowset_meta_key));
3257
0
                return -1;
3258
0
            }
3259
3260
            // Reset the transaction to avoid timeout.
3261
13
            err = txn_kv_->create_txn(&txn);
3262
13
            if (err != TxnErrorCode::TXN_OK) {
3263
0
                LOG_WARNING("failed to create txn").tag("err", err);
3264
0
                return -1;
3265
0
            }
3266
13
            txn->remove(rowset_ref_count_key);
3267
13
            LOG_INFO("delete rowset data ref count key")
3268
13
                    .tag("txn_id", rowset_meta.txn_id())
3269
13
                    .tag("key", hex(rowset_meta_key))
3270
13
                    .tag("tablet_id", tablet_id)
3271
13
                    .tag("rowset_id", rowset_id);
3272
76
        } else {
3273
            // Decrease the rowset ref count.
3274
            //
3275
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
3276
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
3277
76
            txn->atomic_add(rowset_ref_count_key, -1);
3278
76
            LOG_INFO("decrease rowset data ref count")
3279
76
                    .tag("txn_id", rowset_meta.txn_id())
3280
76
                    .tag("key", hex(rowset_meta_key))
3281
76
                    .tag("tablet_id", tablet_id)
3282
76
                    .tag("rowset_id", rowset_id)
3283
76
                    .tag("ref_count", ref_count - 1);
3284
76
        }
3285
3286
89
        txn->remove(rowset_meta_key);
3287
89
        err = txn->commit();
3288
89
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
3289
            // The rowset ref count key has been changed, we need to retry.
3290
22
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
3291
0
                       << "key=" << hex(rowset_meta_key) << " tablet_id=" << tablet_id
3292
0
                       << " rowset_id=" << rowset_id << ", ref_count=" << ref_count
3293
0
                       << ", retry=" << i;
3294
22
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
3295
22
            continue;
3296
67
        } else if (err != TxnErrorCode::TXN_OK) {
3297
0
            LOG_WARNING("failed to recycle rowset meta and data")
3298
0
                    .tag("key", hex(rowset_meta_key))
3299
0
                    .tag("err", err);
3300
0
            return -1;
3301
0
        }
3302
67
        LOG_INFO("recycle rowset meta and data success")
3303
67
                .tag("key", hex(rowset_meta_key))
3304
67
                .tag("tablet_id", tablet_id)
3305
67
                .tag("rowset_id", rowset_id);
3306
67
        return 0;
3307
89
    }
3308
0
    LOG_WARNING("failed to recycle rowset meta and data after retry")
3309
0
            .tag("key", hex(rowset_meta_key))
3310
0
            .tag("tablet_id", tablet_id)
3311
0
            .tag("rowset_id", rowset_id)
3312
0
            .tag("retry", MAX_RETRY);
3313
0
    return -1;
3314
67
}
3315
3316
18
int InstanceRecycler::recycle_tmp_rowsets() {
3317
18
    const std::string task_name = "recycle_tmp_rowsets";
3318
18
    int64_t num_scanned = 0;
3319
18
    int64_t num_expired = 0;
3320
18
    int64_t num_recycled = 0;
3321
18
    size_t expired_rowset_size = 0;
3322
18
    size_t total_rowset_key_size = 0;
3323
18
    size_t total_rowset_value_size = 0;
3324
18
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3325
3326
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
3327
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
3328
18
    std::string tmp_rs_key0;
3329
18
    std::string tmp_rs_key1;
3330
18
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
3331
18
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
3332
3333
18
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
3334
3335
18
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3336
18
    register_recycle_task(task_name, start_time);
3337
3338
18
    DORIS_CLOUD_DEFER {
3339
18
        unregister_recycle_task(task_name);
3340
18
        int64_t cost =
3341
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3342
18
        metrics_context.finish_report();
3343
18
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
3344
18
                .tag("instance_id", instance_id_)
3345
18
                .tag("num_scanned", num_scanned)
3346
18
                .tag("num_expired", num_expired)
3347
18
                .tag("num_recycled", num_recycled)
3348
18
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3349
18
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3350
18
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
3351
18
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
3338
14
    DORIS_CLOUD_DEFER {
3339
14
        unregister_recycle_task(task_name);
3340
14
        int64_t cost =
3341
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3342
14
        metrics_context.finish_report();
3343
14
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
3344
14
                .tag("instance_id", instance_id_)
3345
14
                .tag("num_scanned", num_scanned)
3346
14
                .tag("num_expired", num_expired)
3347
14
                .tag("num_recycled", num_recycled)
3348
14
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3349
14
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3350
14
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
3351
14
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
3338
4
    DORIS_CLOUD_DEFER {
3339
4
        unregister_recycle_task(task_name);
3340
4
        int64_t cost =
3341
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3342
4
        metrics_context.finish_report();
3343
4
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
3344
4
                .tag("instance_id", instance_id_)
3345
4
                .tag("num_scanned", num_scanned)
3346
4
                .tag("num_expired", num_expired)
3347
4
                .tag("num_recycled", num_recycled)
3348
4
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3349
4
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3350
4
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
3351
4
    };
3352
3353
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
3354
18
    std::vector<std::string_view> tmp_rowset_keys;
3355
18
    std::vector<std::string> tmp_rowset_ref_count_keys;
3356
    // rowset_id -> rowset_meta
3357
    // store tmp_rowset id and meta for statistics rs size when delete
3358
18
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
3359
3360
18
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3361
3362
18
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
3363
18
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
3364
18
                             &earlest_ts, &tmp_rowset_ref_count_keys,
3365
3.05k
                             this](std::string_view k, std::string_view v) -> int {
3366
3.05k
        ++num_scanned;
3367
3.05k
        total_rowset_key_size += k.size();
3368
3.05k
        total_rowset_value_size += v.size();
3369
3.05k
        doris::RowsetMetaCloudPB rowset;
3370
3.05k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3371
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
3372
0
            return -1;
3373
0
        }
3374
3.05k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3375
3.05k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3376
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3377
0
                   << " txn_expiration=" << rowset.txn_expiration()
3378
0
                   << " rowset_creation_time=" << rowset.creation_time();
3379
3.05k
        int64_t current_time = ::time(nullptr);
3380
3.05k
        if (current_time < expiration) { // not expired
3381
0
            return 0;
3382
0
        }
3383
3384
3.05k
        DCHECK_GT(rowset.txn_id(), 0)
3385
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
3386
3.05k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
3387
10
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
3388
10
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
3389
10
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
3390
10
                      << rowset.start_version() << '-' << rowset.end_version()
3391
10
                      << "] txn_id=" << rowset.txn_id()
3392
10
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
3393
10
                      << " txn_expiration=" << rowset.txn_expiration();
3394
10
            return 0;
3395
10
        }
3396
3397
3.04k
        ++num_expired;
3398
3.04k
        expired_rowset_size += v.size();
3399
3.04k
        if (!rowset.has_resource_id()) {
3400
20
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
3401
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
3402
0
                return -1;
3403
0
            }
3404
            // might be a delete pred rowset
3405
20
            tmp_rowset_keys.push_back(k);
3406
20
            return 0;
3407
20
        }
3408
        // TODO(plat1ko): check rowset not referenced
3409
3.02k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3410
3.02k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
3411
3.02k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
3412
3.02k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
3413
3.02k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
3414
3.02k
                  << " num_expired=" << num_expired;
3415
3416
3.02k
        tmp_rowset_keys.push_back(k);
3417
        // Remove the rowset ref count key directly since it has not been used.
3418
3.02k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
3419
3.02k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
3420
3.02k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
3421
3.02k
                  << "key=" << hex(rowset_ref_count_key);
3422
3.02k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
3423
3424
3.02k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
3425
3.02k
        return 0;
3426
3.04k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3365
3.02k
                             this](std::string_view k, std::string_view v) -> int {
3366
3.02k
        ++num_scanned;
3367
3.02k
        total_rowset_key_size += k.size();
3368
3.02k
        total_rowset_value_size += v.size();
3369
3.02k
        doris::RowsetMetaCloudPB rowset;
3370
3.02k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3371
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
3372
0
            return -1;
3373
0
        }
3374
3.02k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3375
3.02k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3376
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3377
0
                   << " txn_expiration=" << rowset.txn_expiration()
3378
0
                   << " rowset_creation_time=" << rowset.creation_time();
3379
3.02k
        int64_t current_time = ::time(nullptr);
3380
3.02k
        if (current_time < expiration) { // not expired
3381
0
            return 0;
3382
0
        }
3383
3384
3.02k
        DCHECK_GT(rowset.txn_id(), 0)
3385
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
3386
3.02k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
3387
0
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
3388
0
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
3389
0
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
3390
0
                      << rowset.start_version() << '-' << rowset.end_version()
3391
0
                      << "] txn_id=" << rowset.txn_id()
3392
0
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
3393
0
                      << " txn_expiration=" << rowset.txn_expiration();
3394
0
            return 0;
3395
0
        }
3396
3397
3.02k
        ++num_expired;
3398
3.02k
        expired_rowset_size += v.size();
3399
3.02k
        if (!rowset.has_resource_id()) {
3400
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
3401
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
3402
0
                return -1;
3403
0
            }
3404
            // might be a delete pred rowset
3405
0
            tmp_rowset_keys.push_back(k);
3406
0
            return 0;
3407
0
        }
3408
        // TODO(plat1ko): check rowset not referenced
3409
3.02k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3410
3.02k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
3411
3.02k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
3412
3.02k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
3413
3.02k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
3414
3.02k
                  << " num_expired=" << num_expired;
3415
3416
3.02k
        tmp_rowset_keys.push_back(k);
3417
        // Remove the rowset ref count key directly since it has not been used.
3418
3.02k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
3419
3.02k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
3420
3.02k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
3421
3.02k
                  << "key=" << hex(rowset_ref_count_key);
3422
3.02k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
3423
3424
3.02k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
3425
3.02k
        return 0;
3426
3.02k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3365
30
                             this](std::string_view k, std::string_view v) -> int {
3366
30
        ++num_scanned;
3367
30
        total_rowset_key_size += k.size();
3368
30
        total_rowset_value_size += v.size();
3369
30
        doris::RowsetMetaCloudPB rowset;
3370
30
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3371
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
3372
0
            return -1;
3373
0
        }
3374
30
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3375
30
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3376
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3377
0
                   << " txn_expiration=" << rowset.txn_expiration()
3378
0
                   << " rowset_creation_time=" << rowset.creation_time();
3379
30
        int64_t current_time = ::time(nullptr);
3380
30
        if (current_time < expiration) { // not expired
3381
0
            return 0;
3382
0
        }
3383
3384
30
        DCHECK_GT(rowset.txn_id(), 0)
3385
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
3386
30
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
3387
10
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
3388
10
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
3389
10
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
3390
10
                      << rowset.start_version() << '-' << rowset.end_version()
3391
10
                      << "] txn_id=" << rowset.txn_id()
3392
10
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
3393
10
                      << " txn_expiration=" << rowset.txn_expiration();
3394
10
            return 0;
3395
10
        }
3396
3397
20
        ++num_expired;
3398
20
        expired_rowset_size += v.size();
3399
20
        if (!rowset.has_resource_id()) {
3400
20
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
3401
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
3402
0
                return -1;
3403
0
            }
3404
            // might be a delete pred rowset
3405
20
            tmp_rowset_keys.push_back(k);
3406
20
            return 0;
3407
20
        }
3408
        // TODO(plat1ko): check rowset not referenced
3409
0
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3410
0
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
3411
0
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
3412
0
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
3413
0
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
3414
0
                  << " num_expired=" << num_expired;
3415
3416
0
        tmp_rowset_keys.push_back(k);
3417
        // Remove the rowset ref count key directly since it has not been used.
3418
0
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
3419
0
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
3420
0
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
3421
0
                  << "key=" << hex(rowset_ref_count_key);
3422
0
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
3423
3424
0
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
3425
0
        return 0;
3426
20
    };
3427
3428
18
    auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, &metrics_context,
3429
18
                      &tmp_rowset_ref_count_keys, this]() -> int {
3430
6
        DORIS_CLOUD_DEFER {
3431
6
            tmp_rowset_keys.clear();
3432
6
            tmp_rowsets.clear();
3433
6
            tmp_rowset_ref_count_keys.clear();
3434
6
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3430
3
        DORIS_CLOUD_DEFER {
3431
3
            tmp_rowset_keys.clear();
3432
3
            tmp_rowsets.clear();
3433
3
            tmp_rowset_ref_count_keys.clear();
3434
3
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3430
3
        DORIS_CLOUD_DEFER {
3431
3
            tmp_rowset_keys.clear();
3432
3
            tmp_rowsets.clear();
3433
3
            tmp_rowset_ref_count_keys.clear();
3434
3
        };
3435
6
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET, metrics_context) !=
3436
6
            0) {
3437
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
3438
0
            return -1;
3439
0
        }
3440
6
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
3441
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
3442
0
            return -1;
3443
0
        }
3444
6
        if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys) != 0) {
3445
0
            LOG(WARNING) << "failed to delete tmp rowset ref count kv, instance_id="
3446
0
                         << instance_id_;
3447
0
            return -1;
3448
0
        }
3449
6
        num_recycled += tmp_rowset_keys.size();
3450
6
        return 0;
3451
6
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
3429
3
                      &tmp_rowset_ref_count_keys, this]() -> int {
3430
3
        DORIS_CLOUD_DEFER {
3431
3
            tmp_rowset_keys.clear();
3432
3
            tmp_rowsets.clear();
3433
3
            tmp_rowset_ref_count_keys.clear();
3434
3
        };
3435
3
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET, metrics_context) !=
3436
3
            0) {
3437
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
3438
0
            return -1;
3439
0
        }
3440
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
3441
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
3442
0
            return -1;
3443
0
        }
3444
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys) != 0) {
3445
0
            LOG(WARNING) << "failed to delete tmp rowset ref count kv, instance_id="
3446
0
                         << instance_id_;
3447
0
            return -1;
3448
0
        }
3449
3
        num_recycled += tmp_rowset_keys.size();
3450
3
        return 0;
3451
3
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
3429
3
                      &tmp_rowset_ref_count_keys, this]() -> int {
3430
3
        DORIS_CLOUD_DEFER {
3431
3
            tmp_rowset_keys.clear();
3432
3
            tmp_rowsets.clear();
3433
3
            tmp_rowset_ref_count_keys.clear();
3434
3
        };
3435
3
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET, metrics_context) !=
3436
3
            0) {
3437
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
3438
0
            return -1;
3439
0
        }
3440
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
3441
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
3442
0
            return -1;
3443
0
        }
3444
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys) != 0) {
3445
0
            LOG(WARNING) << "failed to delete tmp rowset ref count kv, instance_id="
3446
0
                         << instance_id_;
3447
0
            return -1;
3448
0
        }
3449
3
        num_recycled += tmp_rowset_keys.size();
3450
3
        return 0;
3451
3
    };
3452
3453
18
    if (config::enable_recycler_stats_metrics) {
3454
0
        scan_and_statistics_tmp_rowsets();
3455
0
    }
3456
    // recycle_func and loop_done for scan and recycle
3457
18
    return scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
3458
18
                            std::move(loop_done));
3459
18
}
3460
3461
int InstanceRecycler::scan_and_recycle(
3462
        std::string begin, std::string_view end,
3463
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
3464
274
        std::function<int()> loop_done) {
3465
274
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
3466
274
    int ret = 0;
3467
274
    int64_t cnt = 0;
3468
274
    int get_range_retried = 0;
3469
274
    std::string err;
3470
274
    DORIS_CLOUD_DEFER_COPY(begin, end) {
3471
274
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
3472
274
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
3473
274
                  << " ret=" << ret << " err=" << err;
3474
274
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
3470
255
    DORIS_CLOUD_DEFER_COPY(begin, end) {
3471
255
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
3472
255
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
3473
255
                  << " ret=" << ret << " err=" << err;
3474
255
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
3470
19
    DORIS_CLOUD_DEFER_COPY(begin, end) {
3471
19
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
3472
19
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
3473
19
                  << " ret=" << ret << " err=" << err;
3474
19
    };
3475
3476
274
    std::unique_ptr<RangeGetIterator> it;
3477
297
    do {
3478
297
        if (get_range_retried > 1000) {
3479
0
            err = "txn_get exceeds max retry, may not scan all keys";
3480
0
            ret = -1;
3481
0
            return -1;
3482
0
        }
3483
297
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
3484
297
        if (get_ret != 0) { // txn kv may complain "Request for future version"
3485
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
3486
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
3487
0
                         << " get_range_retried=" << get_range_retried;
3488
0
            ++get_range_retried;
3489
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
3490
0
            continue; // try again
3491
0
        }
3492
297
        if (!it->has_next()) {
3493
177
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
3494
177
            break; // scan finished
3495
177
        }
3496
37.5k
        while (it->has_next()) {
3497
37.4k
            ++cnt;
3498
            // recycle corresponding resources
3499
37.4k
            auto [k, v] = it->next();
3500
37.4k
            if (!it->has_next()) {
3501
120
                begin = k;
3502
120
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
3503
120
            }
3504
            // if we want to continue scanning, the recycle_func should not return non-zero
3505
37.4k
            if (recycle_func(k, v) != 0) {
3506
22
                err = "recycle_func error";
3507
22
                ret = -1;
3508
22
            }
3509
37.4k
        }
3510
120
        begin.push_back('\x00'); // Update to next smallest key for iteration
3511
        // if we want to continue scanning, the recycle_func should not return non-zero
3512
120
        if (loop_done && loop_done() != 0) {
3513
2
            err = "loop_done error";
3514
2
            ret = -1;
3515
2
        }
3516
120
    } while (it->more() && !stopped());
3517
274
    return ret;
3518
274
}
3519
3520
20
int InstanceRecycler::abort_timeout_txn() {
3521
20
    const std::string task_name = "abort_timeout_txn";
3522
20
    int64_t num_scanned = 0;
3523
20
    int64_t num_timeout = 0;
3524
20
    int64_t num_abort = 0;
3525
20
    int64_t num_advance = 0;
3526
20
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3527
3528
20
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
3529
20
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
3530
20
    std::string begin_txn_running_key;
3531
20
    std::string end_txn_running_key;
3532
20
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
3533
20
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
3534
3535
20
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
3536
3537
20
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3538
20
    register_recycle_task(task_name, start_time);
3539
3540
20
    DORIS_CLOUD_DEFER {
3541
20
        unregister_recycle_task(task_name);
3542
20
        int64_t cost =
3543
20
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3544
20
        metrics_context.finish_report();
3545
20
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
3546
20
                .tag("instance_id", instance_id_)
3547
20
                .tag("num_scanned", num_scanned)
3548
20
                .tag("num_timeout", num_timeout)
3549
20
                .tag("num_abort", num_abort)
3550
20
                .tag("num_advance", num_advance);
3551
20
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
3540
16
    DORIS_CLOUD_DEFER {
3541
16
        unregister_recycle_task(task_name);
3542
16
        int64_t cost =
3543
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3544
16
        metrics_context.finish_report();
3545
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
3546
16
                .tag("instance_id", instance_id_)
3547
16
                .tag("num_scanned", num_scanned)
3548
16
                .tag("num_timeout", num_timeout)
3549
16
                .tag("num_abort", num_abort)
3550
16
                .tag("num_advance", num_advance);
3551
16
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
3540
4
    DORIS_CLOUD_DEFER {
3541
4
        unregister_recycle_task(task_name);
3542
4
        int64_t cost =
3543
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3544
4
        metrics_context.finish_report();
3545
4
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
3546
4
                .tag("instance_id", instance_id_)
3547
4
                .tag("num_scanned", num_scanned)
3548
4
                .tag("num_timeout", num_timeout)
3549
4
                .tag("num_abort", num_abort)
3550
4
                .tag("num_advance", num_advance);
3551
4
    };
3552
3553
20
    int64_t current_time =
3554
20
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
3555
3556
20
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
3557
20
                                  &current_time, &metrics_context,
3558
20
                                  this](std::string_view k, std::string_view v) -> int {
3559
10
        ++num_scanned;
3560
3561
10
        std::unique_ptr<Transaction> txn;
3562
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3563
10
        if (err != TxnErrorCode::TXN_OK) {
3564
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
3565
0
            return -1;
3566
0
        }
3567
10
        std::string_view k1 = k;
3568
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
3569
10
        k1.remove_prefix(1); // Remove key space
3570
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3571
10
        if (decode_key(&k1, &out) != 0) {
3572
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
3573
0
            return -1;
3574
0
        }
3575
10
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3576
10
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3577
10
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
3578
        // Update txn_info
3579
10
        std::string txn_inf_key, txn_inf_val;
3580
10
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
3581
10
        err = txn->get(txn_inf_key, &txn_inf_val);
3582
10
        if (err != TxnErrorCode::TXN_OK) {
3583
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
3584
0
            return -1;
3585
0
        }
3586
10
        TxnInfoPB txn_info;
3587
10
        if (!txn_info.ParseFromString(txn_inf_val)) {
3588
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
3589
0
            return -1;
3590
0
        }
3591
3592
10
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
3593
4
            txn.reset();
3594
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
3595
4
            std::shared_ptr<TxnLazyCommitTask> task =
3596
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
3597
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
3598
4
            if (ret.first != MetaServiceCode::OK) {
3599
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
3600
0
                             << "msg=" << ret.second;
3601
0
                return -1;
3602
0
            }
3603
4
            ++num_advance;
3604
4
            return 0;
3605
6
        } else {
3606
6
            TxnRunningPB txn_running_pb;
3607
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
3608
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
3609
0
                return -1;
3610
0
            }
3611
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
3612
4
                return 0;
3613
4
            }
3614
2
            ++num_timeout;
3615
3616
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
3617
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
3618
2
            txn_info.set_finish_time(current_time);
3619
2
            txn_info.set_reason("timeout");
3620
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
3621
2
            txn_inf_val.clear();
3622
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
3623
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
3624
0
                return -1;
3625
0
            }
3626
2
            txn->put(txn_inf_key, txn_inf_val);
3627
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
3628
            // Put recycle txn key
3629
2
            std::string recyc_txn_key, recyc_txn_val;
3630
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
3631
2
            RecycleTxnPB recycle_txn_pb;
3632
2
            recycle_txn_pb.set_creation_time(current_time);
3633
2
            recycle_txn_pb.set_label(txn_info.label());
3634
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
3635
0
                LOG_WARNING("failed to serialize txn recycle info")
3636
0
                        .tag("key", hex(k))
3637
0
                        .tag("db_id", db_id)
3638
0
                        .tag("txn_id", txn_id);
3639
0
                return -1;
3640
0
            }
3641
2
            txn->put(recyc_txn_key, recyc_txn_val);
3642
            // Remove txn running key
3643
2
            txn->remove(k);
3644
2
            err = txn->commit();
3645
2
            if (err != TxnErrorCode::TXN_OK) {
3646
0
                LOG_WARNING("failed to commit txn err={}", err)
3647
0
                        .tag("key", hex(k))
3648
0
                        .tag("db_id", db_id)
3649
0
                        .tag("txn_id", txn_id);
3650
0
                return -1;
3651
0
            }
3652
2
            metrics_context.total_recycled_num = ++num_abort;
3653
2
            metrics_context.report();
3654
2
        }
3655
3656
2
        return 0;
3657
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3558
6
                                  this](std::string_view k, std::string_view v) -> int {
3559
6
        ++num_scanned;
3560
3561
6
        std::unique_ptr<Transaction> txn;
3562
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3563
6
        if (err != TxnErrorCode::TXN_OK) {
3564
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
3565
0
            return -1;
3566
0
        }
3567
6
        std::string_view k1 = k;
3568
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
3569
6
        k1.remove_prefix(1); // Remove key space
3570
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3571
6
        if (decode_key(&k1, &out) != 0) {
3572
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
3573
0
            return -1;
3574
0
        }
3575
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3576
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3577
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
3578
        // Update txn_info
3579
6
        std::string txn_inf_key, txn_inf_val;
3580
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
3581
6
        err = txn->get(txn_inf_key, &txn_inf_val);
3582
6
        if (err != TxnErrorCode::TXN_OK) {
3583
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
3584
0
            return -1;
3585
0
        }
3586
6
        TxnInfoPB txn_info;
3587
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
3588
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
3589
0
            return -1;
3590
0
        }
3591
3592
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
3593
0
            txn.reset();
3594
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
3595
0
            std::shared_ptr<TxnLazyCommitTask> task =
3596
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
3597
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
3598
0
            if (ret.first != MetaServiceCode::OK) {
3599
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
3600
0
                             << "msg=" << ret.second;
3601
0
                return -1;
3602
0
            }
3603
0
            ++num_advance;
3604
0
            return 0;
3605
6
        } else {
3606
6
            TxnRunningPB txn_running_pb;
3607
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
3608
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
3609
0
                return -1;
3610
0
            }
3611
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
3612
4
                return 0;
3613
4
            }
3614
2
            ++num_timeout;
3615
3616
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
3617
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
3618
2
            txn_info.set_finish_time(current_time);
3619
2
            txn_info.set_reason("timeout");
3620
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
3621
2
            txn_inf_val.clear();
3622
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
3623
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
3624
0
                return -1;
3625
0
            }
3626
2
            txn->put(txn_inf_key, txn_inf_val);
3627
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
3628
            // Put recycle txn key
3629
2
            std::string recyc_txn_key, recyc_txn_val;
3630
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
3631
2
            RecycleTxnPB recycle_txn_pb;
3632
2
            recycle_txn_pb.set_creation_time(current_time);
3633
2
            recycle_txn_pb.set_label(txn_info.label());
3634
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
3635
0
                LOG_WARNING("failed to serialize txn recycle info")
3636
0
                        .tag("key", hex(k))
3637
0
                        .tag("db_id", db_id)
3638
0
                        .tag("txn_id", txn_id);
3639
0
                return -1;
3640
0
            }
3641
2
            txn->put(recyc_txn_key, recyc_txn_val);
3642
            // Remove txn running key
3643
2
            txn->remove(k);
3644
2
            err = txn->commit();
3645
2
            if (err != TxnErrorCode::TXN_OK) {
3646
0
                LOG_WARNING("failed to commit txn err={}", err)
3647
0
                        .tag("key", hex(k))
3648
0
                        .tag("db_id", db_id)
3649
0
                        .tag("txn_id", txn_id);
3650
0
                return -1;
3651
0
            }
3652
2
            metrics_context.total_recycled_num = ++num_abort;
3653
2
            metrics_context.report();
3654
2
        }
3655
3656
2
        return 0;
3657
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3558
4
                                  this](std::string_view k, std::string_view v) -> int {
3559
4
        ++num_scanned;
3560
3561
4
        std::unique_ptr<Transaction> txn;
3562
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3563
4
        if (err != TxnErrorCode::TXN_OK) {
3564
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
3565
0
            return -1;
3566
0
        }
3567
4
        std::string_view k1 = k;
3568
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
3569
4
        k1.remove_prefix(1); // Remove key space
3570
4
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3571
4
        if (decode_key(&k1, &out) != 0) {
3572
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
3573
0
            return -1;
3574
0
        }
3575
4
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3576
4
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3577
4
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
3578
        // Update txn_info
3579
4
        std::string txn_inf_key, txn_inf_val;
3580
4
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
3581
4
        err = txn->get(txn_inf_key, &txn_inf_val);
3582
4
        if (err != TxnErrorCode::TXN_OK) {
3583
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
3584
0
            return -1;
3585
0
        }
3586
4
        TxnInfoPB txn_info;
3587
4
        if (!txn_info.ParseFromString(txn_inf_val)) {
3588
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
3589
0
            return -1;
3590
0
        }
3591
3592
4
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
3593
4
            txn.reset();
3594
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
3595
4
            std::shared_ptr<TxnLazyCommitTask> task =
3596
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
3597
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
3598
4
            if (ret.first != MetaServiceCode::OK) {
3599
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
3600
0
                             << "msg=" << ret.second;
3601
0
                return -1;
3602
0
            }
3603
4
            ++num_advance;
3604
4
            return 0;
3605
4
        } else {
3606
0
            TxnRunningPB txn_running_pb;
3607
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
3608
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
3609
0
                return -1;
3610
0
            }
3611
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
3612
0
                return 0;
3613
0
            }
3614
0
            ++num_timeout;
3615
3616
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
3617
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
3618
0
            txn_info.set_finish_time(current_time);
3619
0
            txn_info.set_reason("timeout");
3620
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
3621
0
            txn_inf_val.clear();
3622
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
3623
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
3624
0
                return -1;
3625
0
            }
3626
0
            txn->put(txn_inf_key, txn_inf_val);
3627
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
3628
            // Put recycle txn key
3629
0
            std::string recyc_txn_key, recyc_txn_val;
3630
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
3631
0
            RecycleTxnPB recycle_txn_pb;
3632
0
            recycle_txn_pb.set_creation_time(current_time);
3633
0
            recycle_txn_pb.set_label(txn_info.label());
3634
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
3635
0
                LOG_WARNING("failed to serialize txn recycle info")
3636
0
                        .tag("key", hex(k))
3637
0
                        .tag("db_id", db_id)
3638
0
                        .tag("txn_id", txn_id);
3639
0
                return -1;
3640
0
            }
3641
0
            txn->put(recyc_txn_key, recyc_txn_val);
3642
            // Remove txn running key
3643
0
            txn->remove(k);
3644
0
            err = txn->commit();
3645
0
            if (err != TxnErrorCode::TXN_OK) {
3646
0
                LOG_WARNING("failed to commit txn err={}", err)
3647
0
                        .tag("key", hex(k))
3648
0
                        .tag("db_id", db_id)
3649
0
                        .tag("txn_id", txn_id);
3650
0
                return -1;
3651
0
            }
3652
0
            metrics_context.total_recycled_num = ++num_abort;
3653
0
            metrics_context.report();
3654
0
        }
3655
3656
0
        return 0;
3657
4
    };
3658
3659
20
    if (config::enable_recycler_stats_metrics) {
3660
0
        scan_and_statistics_abort_timeout_txn();
3661
0
    }
3662
    // recycle_func and loop_done for scan and recycle
3663
20
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
3664
20
                            std::move(handle_txn_running_kv));
3665
20
}
3666
3667
19
int InstanceRecycler::recycle_expired_txn_label() {
3668
19
    const std::string task_name = "recycle_expired_txn_label";
3669
19
    int64_t num_scanned = 0;
3670
19
    int64_t num_expired = 0;
3671
19
    int64_t num_recycled = 0;
3672
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3673
19
    int ret = 0;
3674
3675
19
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
3676
19
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
3677
19
    std::string begin_recycle_txn_key;
3678
19
    std::string end_recycle_txn_key;
3679
19
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
3680
19
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
3681
19
    std::vector<std::string> recycle_txn_info_keys;
3682
3683
19
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
3684
3685
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3686
19
    register_recycle_task(task_name, start_time);
3687
19
    DORIS_CLOUD_DEFER {
3688
19
        unregister_recycle_task(task_name);
3689
19
        int64_t cost =
3690
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3691
19
        metrics_context.finish_report();
3692
19
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
3693
19
                .tag("instance_id", instance_id_)
3694
19
                .tag("num_scanned", num_scanned)
3695
19
                .tag("num_expired", num_expired)
3696
19
                .tag("num_recycled", num_recycled);
3697
19
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
3687
16
    DORIS_CLOUD_DEFER {
3688
16
        unregister_recycle_task(task_name);
3689
16
        int64_t cost =
3690
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3691
16
        metrics_context.finish_report();
3692
16
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
3693
16
                .tag("instance_id", instance_id_)
3694
16
                .tag("num_scanned", num_scanned)
3695
16
                .tag("num_expired", num_expired)
3696
16
                .tag("num_recycled", num_recycled);
3697
16
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
3687
3
    DORIS_CLOUD_DEFER {
3688
3
        unregister_recycle_task(task_name);
3689
3
        int64_t cost =
3690
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3691
3
        metrics_context.finish_report();
3692
3
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
3693
3
                .tag("instance_id", instance_id_)
3694
3
                .tag("num_scanned", num_scanned)
3695
3
                .tag("num_expired", num_expired)
3696
3
                .tag("num_recycled", num_recycled);
3697
3
    };
3698
3699
19
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3700
3701
19
    SyncExecutor<int> concurrent_delete_executor(
3702
19
            _thread_pool_group.s3_producer_pool,
3703
19
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
3704
23.0k
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
3704
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
3704
3
            [](const int& ret) { return ret != 0; });
3705
3706
19
    int64_t current_time_ms =
3707
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
3708
3709
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
3710
30.0k
        ++num_scanned;
3711
30.0k
        RecycleTxnPB recycle_txn_pb;
3712
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
3713
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
3714
0
            return -1;
3715
0
        }
3716
30.0k
        if ((config::force_immediate_recycle) ||
3717
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
3718
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
3719
30.0k
             current_time_ms)) {
3720
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
3721
23.0k
            num_expired++;
3722
23.0k
            recycle_txn_info_keys.emplace_back(k);
3723
23.0k
        }
3724
30.0k
        return 0;
3725
30.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3709
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
3710
30.0k
        ++num_scanned;
3711
30.0k
        RecycleTxnPB recycle_txn_pb;
3712
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
3713
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
3714
0
            return -1;
3715
0
        }
3716
30.0k
        if ((config::force_immediate_recycle) ||
3717
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
3718
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
3719
30.0k
             current_time_ms)) {
3720
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
3721
23.0k
            num_expired++;
3722
23.0k
            recycle_txn_info_keys.emplace_back(k);
3723
23.0k
        }
3724
30.0k
        return 0;
3725
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3709
3
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
3710
3
        ++num_scanned;
3711
3
        RecycleTxnPB recycle_txn_pb;
3712
3
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
3713
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
3714
0
            return -1;
3715
0
        }
3716
3
        if ((config::force_immediate_recycle) ||
3717
3
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
3718
3
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
3719
3
             current_time_ms)) {
3720
3
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
3721
3
            num_expired++;
3722
3
            recycle_txn_info_keys.emplace_back(k);
3723
3
        }
3724
3
        return 0;
3725
3
    };
3726
3727
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
3728
23.0k
        std::string_view k1 = k;
3729
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
3730
23.0k
        k1.remove_prefix(1); // Remove key space
3731
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3732
23.0k
        int ret = decode_key(&k1, &out);
3733
23.0k
        if (ret != 0) {
3734
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
3735
0
            return -1;
3736
0
        }
3737
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3738
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3739
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
3740
23.0k
        std::unique_ptr<Transaction> txn;
3741
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3742
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3743
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
3744
0
            return -1;
3745
0
        }
3746
        // Remove txn index kv
3747
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
3748
23.0k
        txn->remove(index_key);
3749
        // Remove txn info kv
3750
23.0k
        std::string info_key, info_val;
3751
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
3752
23.0k
        err = txn->get(info_key, &info_val);
3753
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3754
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
3755
0
            return -1;
3756
0
        }
3757
23.0k
        TxnInfoPB txn_info;
3758
23.0k
        if (!txn_info.ParseFromString(info_val)) {
3759
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
3760
0
            return -1;
3761
0
        }
3762
23.0k
        txn->remove(info_key);
3763
        // Remove sub txn index kvs
3764
23.0k
        std::vector<std::string> sub_txn_index_keys;
3765
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
3766
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
3767
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
3768
22.9k
        }
3769
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
3770
22.9k
            txn->remove(sub_txn_index_key);
3771
22.9k
        }
3772
        // Update txn label
3773
23.0k
        std::string label_key, label_val;
3774
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
3775
23.0k
        err = txn->get(label_key, &label_val);
3776
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3777
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
3778
0
                         << " err=" << err;
3779
0
            return -1;
3780
0
        }
3781
23.0k
        TxnLabelPB txn_label;
3782
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
3783
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
3784
0
            return -1;
3785
0
        }
3786
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
3787
23.0k
        if (it != txn_label.txn_ids().end()) {
3788
23.0k
            txn_label.mutable_txn_ids()->erase(it);
3789
23.0k
        }
3790
23.0k
        if (txn_label.txn_ids().empty()) {
3791
23.0k
            txn->remove(label_key);
3792
23.0k
        } else {
3793
0
            if (!txn_label.SerializeToString(&label_val)) {
3794
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
3795
0
                return -1;
3796
0
            }
3797
0
            txn->atomic_set_ver_value(label_key, label_val);
3798
0
        }
3799
        // Remove recycle txn kv
3800
23.0k
        txn->remove(k);
3801
23.0k
        err = txn->commit();
3802
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3803
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
3804
0
            return -1;
3805
0
        }
3806
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
3807
23.0k
        metrics_context.report();
3808
3809
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
3810
23.0k
        return 0;
3811
23.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
3727
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
3728
23.0k
        std::string_view k1 = k;
3729
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
3730
23.0k
        k1.remove_prefix(1); // Remove key space
3731
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3732
23.0k
        int ret = decode_key(&k1, &out);
3733
23.0k
        if (ret != 0) {
3734
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
3735
0
            return -1;
3736
0
        }
3737
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3738
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3739
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
3740
23.0k
        std::unique_ptr<Transaction> txn;
3741
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3742
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3743
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
3744
0
            return -1;
3745
0
        }
3746
        // Remove txn index kv
3747
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
3748
23.0k
        txn->remove(index_key);
3749
        // Remove txn info kv
3750
23.0k
        std::string info_key, info_val;
3751
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
3752
23.0k
        err = txn->get(info_key, &info_val);
3753
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3754
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
3755
0
            return -1;
3756
0
        }
3757
23.0k
        TxnInfoPB txn_info;
3758
23.0k
        if (!txn_info.ParseFromString(info_val)) {
3759
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
3760
0
            return -1;
3761
0
        }
3762
23.0k
        txn->remove(info_key);
3763
        // Remove sub txn index kvs
3764
23.0k
        std::vector<std::string> sub_txn_index_keys;
3765
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
3766
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
3767
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
3768
22.9k
        }
3769
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
3770
22.9k
            txn->remove(sub_txn_index_key);
3771
22.9k
        }
3772
        // Update txn label
3773
23.0k
        std::string label_key, label_val;
3774
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
3775
23.0k
        err = txn->get(label_key, &label_val);
3776
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3777
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
3778
0
                         << " err=" << err;
3779
0
            return -1;
3780
0
        }
3781
23.0k
        TxnLabelPB txn_label;
3782
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
3783
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
3784
0
            return -1;
3785
0
        }
3786
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
3787
23.0k
        if (it != txn_label.txn_ids().end()) {
3788
23.0k
            txn_label.mutable_txn_ids()->erase(it);
3789
23.0k
        }
3790
23.0k
        if (txn_label.txn_ids().empty()) {
3791
23.0k
            txn->remove(label_key);
3792
23.0k
        } else {
3793
0
            if (!txn_label.SerializeToString(&label_val)) {
3794
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
3795
0
                return -1;
3796
0
            }
3797
0
            txn->atomic_set_ver_value(label_key, label_val);
3798
0
        }
3799
        // Remove recycle txn kv
3800
23.0k
        txn->remove(k);
3801
23.0k
        err = txn->commit();
3802
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3803
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
3804
0
            return -1;
3805
0
        }
3806
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
3807
23.0k
        metrics_context.report();
3808
3809
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
3810
23.0k
        return 0;
3811
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
3727
3
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
3728
3
        std::string_view k1 = k;
3729
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
3730
3
        k1.remove_prefix(1); // Remove key space
3731
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3732
3
        int ret = decode_key(&k1, &out);
3733
3
        if (ret != 0) {
3734
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
3735
0
            return -1;
3736
0
        }
3737
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3738
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3739
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
3740
3
        std::unique_ptr<Transaction> txn;
3741
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3742
3
        if (err != TxnErrorCode::TXN_OK) {
3743
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
3744
0
            return -1;
3745
0
        }
3746
        // Remove txn index kv
3747
3
        auto index_key = txn_index_key({instance_id_, txn_id});
3748
3
        txn->remove(index_key);
3749
        // Remove txn info kv
3750
3
        std::string info_key, info_val;
3751
3
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
3752
3
        err = txn->get(info_key, &info_val);
3753
3
        if (err != TxnErrorCode::TXN_OK) {
3754
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
3755
0
            return -1;
3756
0
        }
3757
3
        TxnInfoPB txn_info;
3758
3
        if (!txn_info.ParseFromString(info_val)) {
3759
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
3760
0
            return -1;
3761
0
        }
3762
3
        txn->remove(info_key);
3763
        // Remove sub txn index kvs
3764
3
        std::vector<std::string> sub_txn_index_keys;
3765
3
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
3766
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
3767
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
3768
0
        }
3769
3
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
3770
0
            txn->remove(sub_txn_index_key);
3771
0
        }
3772
        // Update txn label
3773
3
        std::string label_key, label_val;
3774
3
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
3775
3
        err = txn->get(label_key, &label_val);
3776
3
        if (err != TxnErrorCode::TXN_OK) {
3777
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
3778
0
                         << " err=" << err;
3779
0
            return -1;
3780
0
        }
3781
3
        TxnLabelPB txn_label;
3782
3
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
3783
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
3784
0
            return -1;
3785
0
        }
3786
3
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
3787
3
        if (it != txn_label.txn_ids().end()) {
3788
3
            txn_label.mutable_txn_ids()->erase(it);
3789
3
        }
3790
3
        if (txn_label.txn_ids().empty()) {
3791
3
            txn->remove(label_key);
3792
3
        } else {
3793
0
            if (!txn_label.SerializeToString(&label_val)) {
3794
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
3795
0
                return -1;
3796
0
            }
3797
0
            txn->atomic_set_ver_value(label_key, label_val);
3798
0
        }
3799
        // Remove recycle txn kv
3800
3
        txn->remove(k);
3801
3
        err = txn->commit();
3802
3
        if (err != TxnErrorCode::TXN_OK) {
3803
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
3804
0
            return -1;
3805
0
        }
3806
3
        metrics_context.total_recycled_num = ++num_recycled;
3807
3
        metrics_context.report();
3808
3809
3
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
3810
3
        return 0;
3811
3
    };
3812
3813
19
    auto loop_done = [&]() -> int {
3814
10
        DORIS_CLOUD_DEFER {
3815
10
            recycle_txn_info_keys.clear();
3816
10
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3814
7
        DORIS_CLOUD_DEFER {
3815
7
            recycle_txn_info_keys.clear();
3816
7
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3814
3
        DORIS_CLOUD_DEFER {
3815
3
            recycle_txn_info_keys.clear();
3816
3
        };
3817
10
        TEST_SYNC_POINT_CALLBACK(
3818
10
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
3819
10
                &recycle_txn_info_keys);
3820
23.0k
        for (const auto& k : recycle_txn_info_keys) {
3821
23.0k
            concurrent_delete_executor.add([&]() {
3822
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
3823
0
                    LOG_WARNING("failed to delete recycle txn kv")
3824
0
                            .tag("instance id", instance_id_)
3825
0
                            .tag("key", hex(k));
3826
0
                    return -1;
3827
0
                }
3828
23.0k
                return 0;
3829
23.0k
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
3821
23.0k
            concurrent_delete_executor.add([&]() {
3822
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
3823
0
                    LOG_WARNING("failed to delete recycle txn kv")
3824
0
                            .tag("instance id", instance_id_)
3825
0
                            .tag("key", hex(k));
3826
0
                    return -1;
3827
0
                }
3828
23.0k
                return 0;
3829
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
3821
3
            concurrent_delete_executor.add([&]() {
3822
3
                if (delete_recycle_txn_kv(k) != 0) {
3823
0
                    LOG_WARNING("failed to delete recycle txn kv")
3824
0
                            .tag("instance id", instance_id_)
3825
0
                            .tag("key", hex(k));
3826
0
                    return -1;
3827
0
                }
3828
3
                return 0;
3829
3
            });
3830
23.0k
        }
3831
10
        bool finished = true;
3832
10
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3833
23.0k
        for (int r : rets) {
3834
23.0k
            if (r != 0) {
3835
0
                ret = -1;
3836
0
            }
3837
23.0k
        }
3838
3839
10
        ret = finished ? ret : -1;
3840
3841
10
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
3842
3843
10
        if (ret != 0) {
3844
2
            LOG_WARNING("recycle txn kv ret!=0")
3845
2
                    .tag("finished", finished)
3846
2
                    .tag("ret", ret)
3847
2
                    .tag("instance_id", instance_id_);
3848
2
            return ret;
3849
2
        }
3850
8
        return ret;
3851
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
3813
7
    auto loop_done = [&]() -> int {
3814
7
        DORIS_CLOUD_DEFER {
3815
7
            recycle_txn_info_keys.clear();
3816
7
        };
3817
7
        TEST_SYNC_POINT_CALLBACK(
3818
7
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
3819
7
                &recycle_txn_info_keys);
3820
23.0k
        for (const auto& k : recycle_txn_info_keys) {
3821
23.0k
            concurrent_delete_executor.add([&]() {
3822
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
3823
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
3824
23.0k
                            .tag("instance id", instance_id_)
3825
23.0k
                            .tag("key", hex(k));
3826
23.0k
                    return -1;
3827
23.0k
                }
3828
23.0k
                return 0;
3829
23.0k
            });
3830
23.0k
        }
3831
7
        bool finished = true;
3832
7
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3833
23.0k
        for (int r : rets) {
3834
23.0k
            if (r != 0) {
3835
0
                ret = -1;
3836
0
            }
3837
23.0k
        }
3838
3839
7
        ret = finished ? ret : -1;
3840
3841
7
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
3842
3843
7
        if (ret != 0) {
3844
2
            LOG_WARNING("recycle txn kv ret!=0")
3845
2
                    .tag("finished", finished)
3846
2
                    .tag("ret", ret)
3847
2
                    .tag("instance_id", instance_id_);
3848
2
            return ret;
3849
2
        }
3850
5
        return ret;
3851
7
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
3813
3
    auto loop_done = [&]() -> int {
3814
3
        DORIS_CLOUD_DEFER {
3815
3
            recycle_txn_info_keys.clear();
3816
3
        };
3817
3
        TEST_SYNC_POINT_CALLBACK(
3818
3
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
3819
3
                &recycle_txn_info_keys);
3820
3
        for (const auto& k : recycle_txn_info_keys) {
3821
3
            concurrent_delete_executor.add([&]() {
3822
3
                if (delete_recycle_txn_kv(k) != 0) {
3823
3
                    LOG_WARNING("failed to delete recycle txn kv")
3824
3
                            .tag("instance id", instance_id_)
3825
3
                            .tag("key", hex(k));
3826
3
                    return -1;
3827
3
                }
3828
3
                return 0;
3829
3
            });
3830
3
        }
3831
3
        bool finished = true;
3832
3
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3833
3
        for (int r : rets) {
3834
3
            if (r != 0) {
3835
0
                ret = -1;
3836
0
            }
3837
3
        }
3838
3839
3
        ret = finished ? ret : -1;
3840
3841
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
3842
3843
3
        if (ret != 0) {
3844
0
            LOG_WARNING("recycle txn kv ret!=0")
3845
0
                    .tag("finished", finished)
3846
0
                    .tag("ret", ret)
3847
0
                    .tag("instance_id", instance_id_);
3848
0
            return ret;
3849
0
        }
3850
3
        return ret;
3851
3
    };
3852
3853
19
    if (config::enable_recycler_stats_metrics) {
3854
0
        scan_and_statistics_expired_txn_label();
3855
0
    }
3856
    // recycle_func and loop_done for scan and recycle
3857
19
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
3858
19
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
3859
19
}
3860
3861
// Identifiers of one copy job; used to build copy file keys and the log trace string.
struct CopyJobIdTuple {
    std::string instance_id;
    std::string stage_id;
    // Value-initialized so a default-constructed tuple never carries an indeterminate id.
    long table_id {0};
    std::string copy_id;
    std::string stage_path;
};
3868
struct BatchObjStoreAccessor {
3869
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
3870
                          TxnKv* txn_kv)
3871
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
3872
3
    ~BatchObjStoreAccessor() {
3873
3
        if (!paths_.empty()) {
3874
3
            consume();
3875
3
        }
3876
3
    }
3877
3878
    /**
3879
    * To implicitely do batch work and submit the batch delete task to s3
3880
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
3881
    *
3882
    * @param copy_job The protubuf struct consists of the copy job files.
3883
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
3884
    *            it would last until we finish the delete task, here we need pass one string value
3885
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
3886
    */
3887
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
3888
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
3889
5
        auto& file_keys = copy_file_keys_[key];
3890
5
        file_keys.log_trace =
3891
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
3892
5
                            instance_id, stage_id, table_id, copy_id, path);
3893
5
        std::string_view log_trace = file_keys.log_trace;
3894
2.03k
        for (const auto& file : copy_job.object_files()) {
3895
2.03k
            auto relative_path = file.relative_path();
3896
2.03k
            paths_.push_back(relative_path);
3897
2.03k
            file_keys.keys.push_back(copy_file_key(
3898
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
3899
2.03k
            LOG_INFO(log_trace)
3900
2.03k
                    .tag("relative_path", relative_path)
3901
2.03k
                    .tag("batch_count", batch_count_);
3902
2.03k
        }
3903
5
        LOG_INFO(log_trace)
3904
5
                .tag("objects_num", copy_job.object_files().size())
3905
5
                .tag("batch_count", batch_count_);
3906
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
3907
        // recommend using delete objects when objects num is less than 10)
3908
5
        if (paths_.size() < 1000) {
3909
3
            return;
3910
3
        }
3911
2
        consume();
3912
2
    }
3913
3914
private:
3915
5
    void consume() {
3916
5
        DORIS_CLOUD_DEFER {
3917
5
            paths_.clear();
3918
5
            copy_file_keys_.clear();
3919
5
            batch_count_++;
3920
3921
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
3922
5
                        batch_count_);
3923
5
        };
3924
3925
5
        StopWatch sw;
3926
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
3927
5
        if (0 != accessor_->delete_files(paths_)) {
3928
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
3929
2
                        paths_.size(), batch_count_, sw.elapsed_us());
3930
2
            return;
3931
2
        }
3932
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
3933
3
                    paths_.size(), batch_count_, sw.elapsed_us());
3934
        // delete fdb's keys
3935
3
        for (auto& file_keys : copy_file_keys_) {
3936
3
            auto& [log_trace, keys] = file_keys.second;
3937
3
            std::unique_ptr<Transaction> txn;
3938
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
3939
0
                LOG(WARNING) << "failed to create txn";
3940
0
                continue;
3941
0
            }
3942
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
3943
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
3944
            // limited, should not cause the txn commit failed.
3945
1.02k
            for (const auto& key : keys) {
3946
1.02k
                txn->remove(key);
3947
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
3948
1.02k
            }
3949
3
            txn->remove(file_keys.first);
3950
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
3951
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
3952
0
                continue;
3953
0
            }
3954
3
        }
3955
3
    }
3956
    std::shared_ptr<StorageVaultAccessor> accessor_;
3957
    // the path of the s3 files to be deleted
3958
    std::vector<std::string> paths_;
3959
    struct CopyFiles {
3960
        std::string log_trace;
3961
        std::vector<std::string> keys;
3962
    };
3963
    // pair<std::string, std::vector<std::string>>
3964
    // first: instance_id_ stage_id table_id query_id
3965
    // second: keys to be deleted
3966
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
3967
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
3968
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
3969
    // which can together uniquely identifies different tasks for tracing log
3970
    uint64_t& batch_count_;
3971
    TxnKv* txn_kv_;
3972
};
3973
3974
13
int InstanceRecycler::recycle_copy_jobs() {
3975
13
    int64_t num_scanned = 0;
3976
13
    int64_t num_finished = 0;
3977
13
    int64_t num_expired = 0;
3978
13
    int64_t num_recycled = 0;
3979
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
3980
13
    uint64_t batch_count = 0;
3981
13
    const std::string task_name = "recycle_copy_jobs";
3982
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3983
3984
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
3985
3986
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3987
13
    register_recycle_task(task_name, start_time);
3988
3989
13
    DORIS_CLOUD_DEFER {
3990
13
        unregister_recycle_task(task_name);
3991
13
        int64_t cost =
3992
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3993
13
        metrics_context.finish_report();
3994
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
3995
13
                .tag("instance_id", instance_id_)
3996
13
                .tag("num_scanned", num_scanned)
3997
13
                .tag("num_finished", num_finished)
3998
13
                .tag("num_expired", num_expired)
3999
13
                .tag("num_recycled", num_recycled);
4000
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
3989
13
    DORIS_CLOUD_DEFER {
3990
13
        unregister_recycle_task(task_name);
3991
13
        int64_t cost =
3992
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3993
13
        metrics_context.finish_report();
3994
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
3995
13
                .tag("instance_id", instance_id_)
3996
13
                .tag("num_scanned", num_scanned)
3997
13
                .tag("num_finished", num_finished)
3998
13
                .tag("num_expired", num_expired)
3999
13
                .tag("num_recycled", num_recycled);
4000
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
4001
4002
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
4003
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
4004
13
    std::string key0;
4005
13
    std::string key1;
4006
13
    copy_job_key(key_info0, &key0);
4007
13
    copy_job_key(key_info1, &key1);
4008
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
4009
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
4010
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
4011
16
                         this](std::string_view k, std::string_view v) -> int {
4012
16
        ++num_scanned;
4013
16
        CopyJobPB copy_job;
4014
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
4015
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
4016
0
            return -1;
4017
0
        }
4018
4019
        // decode copy job key
4020
16
        auto k1 = k;
4021
16
        k1.remove_prefix(1);
4022
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4023
16
        decode_key(&k1, &out);
4024
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
4025
        // -> CopyJobPB
4026
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
4027
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
4028
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
4029
4030
16
        bool check_storage = true;
4031
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
4032
12
            ++num_finished;
4033
4034
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
4035
7
                auto it = stage_accessor_map.find(stage_id);
4036
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
4037
7
                std::string_view path;
4038
7
                if (it != stage_accessor_map.end()) {
4039
2
                    accessor = it->second;
4040
5
                } else {
4041
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
4042
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
4043
5
                                                      &inner_accessor);
4044
5
                    if (ret < 0) { // error
4045
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
4046
0
                        return -1;
4047
5
                    } else if (ret == 0) {
4048
3
                        path = inner_accessor->uri();
4049
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
4050
3
                                inner_accessor, batch_count, txn_kv_.get());
4051
3
                        stage_accessor_map.emplace(stage_id, accessor);
4052
3
                    } else { // stage not found, skip check storage
4053
2
                        check_storage = false;
4054
2
                    }
4055
5
                }
4056
7
                if (check_storage) {
4057
                    // TODO delete objects with key and etag is not supported
4058
5
                    accessor->add(std::move(copy_job), std::string(k),
4059
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
4060
5
                    return 0;
4061
5
                }
4062
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
4063
5
                int64_t current_time =
4064
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4065
5
                if (copy_job.finish_time_ms() > 0) {
4066
2
                    if (!config::force_immediate_recycle &&
4067
2
                        current_time < copy_job.finish_time_ms() +
4068
2
                                               config::copy_job_max_retention_second * 1000) {
4069
1
                        return 0;
4070
1
                    }
4071
3
                } else {
4072
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
4073
3
                    if (!config::force_immediate_recycle &&
4074
3
                        current_time < copy_job.start_time_ms() +
4075
3
                                               config::copy_job_max_retention_second * 1000) {
4076
1
                        return 0;
4077
1
                    }
4078
3
                }
4079
5
            }
4080
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
4081
4
            int64_t current_time =
4082
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4083
            // if copy job is timeout: delete all copy file kvs and copy job kv
4084
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
4085
2
                return 0;
4086
2
            }
4087
2
            ++num_expired;
4088
2
        }
4089
4090
        // delete all copy files
4091
7
        std::vector<std::string> copy_file_keys;
4092
70
        for (auto& file : copy_job.object_files()) {
4093
70
            copy_file_keys.push_back(copy_file_key(
4094
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
4095
70
        }
4096
7
        std::unique_ptr<Transaction> txn;
4097
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4098
0
            LOG(WARNING) << "failed to create txn";
4099
0
            return -1;
4100
0
        }
4101
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
4102
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
4103
        // limited, should not cause the txn commit failed.
4104
70
        for (const auto& key : copy_file_keys) {
4105
70
            txn->remove(key);
4106
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
4107
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
4108
70
                      << ", query_id=" << copy_id;
4109
70
        }
4110
7
        txn->remove(k);
4111
7
        TxnErrorCode err = txn->commit();
4112
7
        if (err != TxnErrorCode::TXN_OK) {
4113
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
4114
0
            return -1;
4115
0
        }
4116
4117
7
        metrics_context.total_recycled_num = ++num_recycled;
4118
7
        metrics_context.report();
4119
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4120
7
        return 0;
4121
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4011
16
                         this](std::string_view k, std::string_view v) -> int {
4012
16
        ++num_scanned;
4013
16
        CopyJobPB copy_job;
4014
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
4015
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
4016
0
            return -1;
4017
0
        }
4018
4019
        // decode copy job key
4020
16
        auto k1 = k;
4021
16
        k1.remove_prefix(1);
4022
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4023
16
        decode_key(&k1, &out);
4024
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
4025
        // -> CopyJobPB
4026
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
4027
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
4028
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
4029
4030
16
        bool check_storage = true;
4031
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
4032
12
            ++num_finished;
4033
4034
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
4035
7
                auto it = stage_accessor_map.find(stage_id);
4036
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
4037
7
                std::string_view path;
4038
7
                if (it != stage_accessor_map.end()) {
4039
2
                    accessor = it->second;
4040
5
                } else {
4041
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
4042
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
4043
5
                                                      &inner_accessor);
4044
5
                    if (ret < 0) { // error
4045
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
4046
0
                        return -1;
4047
5
                    } else if (ret == 0) {
4048
3
                        path = inner_accessor->uri();
4049
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
4050
3
                                inner_accessor, batch_count, txn_kv_.get());
4051
3
                        stage_accessor_map.emplace(stage_id, accessor);
4052
3
                    } else { // stage not found, skip check storage
4053
2
                        check_storage = false;
4054
2
                    }
4055
5
                }
4056
7
                if (check_storage) {
4057
                    // TODO delete objects with key and etag is not supported
4058
5
                    accessor->add(std::move(copy_job), std::string(k),
4059
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
4060
5
                    return 0;
4061
5
                }
4062
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
4063
5
                int64_t current_time =
4064
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4065
5
                if (copy_job.finish_time_ms() > 0) {
4066
2
                    if (!config::force_immediate_recycle &&
4067
2
                        current_time < copy_job.finish_time_ms() +
4068
2
                                               config::copy_job_max_retention_second * 1000) {
4069
1
                        return 0;
4070
1
                    }
4071
3
                } else {
4072
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
4073
3
                    if (!config::force_immediate_recycle &&
4074
3
                        current_time < copy_job.start_time_ms() +
4075
3
                                               config::copy_job_max_retention_second * 1000) {
4076
1
                        return 0;
4077
1
                    }
4078
3
                }
4079
5
            }
4080
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
4081
4
            int64_t current_time =
4082
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4083
            // if copy job is timeout: delete all copy file kvs and copy job kv
4084
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
4085
2
                return 0;
4086
2
            }
4087
2
            ++num_expired;
4088
2
        }
4089
4090
        // delete all copy files
4091
7
        std::vector<std::string> copy_file_keys;
4092
70
        for (auto& file : copy_job.object_files()) {
4093
70
            copy_file_keys.push_back(copy_file_key(
4094
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
4095
70
        }
4096
7
        std::unique_ptr<Transaction> txn;
4097
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4098
0
            LOG(WARNING) << "failed to create txn";
4099
0
            return -1;
4100
0
        }
4101
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
4102
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
4103
        // limited, should not cause the txn commit failed.
4104
70
        for (const auto& key : copy_file_keys) {
4105
70
            txn->remove(key);
4106
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
4107
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
4108
70
                      << ", query_id=" << copy_id;
4109
70
        }
4110
7
        txn->remove(k);
4111
7
        TxnErrorCode err = txn->commit();
4112
7
        if (err != TxnErrorCode::TXN_OK) {
4113
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
4114
0
            return -1;
4115
0
        }
4116
4117
7
        metrics_context.total_recycled_num = ++num_recycled;
4118
7
        metrics_context.report();
4119
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4120
7
        return 0;
4121
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
4122
4123
13
    if (config::enable_recycler_stats_metrics) {
4124
0
        scan_and_statistics_copy_jobs();
4125
0
    }
4126
    // recycle_func and loop_done for scan and recycle
4127
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
4128
13
}
4129
4130
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
4131
                                             const StagePB::StageType& stage_type,
4132
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
4133
5
#ifdef UNIT_TEST
4134
    // In unit test, external use the same accessor as the internal stage
4135
5
    auto it = accessor_map_.find(stage_id);
4136
5
    if (it != accessor_map_.end()) {
4137
3
        *accessor = it->second;
4138
3
    } else {
4139
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
4140
2
        return 1;
4141
2
    }
4142
#else
4143
    // init s3 accessor and add to accessor map
4144
    auto stage_it =
4145
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
4146
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
4147
4148
    if (stage_it == instance_info_.stages().end()) {
4149
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
4150
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
4151
        return 1;
4152
    }
4153
4154
    const auto& object_store_info = stage_it->obj_info();
4155
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
4156
4157
    S3Conf s3_conf;
4158
    if (stage_type == StagePB::EXTERNAL) {
4159
        if (stage_access_type == StagePB::AKSK) {
4160
            auto conf = S3Conf::from_obj_store_info(object_store_info);
4161
            if (!conf) {
4162
                return -1;
4163
            }
4164
4165
            s3_conf = std::move(*conf);
4166
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
4167
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
4168
            if (!conf) {
4169
                return -1;
4170
            }
4171
4172
            s3_conf = std::move(*conf);
4173
            if (instance_info_.ram_user().has_encryption_info()) {
4174
                AkSkPair plain_ak_sk_pair;
4175
                int ret = decrypt_ak_sk_helper(
4176
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
4177
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
4178
                if (ret != 0) {
4179
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
4180
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
4181
                    return -1;
4182
                }
4183
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
4184
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
4185
            } else {
4186
                s3_conf.ak = instance_info_.ram_user().ak();
4187
                s3_conf.sk = instance_info_.ram_user().sk();
4188
            }
4189
        } else {
4190
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
4191
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
4192
            return -1;
4193
        }
4194
    } else if (stage_type == StagePB::INTERNAL) {
4195
        int idx = stoi(object_store_info.id());
4196
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4197
            LOG(WARNING) << "invalid idx: " << idx;
4198
            return -1;
4199
        }
4200
4201
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
4202
        auto conf = S3Conf::from_obj_store_info(old_obj);
4203
        if (!conf) {
4204
            return -1;
4205
        }
4206
4207
        s3_conf = std::move(*conf);
4208
        s3_conf.prefix = object_store_info.prefix();
4209
    } else {
4210
        LOG(WARNING) << "unknown stage type " << stage_type;
4211
        return -1;
4212
    }
4213
4214
    std::shared_ptr<S3Accessor> s3_accessor;
4215
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
4216
    if (ret != 0) {
4217
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
4218
        return -1;
4219
    }
4220
4221
    *accessor = std::move(s3_accessor);
4222
#endif
4223
3
    return 0;
4224
5
}
4225
4226
11
int InstanceRecycler::recycle_stage() {
4227
11
    int64_t num_scanned = 0;
4228
11
    int64_t num_recycled = 0;
4229
11
    const std::string task_name = "recycle_stage";
4230
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4231
4232
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
4233
4234
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4235
11
    register_recycle_task(task_name, start_time);
4236
4237
11
    DORIS_CLOUD_DEFER {
4238
11
        unregister_recycle_task(task_name);
4239
11
        int64_t cost =
4240
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4241
11
        metrics_context.finish_report();
4242
11
        LOG_WARNING("recycle stage, cost={}s", cost)
4243
11
                .tag("instance_id", instance_id_)
4244
11
                .tag("num_scanned", num_scanned)
4245
11
                .tag("num_recycled", num_recycled);
4246
11
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
4237
11
    DORIS_CLOUD_DEFER {
4238
11
        unregister_recycle_task(task_name);
4239
11
        int64_t cost =
4240
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4241
11
        metrics_context.finish_report();
4242
11
        LOG_WARNING("recycle stage, cost={}s", cost)
4243
11
                .tag("instance_id", instance_id_)
4244
11
                .tag("num_scanned", num_scanned)
4245
11
                .tag("num_recycled", num_recycled);
4246
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
4247
4248
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
4249
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
4250
11
    std::string key0 = recycle_stage_key(key_info0);
4251
11
    std::string key1 = recycle_stage_key(key_info1);
4252
4253
11
    std::vector<std::string_view> stage_keys;
4254
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
4255
11
                         this](std::string_view k, std::string_view v) -> int {
4256
1
        ++num_scanned;
4257
1
        RecycleStagePB recycle_stage;
4258
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
4259
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
4260
0
            return -1;
4261
0
        }
4262
4263
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
4264
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4265
0
            LOG(WARNING) << "invalid idx: " << idx;
4266
0
            return -1;
4267
0
        }
4268
4269
1
        std::shared_ptr<StorageVaultAccessor> accessor;
4270
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
4271
1
                [&] {
4272
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
4273
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4274
1
                    if (!s3_conf) {
4275
1
                        return -1;
4276
1
                    }
4277
4278
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
4279
1
                    std::shared_ptr<S3Accessor> s3_accessor;
4280
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
4281
1
                    if (ret != 0) {
4282
1
                        return -1;
4283
1
                    }
4284
4285
1
                    accessor = std::move(s3_accessor);
4286
1
                    return 0;
4287
1
                }(),
4288
1
                "recycle_stage:get_accessor", &accessor);
4289
4290
1
        if (ret != 0) {
4291
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
4292
0
            return ret;
4293
0
        }
4294
4295
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
4296
1
                .tag("instance_id", instance_id_)
4297
1
                .tag("stage_id", recycle_stage.stage().stage_id())
4298
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
4299
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
4300
1
                .tag("obj_info_id", idx)
4301
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
4302
1
        ret = accessor->delete_all();
4303
1
        if (ret != 0) {
4304
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
4305
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
4306
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
4307
0
                         << ", ret=" << ret;
4308
0
            return -1;
4309
0
        }
4310
1
        metrics_context.total_recycled_num = ++num_recycled;
4311
1
        metrics_context.report();
4312
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
4313
1
        stage_keys.push_back(k);
4314
1
        return 0;
4315
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4255
1
                         this](std::string_view k, std::string_view v) -> int {
4256
1
        ++num_scanned;
4257
1
        RecycleStagePB recycle_stage;
4258
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
4259
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
4260
0
            return -1;
4261
0
        }
4262
4263
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
4264
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4265
0
            LOG(WARNING) << "invalid idx: " << idx;
4266
0
            return -1;
4267
0
        }
4268
4269
1
        std::shared_ptr<StorageVaultAccessor> accessor;
4270
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
4271
1
                [&] {
4272
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
4273
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4274
1
                    if (!s3_conf) {
4275
1
                        return -1;
4276
1
                    }
4277
4278
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
4279
1
                    std::shared_ptr<S3Accessor> s3_accessor;
4280
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
4281
1
                    if (ret != 0) {
4282
1
                        return -1;
4283
1
                    }
4284
4285
1
                    accessor = std::move(s3_accessor);
4286
1
                    return 0;
4287
1
                }(),
4288
1
                "recycle_stage:get_accessor", &accessor);
4289
4290
1
        if (ret != 0) {
4291
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
4292
0
            return ret;
4293
0
        }
4294
4295
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
4296
1
                .tag("instance_id", instance_id_)
4297
1
                .tag("stage_id", recycle_stage.stage().stage_id())
4298
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
4299
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
4300
1
                .tag("obj_info_id", idx)
4301
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
4302
1
        ret = accessor->delete_all();
4303
1
        if (ret != 0) {
4304
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
4305
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
4306
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
4307
0
                         << ", ret=" << ret;
4308
0
            return -1;
4309
0
        }
4310
1
        metrics_context.total_recycled_num = ++num_recycled;
4311
1
        metrics_context.report();
4312
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
4313
1
        stage_keys.push_back(k);
4314
1
        return 0;
4315
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
4316
4317
11
    auto loop_done = [&stage_keys, this]() -> int {
4318
1
        if (stage_keys.empty()) return 0;
4319
1
        DORIS_CLOUD_DEFER {
4320
1
            stage_keys.clear();
4321
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4319
1
        DORIS_CLOUD_DEFER {
4320
1
            stage_keys.clear();
4321
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
4322
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
4323
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
4324
0
            return -1;
4325
0
        }
4326
1
        return 0;
4327
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
4317
1
    auto loop_done = [&stage_keys, this]() -> int {
4318
1
        if (stage_keys.empty()) return 0;
4319
1
        DORIS_CLOUD_DEFER {
4320
1
            stage_keys.clear();
4321
1
        };
4322
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
4323
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
4324
0
            return -1;
4325
0
        }
4326
1
        return 0;
4327
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
4328
11
    if (config::enable_recycler_stats_metrics) {
4329
0
        scan_and_statistics_stage();
4330
0
    }
4331
    // recycle_func and loop_done for scan and recycle
4332
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
4333
11
}
4334
4335
10
int InstanceRecycler::recycle_expired_stage_objects() {
4336
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
4337
4338
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4339
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
4340
4341
10
    DORIS_CLOUD_DEFER {
4342
10
        int64_t cost =
4343
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4344
10
        metrics_context.finish_report();
4345
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
4346
10
                .tag("instance_id", instance_id_);
4347
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
4341
10
    DORIS_CLOUD_DEFER {
4342
10
        int64_t cost =
4343
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4344
10
        metrics_context.finish_report();
4345
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
4346
10
                .tag("instance_id", instance_id_);
4347
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
4348
4349
10
    int ret = 0;
4350
4351
10
    if (config::enable_recycler_stats_metrics) {
4352
0
        scan_and_statistics_expired_stage_objects();
4353
0
    }
4354
4355
10
    for (const auto& stage : instance_info_.stages()) {
4356
0
        std::stringstream ss;
4357
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
4358
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
4359
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
4360
0
           << ", prefix=" << stage.obj_info().prefix();
4361
4362
0
        if (stopped()) {
4363
0
            break;
4364
0
        }
4365
0
        if (stage.type() == StagePB::EXTERNAL) {
4366
0
            continue;
4367
0
        }
4368
0
        int idx = stoi(stage.obj_info().id());
4369
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4370
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
4371
0
            continue;
4372
0
        }
4373
4374
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
4375
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4376
0
        if (!s3_conf) {
4377
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
4378
0
            continue;
4379
0
        }
4380
4381
0
        s3_conf->prefix = stage.obj_info().prefix();
4382
0
        std::shared_ptr<S3Accessor> accessor;
4383
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
4384
0
        if (ret1 != 0) {
4385
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
4386
0
            ret = -1;
4387
0
            continue;
4388
0
        }
4389
4390
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
4391
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
4392
0
            ret = -1;
4393
0
            continue;
4394
0
        }
4395
4396
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
4397
0
        int64_t expiration_time =
4398
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
4399
0
                config::internal_stage_objects_expire_time_second;
4400
0
        if (config::force_immediate_recycle) {
4401
0
            expiration_time = INT64_MAX;
4402
0
        }
4403
0
        ret1 = accessor->delete_all(expiration_time);
4404
0
        if (ret1 != 0) {
4405
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
4406
0
                         << ss.str();
4407
0
            ret = -1;
4408
0
            continue;
4409
0
        }
4410
0
        metrics_context.total_recycled_num++;
4411
0
        metrics_context.report();
4412
0
    }
4413
10
    return ret;
4414
10
}
4415
4416
134
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
4417
134
    std::lock_guard lock(recycle_tasks_mutex);
4418
134
    running_recycle_tasks[task_name] = start_time;
4419
134
}
4420
4421
134
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
4422
134
    std::lock_guard lock(recycle_tasks_mutex);
4423
134
    DCHECK(running_recycle_tasks[task_name] > 0);
4424
134
    running_recycle_tasks.erase(task_name);
4425
134
}
4426
4427
21
bool InstanceRecycler::check_recycle_tasks() {
4428
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
4429
21
    {
4430
21
        std::lock_guard lock(recycle_tasks_mutex);
4431
21
        tmp_running_recycle_tasks = running_recycle_tasks;
4432
21
    }
4433
4434
21
    bool found = false;
4435
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4436
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
4437
20
        int64_t cost = now - start_time;
4438
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
4439
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
4440
20
                    .tag("instance_id", instance_id_)
4441
20
                    .tag("task", task_name);
4442
20
            found = true;
4443
20
        }
4444
20
    }
4445
4446
21
    return found;
4447
21
}
4448
4449
// Scan and statistics indexes that need to be recycled
4450
0
int InstanceRecycler::scan_and_statistics_indexes() {
4451
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
4452
4453
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
4454
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
4455
0
    std::string index_key0;
4456
0
    std::string index_key1;
4457
0
    recycle_index_key(index_key_info0, &index_key0);
4458
0
    recycle_index_key(index_key_info1, &index_key1);
4459
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4460
4461
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
4462
0
        RecycleIndexPB index_pb;
4463
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
4464
0
            return 0;
4465
0
        }
4466
0
        int64_t current_time = ::time(nullptr);
4467
0
        if (current_time <
4468
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
4469
0
            return 0;
4470
0
        }
4471
        // decode index_id
4472
0
        auto k1 = k;
4473
0
        k1.remove_prefix(1);
4474
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4475
0
        decode_key(&k1, &out);
4476
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
4477
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
4478
0
        std::unique_ptr<Transaction> txn;
4479
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4480
0
        if (err != TxnErrorCode::TXN_OK) {
4481
0
            return 0;
4482
0
        }
4483
0
        std::string val;
4484
0
        err = txn->get(k, &val);
4485
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
4486
0
            return 0;
4487
0
        }
4488
0
        if (err != TxnErrorCode::TXN_OK) {
4489
0
            return 0;
4490
0
        }
4491
0
        index_pb.Clear();
4492
0
        if (!index_pb.ParseFromString(val)) {
4493
0
            return 0;
4494
0
        }
4495
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
4496
0
            return 0;
4497
0
        }
4498
0
        metrics_context.total_need_recycle_num++;
4499
0
        return 0;
4500
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4501
4502
0
    return scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv),
4503
0
                            [&metrics_context]() -> int {
4504
0
                                metrics_context.report(true);
4505
0
                                segment_metrics_context_.report(true);
4506
0
                                tablet_metrics_context_.report(true);
4507
0
                                return 0;
4508
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_1clEv
4509
0
}
4510
4511
// Scan and statistics partitions that need to be recycled
4512
0
int InstanceRecycler::scan_and_statistics_partitions() {
4513
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
4514
4515
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
4516
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
4517
0
    std::string part_key0;
4518
0
    std::string part_key1;
4519
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4520
4521
0
    recycle_partition_key(part_key_info0, &part_key0);
4522
0
    recycle_partition_key(part_key_info1, &part_key1);
4523
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
4524
0
        RecyclePartitionPB part_pb;
4525
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
4526
0
            return 0;
4527
0
        }
4528
0
        int64_t current_time = ::time(nullptr);
4529
0
        if (current_time <
4530
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
4531
0
            return 0;
4532
0
        }
4533
        // decode partition_id
4534
0
        auto k1 = k;
4535
0
        k1.remove_prefix(1);
4536
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4537
0
        decode_key(&k1, &out);
4538
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
4539
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
4540
        // Change state to RECYCLING
4541
0
        std::unique_ptr<Transaction> txn;
4542
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4543
0
        if (err != TxnErrorCode::TXN_OK) {
4544
0
            return 0;
4545
0
        }
4546
0
        std::string val;
4547
0
        err = txn->get(k, &val);
4548
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
4549
0
            return 0;
4550
0
        }
4551
0
        if (err != TxnErrorCode::TXN_OK) {
4552
0
            return 0;
4553
0
        }
4554
0
        part_pb.Clear();
4555
0
        if (!part_pb.ParseFromString(val)) {
4556
0
            return 0;
4557
0
        }
4558
        // Partitions with PREPARED state MUST have no data
4559
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
4560
0
        int ret = 0;
4561
0
        for (int64_t index_id : part_pb.index_id()) {
4562
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
4563
0
                                            partition_id, is_empty_tablet) != 0) {
4564
0
                ret = 0;
4565
0
            }
4566
0
        }
4567
0
        metrics_context.total_need_recycle_num++;
4568
0
        return ret;
4569
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4570
0
    return scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv),
4571
0
                            [&metrics_context]() -> int {
4572
0
                                metrics_context.report(true);
4573
0
                                segment_metrics_context_.report(true);
4574
0
                                tablet_metrics_context_.report(true);
4575
0
                                return 0;
4576
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_1clEv
4577
0
}
4578
4579
// Scan and statistics rowsets that need to be recycled
4580
0
int InstanceRecycler::scan_and_statistics_rowsets() {
4581
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
4582
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4583
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4584
0
    std::string recyc_rs_key0;
4585
0
    std::string recyc_rs_key1;
4586
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4587
0
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4588
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4589
4590
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
4591
0
        RecycleRowsetPB rowset;
4592
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4593
0
            return 0;
4594
0
        }
4595
0
        int64_t current_time = ::time(nullptr);
4596
0
        if (current_time <
4597
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
4598
0
            return 0;
4599
0
        }
4600
0
        if (!rowset.has_type()) {
4601
0
            if (!rowset.has_resource_id()) [[unlikely]] {
4602
0
                return 0;
4603
0
            }
4604
0
            if (rowset.resource_id().empty()) [[unlikely]] {
4605
0
                return 0;
4606
0
            }
4607
0
            return 0;
4608
0
        }
4609
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
4610
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
4611
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
4612
0
                return 0;
4613
0
            }
4614
0
        }
4615
0
        metrics_context.total_need_recycle_num++;
4616
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
4617
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
4618
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
4619
0
        return 0;
4620
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4621
0
    return scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
4622
0
                            [&metrics_context]() -> int {
4623
0
                                metrics_context.report(true);
4624
0
                                segment_metrics_context_.report(true);
4625
0
                                return 0;
4626
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_1clEv
4627
0
}
4628
4629
// Scan and statistics tmp_rowsets that need to be recycled
4630
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
4631
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
4632
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
4633
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
4634
0
    std::string tmp_rs_key0;
4635
0
    std::string tmp_rs_key1;
4636
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
4637
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
4638
4639
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4640
4641
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
4642
0
        doris::RowsetMetaCloudPB rowset;
4643
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4644
0
            return 0;
4645
0
        }
4646
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4647
0
        int64_t current_time = ::time(nullptr);
4648
0
        if (current_time < expiration) {
4649
0
            return 0;
4650
0
        }
4651
4652
0
        DCHECK_GT(rowset.txn_id(), 0)
4653
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
4654
0
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
4655
0
            return 0;
4656
0
        }
4657
4658
0
        if (!rowset.has_resource_id()) {
4659
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
4660
0
                return 0;
4661
0
            }
4662
0
            return 0;
4663
0
        }
4664
4665
0
        metrics_context.total_need_recycle_num++;
4666
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
4667
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
4668
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
4669
0
        return 0;
4670
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4671
0
    return scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv),
4672
0
                            [&metrics_context]() -> int {
4673
0
                                metrics_context.report(true);
4674
0
                                segment_metrics_context_.report(true);
4675
0
                                return 0;
4676
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_1clEv
4677
0
}
4678
4679
// Scan and statistics abort_timeout_txn that need to be recycled
4680
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
4681
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
4682
4683
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
4684
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
4685
0
    std::string begin_txn_running_key;
4686
0
    std::string end_txn_running_key;
4687
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
4688
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
4689
4690
0
    int64_t current_time =
4691
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4692
4693
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
4694
0
                                               std::string_view k, std::string_view v) -> int {
4695
0
        std::unique_ptr<Transaction> txn;
4696
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4697
0
        if (err != TxnErrorCode::TXN_OK) {
4698
0
            return 0;
4699
0
        }
4700
0
        std::string_view k1 = k;
4701
0
        k1.remove_prefix(1);
4702
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4703
0
        if (decode_key(&k1, &out) != 0) {
4704
0
            return 0;
4705
0
        }
4706
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4707
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4708
        // Update txn_info
4709
0
        std::string txn_inf_key, txn_inf_val;
4710
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
4711
0
        err = txn->get(txn_inf_key, &txn_inf_val);
4712
0
        if (err != TxnErrorCode::TXN_OK) {
4713
0
            return 0;
4714
0
        }
4715
0
        TxnInfoPB txn_info;
4716
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
4717
0
            return 0;
4718
0
        }
4719
4720
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
4721
0
            TxnRunningPB txn_running_pb;
4722
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
4723
0
                return 0;
4724
0
            }
4725
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
4726
0
                return 0;
4727
0
            }
4728
0
            metrics_context.total_need_recycle_num++;
4729
0
        }
4730
0
        return 0;
4731
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4732
0
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
4733
0
                            std::move(handle_abort_timeout_txn_kv), [&metrics_context]() -> int {
4734
0
                                metrics_context.report(true);
4735
0
                                return 0;
4736
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_1clEv
4737
0
}
4738
4739
// Scan and statistics expired_txn_label that need to be recycled
4740
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
4741
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
4742
4743
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
4744
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
4745
0
    std::string begin_recycle_txn_key;
4746
0
    std::string end_recycle_txn_key;
4747
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
4748
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
4749
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4750
0
    int64_t current_time_ms =
4751
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4752
4753
    // for calculate the total num or bytes of recyled objects
4754
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
4755
0
        RecycleTxnPB recycle_txn_pb;
4756
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
4757
0
            return 0;
4758
0
        }
4759
0
        if ((config::force_immediate_recycle) ||
4760
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
4761
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
4762
0
             current_time_ms)) {
4763
0
            metrics_context.total_need_recycle_num++;
4764
0
        }
4765
0
        return 0;
4766
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4767
0
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
4768
0
                            std::move(handle_expired_txn_label_kv), [&metrics_context]() -> int {
4769
0
                                metrics_context.report(true);
4770
0
                                return 0;
4771
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_1clEv
4772
0
}
4773
4774
// Scan and statistics copy_jobs that need to be recycled
4775
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
4776
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
4777
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
4778
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
4779
0
    std::string key0;
4780
0
    std::string key1;
4781
0
    copy_job_key(key_info0, &key0);
4782
0
    copy_job_key(key_info1, &key1);
4783
4784
    // for calculate the total num or bytes of recyled objects
4785
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
4786
0
        CopyJobPB copy_job;
4787
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
4788
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
4789
0
            return 0;
4790
0
        }
4791
4792
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
4793
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
4794
0
                int64_t current_time =
4795
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4796
0
                if (copy_job.finish_time_ms() > 0) {
4797
0
                    if (!config::force_immediate_recycle &&
4798
0
                        current_time < copy_job.finish_time_ms() +
4799
0
                                               config::copy_job_max_retention_second * 1000) {
4800
0
                        return 0;
4801
0
                    }
4802
0
                } else {
4803
0
                    if (!config::force_immediate_recycle &&
4804
0
                        current_time < copy_job.start_time_ms() +
4805
0
                                               config::copy_job_max_retention_second * 1000) {
4806
0
                        return 0;
4807
0
                    }
4808
0
                }
4809
0
            }
4810
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
4811
0
            int64_t current_time =
4812
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4813
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
4814
0
                return 0;
4815
0
            }
4816
0
        }
4817
0
        metrics_context.total_need_recycle_num++;
4818
0
        return 0;
4819
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4820
4821
0
    return scan_and_recycle(key0, key1, std::move(scan_and_statistics),
4822
0
                            [&metrics_context]() -> int {
4823
0
                                metrics_context.report(true);
4824
0
                                return 0;
4825
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_1clEv
4826
0
}
4827
4828
// Scan and statistics stage that need to be recycled
4829
0
int InstanceRecycler::scan_and_statistics_stage() {
4830
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
4831
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
4832
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
4833
0
    std::string key0 = recycle_stage_key(key_info0);
4834
0
    std::string key1 = recycle_stage_key(key_info1);
4835
4836
    // for calculate the total num or bytes of recyled objects
4837
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
4838
0
                                                        std::string_view v) -> int {
4839
0
        RecycleStagePB recycle_stage;
4840
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
4841
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
4842
0
            return 0;
4843
0
        }
4844
4845
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
4846
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4847
0
            LOG(WARNING) << "invalid idx: " << idx;
4848
0
            return 0;
4849
0
        }
4850
4851
0
        std::shared_ptr<StorageVaultAccessor> accessor;
4852
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
4853
0
                [&] {
4854
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
4855
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4856
0
                    if (!s3_conf) {
4857
0
                        return 0;
4858
0
                    }
4859
4860
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
4861
0
                    std::shared_ptr<S3Accessor> s3_accessor;
4862
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
4863
0
                    if (ret != 0) {
4864
0
                        return 0;
4865
0
                    }
4866
4867
0
                    accessor = std::move(s3_accessor);
4868
0
                    return 0;
4869
0
                }(),
4870
0
                "recycle_stage:get_accessor", &accessor);
4871
4872
0
        if (ret != 0) {
4873
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
4874
0
            return 0;
4875
0
        }
4876
4877
0
        metrics_context.total_need_recycle_num++;
4878
0
        return 0;
4879
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4880
4881
0
    return scan_and_recycle(key0, key1, std::move(scan_and_statistics),
4882
0
                            [&metrics_context]() -> int {
4883
0
                                metrics_context.report(true);
4884
0
                                return 0;
4885
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_1clEv
4886
0
}
4887
4888
// Scan and statistics expired_stage_objects that need to be recycled
4889
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
4890
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
4891
4892
    // for calculate the total num or bytes of recyled objects
4893
0
    auto scan_and_statistics = [&metrics_context, this]() {
4894
0
        for (const auto& stage : instance_info_.stages()) {
4895
0
            if (stopped()) {
4896
0
                break;
4897
0
            }
4898
0
            if (stage.type() == StagePB::EXTERNAL) {
4899
0
                continue;
4900
0
            }
4901
0
            int idx = stoi(stage.obj_info().id());
4902
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
4903
0
                continue;
4904
0
            }
4905
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
4906
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4907
0
            if (!s3_conf) {
4908
0
                continue;
4909
0
            }
4910
0
            s3_conf->prefix = stage.obj_info().prefix();
4911
0
            std::shared_ptr<S3Accessor> accessor;
4912
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
4913
0
            if (ret1 != 0) {
4914
0
                continue;
4915
0
            }
4916
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
4917
0
                continue;
4918
0
            }
4919
0
            metrics_context.total_need_recycle_num++;
4920
0
        }
4921
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
4922
4923
0
    scan_and_statistics();
4924
0
    metrics_context.report(true);
4925
0
    return 0;
4926
0
}
4927
4928
// Scan and statistics versions that need to be recycled
4929
0
int InstanceRecycler::scan_and_statistics_versions() {
4930
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
4931
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
4932
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
4933
4934
0
    int64_t last_scanned_table_id = 0;
4935
0
    bool is_recycled = false; // Is last scanned kv recycled
4936
    // for calculate the total num or bytes of recyled objects
4937
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
4938
0
                                       std::string_view k, std::string_view) {
4939
0
        auto k1 = k;
4940
0
        k1.remove_prefix(1);
4941
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
4942
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4943
0
        decode_key(&k1, &out);
4944
0
        DCHECK_EQ(out.size(), 6) << k;
4945
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
4946
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
4947
0
            metrics_context.total_need_recycle_num +=
4948
0
                    is_recycled; // Version kv of this table has been recycled
4949
0
            return 0;
4950
0
        }
4951
0
        last_scanned_table_id = table_id;
4952
0
        is_recycled = false;
4953
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
4954
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
4955
0
        std::unique_ptr<Transaction> txn;
4956
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4957
0
        if (err != TxnErrorCode::TXN_OK) {
4958
0
            return 0;
4959
0
        }
4960
0
        std::unique_ptr<RangeGetIterator> iter;
4961
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
4962
0
        if (err != TxnErrorCode::TXN_OK) {
4963
0
            return 0;
4964
0
        }
4965
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
4966
0
            return 0;
4967
0
        }
4968
0
        metrics_context.total_need_recycle_num++;
4969
0
        is_recycled = true;
4970
0
        return 0;
4971
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4972
4973
0
    return scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics),
4974
0
                            [&metrics_context]() -> int {
4975
0
                                metrics_context.report(true);
4976
0
                                return 0;
4977
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_1clEv
4978
0
}
4979
4980
// Scan and statistics restore jobs that need to be recycled
4981
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
4982
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
4983
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
4984
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
4985
0
    std::string restore_job_key0;
4986
0
    std::string restore_job_key1;
4987
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
4988
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
4989
4990
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4991
4992
    // for calculate the total num or bytes of recyled objects
4993
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
4994
0
        RestoreJobCloudPB restore_job_pb;
4995
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
4996
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
4997
0
            return 0;
4998
0
        }
4999
0
        int64_t expiration =
5000
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5001
0
        int64_t current_time = ::time(nullptr);
5002
0
        if (current_time < expiration) { // not expired
5003
0
            return 0;
5004
0
        }
5005
0
        metrics_context.total_need_recycle_num++;
5006
0
        return 0;
5007
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5008
5009
0
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics),
5010
0
                            [&metrics_context]() -> int {
5011
0
                                metrics_context.report(true);
5012
0
                                return 0;
5013
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_1clEv
5014
0
}
5015
5016
} // namespace doris::cloud