Coverage Report

Created: 2025-07-23 17:50

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <deque>
34
#include <initializer_list>
35
#include <numeric>
36
#include <string>
37
#include <string_view>
38
#include <utility>
39
40
#include "common/defer.h"
41
#include "common/stopwatch.h"
42
#include "meta-service/meta_service.h"
43
#include "meta-service/meta_service_helper.h"
44
#include "meta-service/meta_service_schema.h"
45
#include "meta-store/blob_message.h"
46
#include "meta-store/txn_kv.h"
47
#include "meta-store/txn_kv_error.h"
48
#include "meta-store/versioned_value.h"
49
#include "recycler/checker.h"
50
#include "recycler/hdfs_accessor.h"
51
#include "recycler/s3_accessor.h"
52
#include "recycler/storage_vault_accessor.h"
53
#ifdef UNIT_TEST
54
#include "../test/mock_accessor.h"
55
#endif
56
#include "common/bvars.h"
57
#include "common/config.h"
58
#include "common/encryption_util.h"
59
#include "common/logging.h"
60
#include "common/simple_thread_pool.h"
61
#include "common/util.h"
62
#include "cpp/sync_point.h"
63
#include "meta-store/keys.h"
64
#include "recycler/recycler_service.h"
65
#include "recycler/sync_executor.h"
66
#include "recycler/util.h"
67
68
namespace doris::cloud {
69
70
using namespace std::chrono;
71
72
RecyclerMetricsContext tablet_metrics_context_("global_recycler", "recycle_tablet");
73
RecyclerMetricsContext segment_metrics_context_("global_recycler", "recycle_segment");
74
75
// return 0 for success get a key, 1 for key not found, negative for error
76
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
77
0
    std::unique_ptr<Transaction> txn;
78
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
79
0
    if (err != TxnErrorCode::TXN_OK) {
80
0
        return -1;
81
0
    }
82
0
    switch (txn->get(key, &val, true)) {
83
0
    case TxnErrorCode::TXN_OK:
84
0
        return 0;
85
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
86
0
        return 1;
87
0
    default:
88
0
        return -1;
89
0
    };
90
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
91
92
// 0 for success, negative for error
93
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
94
209
                   std::unique_ptr<RangeGetIterator>& it) {
95
209
    std::unique_ptr<Transaction> txn;
96
209
    TxnErrorCode err = txn_kv->create_txn(&txn);
97
209
    if (err != TxnErrorCode::TXN_OK) {
98
0
        return -1;
99
0
    }
100
209
    switch (txn->get(begin, end, &it, true)) {
101
209
    case TxnErrorCode::TXN_OK:
102
209
        return 0;
103
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
104
0
        return 1;
105
0
    default:
106
0
        return -1;
107
209
    };
108
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
94
190
                   std::unique_ptr<RangeGetIterator>& it) {
95
190
    std::unique_ptr<Transaction> txn;
96
190
    TxnErrorCode err = txn_kv->create_txn(&txn);
97
190
    if (err != TxnErrorCode::TXN_OK) {
98
0
        return -1;
99
0
    }
100
190
    switch (txn->get(begin, end, &it, true)) {
101
190
    case TxnErrorCode::TXN_OK:
102
190
        return 0;
103
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
104
0
        return 1;
105
0
    default:
106
0
        return -1;
107
190
    };
108
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
94
19
                   std::unique_ptr<RangeGetIterator>& it) {
95
19
    std::unique_ptr<Transaction> txn;
96
19
    TxnErrorCode err = txn_kv->create_txn(&txn);
97
19
    if (err != TxnErrorCode::TXN_OK) {
98
0
        return -1;
99
0
    }
100
19
    switch (txn->get(begin, end, &it, true)) {
101
19
    case TxnErrorCode::TXN_OK:
102
19
        return 0;
103
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
104
0
        return 1;
105
0
    default:
106
0
        return -1;
107
19
    };
108
0
}
109
110
// return 0 for success otherwise error
111
10
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
112
10
    std::unique_ptr<Transaction> txn;
113
10
    TxnErrorCode err = txn_kv->create_txn(&txn);
114
10
    if (err != TxnErrorCode::TXN_OK) {
115
0
        return -1;
116
0
    }
117
3.04k
    for (auto k : keys) {
118
3.04k
        txn->remove(k);
119
3.04k
    }
120
10
    switch (txn->commit()) {
121
10
    case TxnErrorCode::TXN_OK:
122
10
        return 0;
123
0
    case TxnErrorCode::TXN_CONFLICT:
124
0
        return -1;
125
0
    default:
126
0
        return -1;
127
10
    }
128
10
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
111
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
112
6
    std::unique_ptr<Transaction> txn;
113
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
114
6
    if (err != TxnErrorCode::TXN_OK) {
115
0
        return -1;
116
0
    }
117
3.02k
    for (auto k : keys) {
118
3.02k
        txn->remove(k);
119
3.02k
    }
120
6
    switch (txn->commit()) {
121
6
    case TxnErrorCode::TXN_OK:
122
6
        return 0;
123
0
    case TxnErrorCode::TXN_CONFLICT:
124
0
        return -1;
125
0
    default:
126
0
        return -1;
127
6
    }
128
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
111
4
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
112
4
    std::unique_ptr<Transaction> txn;
113
4
    TxnErrorCode err = txn_kv->create_txn(&txn);
114
4
    if (err != TxnErrorCode::TXN_OK) {
115
0
        return -1;
116
0
    }
117
21
    for (auto k : keys) {
118
21
        txn->remove(k);
119
21
    }
120
4
    switch (txn->commit()) {
121
4
    case TxnErrorCode::TXN_OK:
122
4
        return 0;
123
0
    case TxnErrorCode::TXN_CONFLICT:
124
0
        return -1;
125
0
    default:
126
0
        return -1;
127
4
    }
128
4
}
129
130
// return 0 for success otherwise error
131
30
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
132
30
    std::unique_ptr<Transaction> txn;
133
30
    TxnErrorCode err = txn_kv->create_txn(&txn);
134
30
    if (err != TxnErrorCode::TXN_OK) {
135
0
        return -1;
136
0
    }
137
4.00k
    for (auto& k : keys) {
138
4.00k
        txn->remove(k);
139
4.00k
    }
140
30
    switch (txn->commit()) {
141
30
    case TxnErrorCode::TXN_OK:
142
30
        return 0;
143
0
    case TxnErrorCode::TXN_CONFLICT:
144
0
        return -1;
145
0
    default:
146
0
        return -1;
147
30
    }
148
30
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
131
30
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
132
30
    std::unique_ptr<Transaction> txn;
133
30
    TxnErrorCode err = txn_kv->create_txn(&txn);
134
30
    if (err != TxnErrorCode::TXN_OK) {
135
0
        return -1;
136
0
    }
137
4.00k
    for (auto& k : keys) {
138
4.00k
        txn->remove(k);
139
4.00k
    }
140
30
    switch (txn->commit()) {
141
30
    case TxnErrorCode::TXN_OK:
142
30
        return 0;
143
0
    case TxnErrorCode::TXN_CONFLICT:
144
0
        return -1;
145
0
    default:
146
0
        return -1;
147
30
    }
148
30
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
149
150
// return 0 for success otherwise error
151
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
152
0
                                       std::string_view end) {
153
0
    std::unique_ptr<Transaction> txn;
154
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
155
0
    if (err != TxnErrorCode::TXN_OK) {
156
0
        return -1;
157
0
    }
158
0
    txn->remove(begin, end);
159
0
    switch (txn->commit()) {
160
0
    case TxnErrorCode::TXN_OK:
161
0
        return 0;
162
0
    case TxnErrorCode::TXN_CONFLICT:
163
0
        return -1;
164
0
    default:
165
0
        return -1;
166
0
    }
167
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
168
169
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
170
                                      int64_t num_scanned, int64_t num_recycled,
171
29
                                      int64_t start_time) {
172
29
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
173
0
        int64_t cost =
174
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
175
0
        if (cost > config::recycle_task_threshold_seconds) {
176
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
177
0
                    .tag("instance_id", instance_id)
178
0
                    .tag("task", task_name)
179
0
                    .tag("num_scanned", num_scanned)
180
0
                    .tag("num_recycled", num_recycled);
181
0
        }
182
0
    }
183
29
    return;
184
29
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
171
27
                                      int64_t start_time) {
172
27
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
173
0
        int64_t cost =
174
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
175
0
        if (cost > config::recycle_task_threshold_seconds) {
176
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
177
0
                    .tag("instance_id", instance_id)
178
0
                    .tag("task", task_name)
179
0
                    .tag("num_scanned", num_scanned)
180
0
                    .tag("num_recycled", num_recycled);
181
0
        }
182
0
    }
183
27
    return;
184
27
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
171
2
                                      int64_t start_time) {
172
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
173
0
        int64_t cost =
174
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
175
0
        if (cost > config::recycle_task_threshold_seconds) {
176
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
177
0
                    .tag("instance_id", instance_id)
178
0
                    .tag("task", task_name)
179
0
                    .tag("num_scanned", num_scanned)
180
0
                    .tag("num_recycled", num_recycled);
181
0
        }
182
0
    }
183
2
    return;
184
2
}
185
186
4
// Builds a recycler on top of `txn_kv`: records this node's "ip:port" identity,
// creates and starts the three worker thread pools, and creates the lazy txn
// committer. No recycling work begins until start() is called.
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);

    // Each pool is sized by config::recycle_pool_parallelism and started right
    // after it is created.
    auto make_started_pool = [](const char* pool_name) {
        auto pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
                                                       pool_name);
        pool->start();
        return pool;
    };

    auto s3_producer_pool = make_started_pool("s3_producer_pool");
    auto recycle_tablet_pool = make_started_pool("recycle_tablet_pool");
    auto group_recycle_function_pool = make_started_pool("group_recycle_function_pool");

    _thread_pool_group =
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
                                    std::move(group_recycle_function_pool));

    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_);
}
204
205
4
// Makes sure the recycler is stopped before it is destroyed; stop() is skipped
// when the recycler already reports itself as stopped.
Recycler::~Recycler() {
    if (stopped()) {
        return;
    }
    stop();
}
210
211
4
void Recycler::instance_scanner_callback() {
212
    // sleep 60 seconds before scheduling for the launch procedure to complete:
213
    // some bad hdfs connection may cause some log to stdout stderr
214
    // which may pollute .out file and affect the script to check success
215
4
    std::this_thread::sleep_for(
216
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
217
7
    while (!stopped()) {
218
3
        std::vector<InstanceInfoPB> instances;
219
3
        get_all_instances(txn_kv_.get(), instances);
220
        // TODO(plat1ko): delete job recycle kv of non-existent instances
221
3
        LOG(INFO) << "Recycler get instances: " << [&instances] {
222
3
            std::stringstream ss;
223
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
224
3
            return ss.str();
225
3
        }();
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
221
3
        LOG(INFO) << "Recycler get instances: " << [&instances] {
222
3
            std::stringstream ss;
223
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
224
3
            return ss.str();
225
3
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
226
3
        if (!instances.empty()) {
227
            // enqueue instances
228
3
            std::lock_guard lock(mtx_);
229
30
            for (auto& instance : instances) {
230
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
231
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
232
                // skip instance already in pending queue
233
30
                if (success) {
234
30
                    pending_instance_queue_.push_back(std::move(instance));
235
30
                }
236
30
            }
237
3
            pending_instance_cond_.notify_all();
238
3
        }
239
3
        {
240
3
            std::unique_lock lock(mtx_);
241
3
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
242
6
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
242
6
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
243
3
        }
244
3
    }
245
4
}
246
247
8
void Recycler::recycle_callback() {
248
38
    while (!stopped()) {
249
36
        InstanceInfoPB instance;
250
36
        {
251
36
            std::unique_lock lock(mtx_);
252
36
            pending_instance_cond_.wait(
253
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
253
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
254
36
            if (stopped()) {
255
6
                return;
256
6
            }
257
30
            instance = std::move(pending_instance_queue_.front());
258
30
            pending_instance_queue_.pop_front();
259
30
            pending_instance_set_.erase(instance.instance_id());
260
30
        }
261
0
        auto& instance_id = instance.instance_id();
262
30
        {
263
30
            std::lock_guard lock(mtx_);
264
            // skip instance in recycling
265
30
            if (recycling_instance_map_.count(instance_id)) continue;
266
30
        }
267
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
268
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
269
270
30
        if (int r = instance_recycler->init(); r != 0) {
271
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
272
0
                         << " ret=" << r;
273
0
            continue;
274
0
        }
275
30
        std::string recycle_job_key;
276
30
        job_recycle_key({instance_id}, &recycle_job_key);
277
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
278
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
279
30
        if (ret != 0) { // Prepare failed
280
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
281
20
                         << " ret=" << ret;
282
20
            continue;
283
20
        } else {
284
10
            std::lock_guard lock(mtx_);
285
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
286
10
        }
287
10
        if (stopped()) return;
288
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
289
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
290
10
        g_bvar_recycler_instance_recycle_task_concurrency << 1;
291
10
        g_bvar_recycler_instance_running_counter << 1;
292
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
293
10
        tablet_metrics_context_.reset();
294
10
        segment_metrics_context_.reset();
295
10
        ret = instance_recycler->do_recycle();
296
10
        tablet_metrics_context_.finish_report();
297
10
        segment_metrics_context_.finish_report();
298
10
        g_bvar_recycler_instance_recycle_task_concurrency << -1;
299
10
        g_bvar_recycler_instance_running_counter << -1;
300
        // If instance recycler has been aborted, don't finish this job
301
10
        if (!instance_recycler->stopped()) {
302
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
303
10
                                        ret == 0, ctime_ms);
304
10
        }
305
10
        {
306
10
            std::lock_guard lock(mtx_);
307
10
            recycling_instance_map_.erase(instance_id);
308
10
        }
309
310
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
311
10
        auto elpased_ms = now - ctime_ms;
312
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
313
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
314
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
315
10
                                             now + config::recycle_interval_seconds * 1000);
316
10
        LOG(INFO) << "recycle instance done, "
317
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
318
10
                  << " now: " << now;
319
320
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
321
322
10
        LOG_WARNING("finish recycle instance")
323
10
                .tag("instance_id", instance_id)
324
10
                .tag("cost_ms", elpased_ms);
325
10
    }
326
8
}
327
328
4
void Recycler::lease_recycle_jobs() {
329
54
    while (!stopped()) {
330
50
        std::vector<std::string> instances;
331
50
        instances.reserve(recycling_instance_map_.size());
332
50
        {
333
50
            std::lock_guard lock(mtx_);
334
50
            for (auto& [id, _] : recycling_instance_map_) {
335
30
                instances.push_back(id);
336
30
            }
337
50
        }
338
50
        for (auto& i : instances) {
339
30
            std::string recycle_job_key;
340
30
            job_recycle_key({i}, &recycle_job_key);
341
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
342
30
            if (ret == 1) {
343
0
                std::lock_guard lock(mtx_);
344
0
                if (auto it = recycling_instance_map_.find(i);
345
0
                    it != recycling_instance_map_.end()) {
346
0
                    it->second->stop();
347
0
                }
348
0
            }
349
30
        }
350
50
        {
351
50
            std::unique_lock lock(mtx_);
352
50
            notifier_.wait_for(lock,
353
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
354
100
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
354
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
355
50
        }
356
50
    }
357
4
}
358
359
4
void Recycler::check_recycle_tasks() {
360
7
    while (!stopped()) {
361
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
362
3
        {
363
3
            std::lock_guard lock(mtx_);
364
3
            recycling_instance_map = recycling_instance_map_;
365
3
        }
366
3
        for (auto& entry : recycling_instance_map) {
367
0
            entry.second->check_recycle_tasks();
368
0
        }
369
370
3
        std::unique_lock lock(mtx_);
371
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
372
6
                           [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
372
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
373
3
    }
374
4
}
375
376
4
// Starts the recycler:
//   - configures the instance filter from the whitelist/blacklist config,
//   - optionally starts the checker (config::enable_checker),
//   - registers the recycler RPC service on `server` when one is given,
//   - spawns the scanner thread, config::recycle_concurrency recycle workers,
//     the lease thread and the task-check thread.
// `server` may be null, in which case no RPC service is registered.
// Returns 0 on success, or the checker's non-zero start() result if the
// checker fails to start (no worker thread is spawned in that case).
int Recycler::start(brpc::Server* server) {
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
    S3Environment::getInstance();

    if (config::enable_checker) {
        checker_ = std::make_unique<Checker>(txn_kv_);
        int ret = checker_->start();
        std::string msg;
        if (ret != 0) {
            msg = "failed to start checker";
            LOG(ERROR) << msg;
            std::cerr << msg << std::endl;
            return ret;
        }
        msg = "checker started";
        LOG(INFO) << msg;
        std::cout << msg << std::endl;
    }

    if (server) {
        // Add service. The server is asked to take ownership of the service
        // object (brpc::SERVER_OWNS_SERVICE).
        auto recycler_service =
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
        if (server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE) != 0) {
            // Do not fail silently: without the service no recycler RPC is
            // reachable. Startup still continues, as before, so background
            // recycling keeps working.
            // NOTE(review): confirm whether brpc releases the service object
            // when AddService fails.
            LOG(ERROR) << "failed to add recycler service to brpc server";
        }
    }

    workers_.emplace_back([this] { instance_scanner_callback(); });
    for (int i = 0; i < config::recycle_concurrency; ++i) {
        workers_.emplace_back([this] { recycle_callback(); });
    }

    workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this);
    workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this);
    return 0;
}
412
413
4
void Recycler::stop() {
414
4
    stopped_ = true;
415
4
    notifier_.notify_all();
416
4
    pending_instance_cond_.notify_all();
417
4
    {
418
4
        std::lock_guard lock(mtx_);
419
4
        for (auto& [_, recycler] : recycling_instance_map_) {
420
0
            recycler->stop();
421
0
        }
422
4
    }
423
20
    for (auto& w : workers_) {
424
20
        if (w.joinable()) w.join();
425
20
    }
426
4
    if (checker_) {
427
0
        checker_->stop();
428
0
    }
429
4
}
430
431
// Per-instance cache of the inverted index information of a schema, keyed by
// <index_id, schema_version>. Misses are resolved by reading the schema from
// the kv store. Lookups and insertions are serialized with an internal mutex.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Looks up the inverted index info of <index_id, schema_version>.
    // On success returns 0; `res` is filled when the schema has inverted
    // indexes and is left untouched on a cache hit for a schema that is known
    // to have none.
    // When reading the schema from the kv store fails, the TxnErrorCode is
    // returned cast to int (the original contract states 1 for "schema kv not
    // found"; the enum values are defined elsewhere). Returns -1 when the
    // transaction cannot be created or the schema value is malformed.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // V1 is used when the schema does not state a storage format.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            // Collect <index_id, index_suffix_name> of the inverted indexes only.
            for (auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Caches `index_info` for <index_id, schema_version>.
    // An empty `index_info.second` means this schema has no inverted index; in
    // that case only the key is remembered. An entry that already exists is
    // left unchanged.
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        size_t operator()(const Key& key) const {
            // Mix both components. Hashing only the schema version would put
            // every index that shares a version into the same bucket.
            size_t seed = std::hash<int64_t> {}(key.first);
            seed ^= std::hash<int32_t> {}(key.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
518
519
// Creates a recycler for one instance.
// Keeps a copy of `instance` and its id, shares `txn_kv`, the thread pool group
// and the lazy txn committer with the caller, and creates an empty inverted
// index cache bound to this instance id. Storage accessors are not set up here.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          // Uses the members initialized above, not the (moved-from) parameters.
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)) {}
528
529
76
// Defaulted in this translation unit, where InvertedIndexIdCache is a complete
// type (presumably the header only forward-declares it -- confirm).
InstanceRecycler::~InstanceRecycler() = default;
530
531
76
int InstanceRecycler::init_obj_store_accessors() {
532
76
    for (const auto& obj_info : instance_info_.obj_info()) {
533
55
#ifdef UNIT_TEST
534
55
        auto accessor = std::make_shared<MockAccessor>();
535
#else
536
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
537
        if (!s3_conf) {
538
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
539
            return -1;
540
        }
541
542
        std::shared_ptr<S3Accessor> accessor;
543
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
544
        if (ret != 0) {
545
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
546
                         << " resource_id=" << obj_info.id();
547
            return ret;
548
        }
549
#endif
550
55
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
551
55
    }
552
553
76
    return 0;
554
76
}
555
556
76
int InstanceRecycler::init_storage_vault_accessors() {
557
76
    if (instance_info_.resource_ids().empty()) {
558
69
        return 0;
559
69
    }
560
561
7
    FullRangeGetOptions opts(txn_kv_);
562
7
    opts.prefetch = true;
563
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
564
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
565
566
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
567
18
        auto [k, v] = *kv;
568
18
        StorageVaultPB vault;
569
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
570
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
571
0
            return -1;
572
0
        }
573
18
        std::string recycler_storage_vault_white_list = accumulate(
574
18
                config::recycler_storage_vault_white_list.begin(),
575
18
                config::recycler_storage_vault_white_list.end(), std::string(),
576
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
576
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
577
18
        LOG_INFO("config::recycler_storage_vault_white_list")
578
18
                .tag("", recycler_storage_vault_white_list);
579
18
        if (!config::recycler_storage_vault_white_list.empty()) {
580
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
581
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
582
8
                it == config::recycler_storage_vault_white_list.end()) {
583
2
                LOG_WARNING(
584
2
                        "failed to init accessor for vault because this vault is not in "
585
2
                        "config::recycler_storage_vault_white_list. ")
586
2
                        .tag(" vault name:", vault.name())
587
2
                        .tag(" config::recycler_storage_vault_white_list:",
588
2
                             recycler_storage_vault_white_list);
589
2
                continue;
590
2
            }
591
8
        }
592
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
593
16
                                 &accessor_map_, &vault);
594
16
        if (vault.has_hdfs_info()) {
595
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
596
9
            int ret = accessor->init();
597
9
            if (ret != 0) {
598
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
599
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
600
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
601
4
                continue;
602
4
            }
603
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
604
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
605
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
606
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
607
7
        } else if (vault.has_obj_info()) {
608
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
609
7
            if (!s3_conf) {
610
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
611
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
612
1
                continue;
613
1
            }
614
615
6
            std::shared_ptr<S3Accessor> accessor;
616
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
617
6
            if (ret != 0) {
618
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
619
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
620
0
                             << " ret=" << ret
621
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
622
0
                continue;
623
0
            }
624
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
625
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
626
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
627
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
628
6
        }
629
16
    }
630
631
7
    if (!it->is_valid()) {
632
0
        LOG_WARNING("failed to get storage vault kv");
633
0
        return -1;
634
0
    }
635
636
7
    if (accessor_map_.empty()) {
637
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
638
1
        return -2;
639
1
    }
640
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
641
6
             instance_id_);
642
643
6
    return 0;
644
7
}
645
646
76
int InstanceRecycler::init() {
647
76
    int ret = init_obj_store_accessors();
648
76
    if (ret != 0) {
649
0
        return ret;
650
0
    }
651
652
76
    return init_storage_vault_accessors();
653
76
}
654
655
// Bundles several callables into a single task.
//
// The returned function invokes every captured callable, in argument order, and
// never stops early. Its result is 0 when all callables returned 0, otherwise the
// return value of the LAST callable that returned non-zero.
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() {
        int last_failure = 0;
        auto remember = [&last_failure](int rc) {
            if (rc != 0) {
                last_failure = rc;
            }
        };
        // A comma fold evaluates its operands left to right; `int {...}` keeps the
        // "no narrowing" requirement on each callable's return type.
        (remember(int {funcs()}), ...);
        return last_failure;
    };
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
656
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
657
10
    return [funcs...]() {
658
10
        return [](std::initializer_list<int> ret_vals) {
659
10
            int i = 0;
660
10
            for (int ret : ret_vals) {
661
10
                if (ret != 0) {
662
10
                    i = ret;
663
10
                }
664
10
            }
665
10
            return i;
666
10
        }({funcs()...});
667
10
    };
668
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
656
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
657
10
    return [funcs...]() {
658
10
        return [](std::initializer_list<int> ret_vals) {
659
10
            int i = 0;
660
10
            for (int ret : ret_vals) {
661
10
                if (ret != 0) {
662
10
                    i = ret;
663
10
                }
664
10
            }
665
10
            return i;
666
10
        }({funcs()...});
667
10
    };
668
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_
Line
Count
Source
656
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
657
10
    return [funcs...]() {
658
10
        return [](std::initializer_list<int> ret_vals) {
659
10
            int i = 0;
660
10
            for (int ret : ret_vals) {
661
10
                if (ret != 0) {
662
10
                    i = ret;
663
10
                }
664
10
            }
665
10
            return i;
666
10
        }({funcs()...});
667
10
    };
668
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
656
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
657
10
    return [funcs...]() {
658
10
        return [](std::initializer_list<int> ret_vals) {
659
10
            int i = 0;
660
10
            for (int ret : ret_vals) {
661
10
                if (ret != 0) {
662
10
                    i = ret;
663
10
                }
664
10
            }
665
10
            return i;
666
10
        }({funcs()...});
667
10
    };
668
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
656
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
657
10
    return [funcs...]() {
658
10
        return [](std::initializer_list<int> ret_vals) {
659
10
            int i = 0;
660
10
            for (int ret : ret_vals) {
661
10
                if (ret != 0) {
662
10
                    i = ret;
663
10
                }
664
10
            }
665
10
            return i;
666
10
        }({funcs()...});
667
10
    };
668
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
656
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
657
10
    return [funcs...]() {
658
10
        return [](std::initializer_list<int> ret_vals) {
659
10
            int i = 0;
660
10
            for (int ret : ret_vals) {
661
10
                if (ret != 0) {
662
10
                    i = ret;
663
10
                }
664
10
            }
665
10
            return i;
666
10
        }({funcs()...});
667
10
    };
668
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Line
Count
Source
656
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
657
10
    return [funcs...]() {
658
10
        return [](std::initializer_list<int> ret_vals) {
659
10
            int i = 0;
660
10
            for (int ret : ret_vals) {
661
10
                if (ret != 0) {
662
10
                    i = ret;
663
10
                }
664
10
            }
665
10
            return i;
666
10
        }({funcs()...});
667
10
    };
668
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
656
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
657
10
    return [funcs...]() {
658
10
        return [](std::initializer_list<int> ret_vals) {
659
10
            int i = 0;
660
10
            for (int ret : ret_vals) {
661
10
                if (ret != 0) {
662
10
                    i = ret;
663
10
                }
664
10
            }
665
10
            return i;
666
10
        }({funcs()...});
667
10
    };
668
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
656
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
657
10
    return [funcs...]() {
658
10
        return [](std::initializer_list<int> ret_vals) {
659
10
            int i = 0;
660
10
            for (int ret : ret_vals) {
661
10
                if (ret != 0) {
662
10
                    i = ret;
663
10
                }
664
10
            }
665
10
            return i;
666
10
        }({funcs()...});
667
10
    };
668
10
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
669
670
10
int InstanceRecycler::do_recycle() {
671
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
672
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
673
0
        return recycle_deleted_instance();
674
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
675
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
676
10
                                        fmt::format("instance id {}", instance_id_),
677
90
                                        [](int r) { return r != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi
Line
Count
Source
677
90
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi
678
10
        sync_executor
679
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
680
                                   // becase they may both recycle the same set of tablets
681
                        // recycle dropped table or idexes(mv, rollup)
682
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv
Line
Count
Source
682
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv
683
                        // recycle dropped partitions
684
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
684
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
685
10
                .add(task_wrapper(
686
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
686
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
687
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
687
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
688
10
                .add(task_wrapper(
689
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
689
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
690
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
690
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
691
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
691
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
692
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
692
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
693
10
                .add(task_wrapper(
694
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
694
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
695
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
695
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
696
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
696
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
697
698
10
        bool finished = true;
699
10
        std::vector<int> rets = sync_executor.when_all(&finished);
700
90
        for (int ret : rets) {
701
90
            if (ret != 0) {
702
0
                return ret;
703
0
            }
704
90
        }
705
10
        return finished ? 0 : -1;
706
10
    } else {
707
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
708
0
                     << " instance_id=" << instance_id_;
709
0
        return -1;
710
0
    }
711
10
}
712
713
/**
714
 * 1. delete all remote data
715
 * 2. delete all kv
716
 * 3. remove instance kv
717
 */
718
1
int InstanceRecycler::recycle_deleted_instance() {
719
1
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
720
721
1
    int ret = 0;
722
1
    auto start_time = steady_clock::now();
723
724
1
    DORIS_CLOUD_DEFER {
725
1
        auto cost = duration<float>(steady_clock::now() - start_time).count();
726
1
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
727
1
                     << " recycle deleted instance, cost=" << cost
728
1
                     << "s, instance_id=" << instance_id_;
729
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
724
1
    DORIS_CLOUD_DEFER {
725
1
        auto cost = duration<float>(steady_clock::now() - start_time).count();
726
1
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
727
1
                     << " recycle deleted instance, cost=" << cost
728
1
                     << "s, instance_id=" << instance_id_;
729
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
730
731
    // delete all remote data
732
2
    for (auto& [_, accessor] : accessor_map_) {
733
2
        if (stopped()) {
734
0
            return ret;
735
0
        }
736
737
2
        LOG(INFO) << "begin to delete all objects in " << accessor->uri();
738
2
        int del_ret = accessor->delete_all();
739
2
        if (del_ret == 0) {
740
2
            LOG(INFO) << "successfully delete all objects in " << accessor->uri();
741
2
        } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
742
            // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
743
            // so the recycling has been successful.
744
0
            ret = -1;
745
0
        }
746
2
    }
747
748
1
    if (ret != 0) {
749
0
        LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
750
0
        return ret;
751
0
    }
752
753
    // delete all kv
754
1
    std::unique_ptr<Transaction> txn;
755
1
    TxnErrorCode err = txn_kv_->create_txn(&txn);
756
1
    if (err != TxnErrorCode::TXN_OK) {
757
0
        LOG(WARNING) << "failed to create txn";
758
0
        ret = -1;
759
0
        return -1;
760
0
    }
761
1
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
762
    // delete kv before deleting objects to prevent the checker from misjudging data loss
763
1
    std::string start_txn_key = txn_key_prefix(instance_id_);
764
1
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
765
1
    txn->remove(start_txn_key, end_txn_key);
766
1
    std::string start_version_key = version_key_prefix(instance_id_);
767
1
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
768
1
    txn->remove(start_version_key, end_version_key);
769
1
    std::string start_meta_key = meta_key_prefix(instance_id_);
770
1
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
771
1
    txn->remove(start_meta_key, end_meta_key);
772
1
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
773
1
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
774
1
    txn->remove(start_recycle_key, end_recycle_key);
775
1
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
776
1
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
777
1
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
778
1
    std::string start_copy_key = copy_key_prefix(instance_id_);
779
1
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
780
1
    txn->remove(start_copy_key, end_copy_key);
781
    // should not remove job key range, because we need to reserve job recycle kv
782
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
783
1
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
784
1
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
785
1
    txn->remove(start_job_tablet_key, end_job_tablet_key);
786
1
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
787
1
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
788
1
    std::string start_vault_key = storage_vault_key(key_info0);
789
1
    std::string end_vault_key = storage_vault_key(key_info1);
790
1
    txn->remove(start_vault_key, end_vault_key);
791
1
    err = txn->commit();
792
1
    if (err != TxnErrorCode::TXN_OK) {
793
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
794
0
        ret = -1;
795
0
    }
796
797
1
    if (ret == 0) {
798
        // remove instance kv
799
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
800
1
        err = txn_kv_->create_txn(&txn);
801
1
        if (err != TxnErrorCode::TXN_OK) {
802
0
            LOG(WARNING) << "failed to create txn";
803
0
            ret = -1;
804
0
            return ret;
805
0
        }
806
1
        std::string key;
807
1
        instance_key({instance_id_}, &key);
808
1
        txn->remove(key);
809
1
        err = txn->commit();
810
1
        if (err != TxnErrorCode::TXN_OK) {
811
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
812
0
                         << " err=" << err;
813
0
            ret = -1;
814
0
        }
815
1
    }
816
1
    return ret;
817
1
}
818
819
bool is_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id,
820
3.05k
                     int64_t txn_id) {
821
3.05k
    std::unique_ptr<Transaction> txn;
822
3.05k
    TxnErrorCode err = txn_kv->create_txn(&txn);
823
3.05k
    if (err != TxnErrorCode::TXN_OK) {
824
0
        LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id;
825
0
        return false;
826
0
    }
827
828
3.05k
    std::string index_val;
829
3.05k
    const std::string index_key = txn_index_key({instance_id, txn_id});
830
3.05k
    err = txn->get(index_key, &index_val);
831
3.05k
    if (err != TxnErrorCode::TXN_OK) {
832
3.03k
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
833
3.03k
            TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_recycled");
834
            // txn has been recycled;
835
3.03k
            LOG(INFO) << "txn index key has been recycled, txn_id=" << txn_id
836
3.03k
                      << " instance_id=" << instance_id;
837
3.03k
            return true;
838
3.03k
        }
839
0
        LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id
840
0
                     << " instance_id=" << instance_id << " key=" << hex(index_key)
841
0
                     << " err=" << err;
842
0
        return false;
843
3.03k
    }
844
845
20
    TxnIndexPB index_pb;
846
20
    if (!index_pb.ParseFromString(index_val)) {
847
0
        LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id
848
0
                     << " instance_id=" << instance_id;
849
0
        return false;
850
0
    }
851
852
20
    DCHECK(index_pb.has_tablet_index() == true);
853
20
    if (!index_pb.tablet_index().has_db_id()) {
854
        // In the previous version, the db_id was not set in the index_pb.
855
        // If updating to the version which enable txn lazy commit, the db_id will be set.
856
0
        LOG(INFO) << "txn index has no db_id, txn_id=" << txn_id << " instance_id=" << instance_id
857
0
                  << " index=" << index_pb.ShortDebugString();
858
0
        return true;
859
0
    }
860
861
20
    int64_t db_id = index_pb.tablet_index().db_id();
862
20
    DCHECK_GT(db_id, 0) << "db_id=" << db_id << " txn_id=" << txn_id
863
0
                        << " instance_id=" << instance_id;
864
865
20
    std::string info_val;
866
20
    const std::string info_key = txn_info_key({instance_id, db_id, txn_id});
867
20
    err = txn->get(info_key, &info_val);
868
20
    if (err != TxnErrorCode::TXN_OK) {
869
0
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
870
            // txn info has been recycled;
871
0
            LOG(INFO) << "txn info key has been recycled, db_id=" << db_id << " txn_id=" << txn_id
872
0
                      << " instance_id=" << instance_id;
873
0
            return true;
874
0
        }
875
876
0
        DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND);
877
0
        LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id
878
0
                     << " instance_id=" << instance_id << " key=" << hex(info_key)
879
0
                     << " err=" << err;
880
0
        return false;
881
0
    }
882
883
20
    TxnInfoPB txn_info;
884
20
    if (!txn_info.ParseFromString(info_val)) {
885
0
        LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id
886
0
                     << " instance_id=" << instance_id;
887
0
        return false;
888
0
    }
889
890
20
    DCHECK(txn_info.txn_id() == txn_id) << "txn_id=" << txn_id << " instance_id=" << instance_id
891
0
                                        << " txn_info=" << txn_info.ShortDebugString();
892
893
20
    if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status() ||
894
20
        TxnStatusPB::TXN_STATUS_VISIBLE == txn_info.status()) {
895
10
        TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_aborted", &txn_info);
896
10
        return true;
897
10
    }
898
899
10
    TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_not_finished", &txn_info);
900
10
    return false;
901
20
}
902
903
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
904
4.00k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
905
4.00k
    if (config::force_immediate_recycle) {
906
0
        return 0L;
907
0
    }
908
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
909
4.00k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
910
4.00k
    int64_t retention_seconds = config::retention_seconds;
911
4.00k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
912
3.10k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
913
3.10k
    }
914
4.00k
    int64_t final_expiration = expiration + retention_seconds;
915
4.00k
    if (*earlest_ts > final_expiration) {
916
2
        *earlest_ts = final_expiration;
917
2
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
918
2
    }
919
4.00k
    return final_expiration;
920
4.00k
}
921
922
int64_t calculate_partition_expired_time(
923
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
924
8
        int64_t* earlest_ts /* partition earliest expiration ts */) {
925
8
    if (config::force_immediate_recycle) {
926
2
        return 0L;
927
2
    }
928
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
929
6
                                                            : partition_meta_pb.creation_time();
930
6
    int64_t retention_seconds = config::retention_seconds;
931
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
932
6
        retention_seconds =
933
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
934
6
    }
935
6
    int64_t final_expiration = expiration + retention_seconds;
936
6
    if (*earlest_ts > final_expiration) {
937
2
        *earlest_ts = final_expiration;
938
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
939
2
    }
940
6
    return final_expiration;
941
8
}
942
943
int64_t calculate_index_expired_time(const std::string& instance_id_,
944
                                     const RecycleIndexPB& index_meta_pb,
945
8
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
946
8
    if (config::force_immediate_recycle) {
947
2
        return 0L;
948
2
    }
949
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
950
6
                                                        : index_meta_pb.creation_time();
951
6
    int64_t retention_seconds = config::retention_seconds;
952
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
953
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
954
6
    }
955
6
    int64_t final_expiration = expiration + retention_seconds;
956
6
    if (*earlest_ts > final_expiration) {
957
2
        *earlest_ts = final_expiration;
958
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
959
2
    }
960
6
    return final_expiration;
961
8
}
962
963
int64_t calculate_tmp_rowset_expired_time(
964
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
965
3.05k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
966
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
967
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
968
    //  duration or timeout always < `retention_time` in practice.
969
3.05k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
970
3.05k
                                 ? tmp_rowset_meta_pb.txn_expiration()
971
3.05k
                                 : tmp_rowset_meta_pb.creation_time();
972
3.05k
    expiration = config::force_immediate_recycle ? 0 : expiration;
973
3.05k
    int64_t final_expiration = expiration + config::retention_seconds;
974
3.05k
    if (*earlest_ts > final_expiration) {
975
6
        *earlest_ts = final_expiration;
976
6
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
977
6
    }
978
3.05k
    return final_expiration;
979
3.05k
}
980
981
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
982
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
983
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
984
30.0k
    if (*earlest_ts > final_expiration / 1000) {
985
6
        *earlest_ts = final_expiration / 1000;
986
6
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
987
6
    }
988
30.0k
    return final_expiration;
989
30.0k
}
990
991
14
int InstanceRecycler::recycle_indexes() {
992
14
    const std::string task_name = "recycle_indexes";
993
14
    int64_t num_scanned = 0;
994
14
    int64_t num_expired = 0;
995
14
    int64_t num_recycled = 0;
996
14
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
997
998
14
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
999
14
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
1000
14
    std::string index_key0;
1001
14
    std::string index_key1;
1002
14
    recycle_index_key(index_key_info0, &index_key0);
1003
14
    recycle_index_key(index_key_info1, &index_key1);
1004
1005
14
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
1006
1007
14
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1008
14
    register_recycle_task(task_name, start_time);
1009
1010
14
    DORIS_CLOUD_DEFER {
1011
14
        unregister_recycle_task(task_name);
1012
14
        int64_t cost =
1013
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1014
14
        metrics_context.finish_report();
1015
14
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1016
14
                .tag("instance_id", instance_id_)
1017
14
                .tag("num_scanned", num_scanned)
1018
14
                .tag("num_expired", num_expired)
1019
14
                .tag("num_recycled", num_recycled);
1020
14
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1010
12
    DORIS_CLOUD_DEFER {
1011
12
        unregister_recycle_task(task_name);
1012
12
        int64_t cost =
1013
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1014
12
        metrics_context.finish_report();
1015
12
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1016
12
                .tag("instance_id", instance_id_)
1017
12
                .tag("num_scanned", num_scanned)
1018
12
                .tag("num_expired", num_expired)
1019
12
                .tag("num_recycled", num_recycled);
1020
12
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1010
2
    DORIS_CLOUD_DEFER {
1011
2
        unregister_recycle_task(task_name);
1012
2
        int64_t cost =
1013
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1014
2
        metrics_context.finish_report();
1015
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1016
2
                .tag("instance_id", instance_id_)
1017
2
                .tag("num_scanned", num_scanned)
1018
2
                .tag("num_expired", num_expired)
1019
2
                .tag("num_recycled", num_recycled);
1020
2
    };
1021
1022
14
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1023
1024
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
1025
14
    std::vector<std::string_view> index_keys;
1026
14
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1027
8
        ++num_scanned;
1028
8
        RecycleIndexPB index_pb;
1029
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1030
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1031
0
            return -1;
1032
0
        }
1033
8
        int64_t current_time = ::time(nullptr);
1034
8
        if (current_time <
1035
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1036
0
            return 0;
1037
0
        }
1038
8
        ++num_expired;
1039
        // decode index_id
1040
8
        auto k1 = k;
1041
8
        k1.remove_prefix(1);
1042
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1043
8
        decode_key(&k1, &out);
1044
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1045
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1046
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1047
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1048
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1049
        // Change state to RECYCLING
1050
8
        std::unique_ptr<Transaction> txn;
1051
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1052
8
        if (err != TxnErrorCode::TXN_OK) {
1053
0
            LOG_WARNING("failed to create txn").tag("err", err);
1054
0
            return -1;
1055
0
        }
1056
8
        std::string val;
1057
8
        err = txn->get(k, &val);
1058
8
        if (err ==
1059
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1060
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1061
0
            return 0;
1062
0
        }
1063
8
        if (err != TxnErrorCode::TXN_OK) {
1064
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1065
0
            return -1;
1066
0
        }
1067
8
        index_pb.Clear();
1068
8
        if (!index_pb.ParseFromString(val)) {
1069
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1070
0
            return -1;
1071
0
        }
1072
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1073
7
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1074
7
            txn->put(k, index_pb.SerializeAsString());
1075
7
            err = txn->commit();
1076
7
            if (err != TxnErrorCode::TXN_OK) {
1077
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1078
0
                return -1;
1079
0
            }
1080
7
        }
1081
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1082
1
            LOG_WARNING("failed to recycle tablets under index")
1083
1
                    .tag("table_id", index_pb.table_id())
1084
1
                    .tag("instance_id", instance_id_)
1085
1
                    .tag("index_id", index_id);
1086
1
            return -1;
1087
1
        }
1088
7
        metrics_context.total_recycled_num = ++num_recycled;
1089
7
        metrics_context.report();
1090
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1091
7
        index_keys.push_back(k);
1092
7
        return 0;
1093
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1026
6
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1027
6
        ++num_scanned;
1028
6
        RecycleIndexPB index_pb;
1029
6
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1030
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1031
0
            return -1;
1032
0
        }
1033
6
        int64_t current_time = ::time(nullptr);
1034
6
        if (current_time <
1035
6
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1036
0
            return 0;
1037
0
        }
1038
6
        ++num_expired;
1039
        // decode index_id
1040
6
        auto k1 = k;
1041
6
        k1.remove_prefix(1);
1042
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1043
6
        decode_key(&k1, &out);
1044
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1045
6
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1046
6
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1047
6
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1048
6
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1049
        // Change state to RECYCLING
1050
6
        std::unique_ptr<Transaction> txn;
1051
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1052
6
        if (err != TxnErrorCode::TXN_OK) {
1053
0
            LOG_WARNING("failed to create txn").tag("err", err);
1054
0
            return -1;
1055
0
        }
1056
6
        std::string val;
1057
6
        err = txn->get(k, &val);
1058
6
        if (err ==
1059
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1060
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1061
0
            return 0;
1062
0
        }
1063
6
        if (err != TxnErrorCode::TXN_OK) {
1064
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1065
0
            return -1;
1066
0
        }
1067
6
        index_pb.Clear();
1068
6
        if (!index_pb.ParseFromString(val)) {
1069
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1070
0
            return -1;
1071
0
        }
1072
6
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1073
6
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1074
6
            txn->put(k, index_pb.SerializeAsString());
1075
6
            err = txn->commit();
1076
6
            if (err != TxnErrorCode::TXN_OK) {
1077
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1078
0
                return -1;
1079
0
            }
1080
6
        }
1081
6
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1082
0
            LOG_WARNING("failed to recycle tablets under index")
1083
0
                    .tag("table_id", index_pb.table_id())
1084
0
                    .tag("instance_id", instance_id_)
1085
0
                    .tag("index_id", index_id);
1086
0
            return -1;
1087
0
        }
1088
6
        metrics_context.total_recycled_num = ++num_recycled;
1089
6
        metrics_context.report();
1090
6
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1091
6
        index_keys.push_back(k);
1092
6
        return 0;
1093
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1026
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1027
2
        ++num_scanned;
1028
2
        RecycleIndexPB index_pb;
1029
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1030
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1031
0
            return -1;
1032
0
        }
1033
2
        int64_t current_time = ::time(nullptr);
1034
2
        if (current_time <
1035
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1036
0
            return 0;
1037
0
        }
1038
2
        ++num_expired;
1039
        // decode index_id
1040
2
        auto k1 = k;
1041
2
        k1.remove_prefix(1);
1042
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1043
2
        decode_key(&k1, &out);
1044
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1045
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1046
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1047
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1048
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1049
        // Change state to RECYCLING
1050
2
        std::unique_ptr<Transaction> txn;
1051
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1052
2
        if (err != TxnErrorCode::TXN_OK) {
1053
0
            LOG_WARNING("failed to create txn").tag("err", err);
1054
0
            return -1;
1055
0
        }
1056
2
        std::string val;
1057
2
        err = txn->get(k, &val);
1058
2
        if (err ==
1059
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1060
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1061
0
            return 0;
1062
0
        }
1063
2
        if (err != TxnErrorCode::TXN_OK) {
1064
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1065
0
            return -1;
1066
0
        }
1067
2
        index_pb.Clear();
1068
2
        if (!index_pb.ParseFromString(val)) {
1069
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1070
0
            return -1;
1071
0
        }
1072
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1073
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1074
1
            txn->put(k, index_pb.SerializeAsString());
1075
1
            err = txn->commit();
1076
1
            if (err != TxnErrorCode::TXN_OK) {
1077
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1078
0
                return -1;
1079
0
            }
1080
1
        }
1081
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1082
1
            LOG_WARNING("failed to recycle tablets under index")
1083
1
                    .tag("table_id", index_pb.table_id())
1084
1
                    .tag("instance_id", instance_id_)
1085
1
                    .tag("index_id", index_id);
1086
1
            return -1;
1087
1
        }
1088
1
        metrics_context.total_recycled_num = ++num_recycled;
1089
1
        metrics_context.report();
1090
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1091
1
        index_keys.push_back(k);
1092
1
        return 0;
1093
2
    };
1094
1095
14
    auto loop_done = [&index_keys, this]() -> int {
1096
4
        if (index_keys.empty()) return 0;
1097
3
        DORIS_CLOUD_DEFER {
1098
3
            index_keys.clear();
1099
3
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1097
2
        DORIS_CLOUD_DEFER {
1098
2
            index_keys.clear();
1099
2
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1097
1
        DORIS_CLOUD_DEFER {
1098
1
            index_keys.clear();
1099
1
        };
1100
3
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1101
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1102
0
            return -1;
1103
0
        }
1104
3
        return 0;
1105
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1095
2
    auto loop_done = [&index_keys, this]() -> int {
1096
2
        if (index_keys.empty()) return 0;
1097
2
        DORIS_CLOUD_DEFER {
1098
2
            index_keys.clear();
1099
2
        };
1100
2
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1101
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1102
0
            return -1;
1103
0
        }
1104
2
        return 0;
1105
2
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1095
2
    auto loop_done = [&index_keys, this]() -> int {
1096
2
        if (index_keys.empty()) return 0;
1097
1
        DORIS_CLOUD_DEFER {
1098
1
            index_keys.clear();
1099
1
        };
1100
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1101
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1102
0
            return -1;
1103
0
        }
1104
1
        return 0;
1105
1
    };
1106
1107
14
    if (config::enable_recycler_stats_metrics) {
1108
0
        scan_and_statistics_indexes();
1109
0
    }
1110
    // recycle_func and loop_done for scan and recycle
1111
14
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
1112
14
}
1113
1114
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
1115
271
                             int64_t tablet_id) {
1116
271
    std::unique_ptr<Transaction> txn;
1117
271
    TxnErrorCode err = txn_kv->create_txn(&txn);
1118
271
    if (err != TxnErrorCode::TXN_OK) {
1119
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
1120
0
                     << " tablet_id=" << tablet_id << " err=" << err;
1121
0
        return false;
1122
0
    }
1123
1124
271
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
1125
271
    std::string tablet_idx_val;
1126
271
    err = txn->get(tablet_idx_key, &tablet_idx_val);
1127
271
    if (TxnErrorCode::TXN_OK != err) {
1128
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
1129
0
                     << " tablet_id=" << tablet_id << " err=" << err
1130
0
                     << " key=" << hex(tablet_idx_key);
1131
0
        return false;
1132
0
    }
1133
1134
271
    TabletIndexPB tablet_idx_pb;
1135
271
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
1136
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
1137
0
                     << " tablet_id=" << tablet_id;
1138
0
        return false;
1139
0
    }
1140
1141
271
    if (!tablet_idx_pb.has_db_id()) {
1142
        // In the previous version, the db_id was not set in the index_pb.
1143
        // If updating to the version which enable txn lazy commit, the db_id will be set.
1144
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
1145
0
                  << " instance_id=" << instance_id
1146
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
1147
0
        return true;
1148
0
    }
1149
1150
271
    std::string ver_val;
1151
271
    std::string ver_key =
1152
271
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
1153
271
                                   tablet_idx_pb.partition_id()});
1154
271
    err = txn->get(ver_key, &ver_val);
1155
1156
271
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
1157
201
        LOG(INFO) << ""
1158
201
                     "partition version not found, instance_id="
1159
201
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
1160
201
                  << " table_id=" << tablet_idx_pb.table_id()
1161
201
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
1162
201
                  << " key=" << hex(ver_key);
1163
201
        return true;
1164
201
    }
1165
1166
70
    if (TxnErrorCode::TXN_OK != err) {
1167
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
1168
0
                     << " db_id=" << tablet_idx_pb.db_id()
1169
0
                     << " table_id=" << tablet_idx_pb.table_id()
1170
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1171
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
1172
0
        return false;
1173
0
    }
1174
1175
70
    VersionPB version_pb;
1176
70
    if (!version_pb.ParseFromString(ver_val)) {
1177
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
1178
0
                     << " db_id=" << tablet_idx_pb.db_id()
1179
0
                     << " table_id=" << tablet_idx_pb.table_id()
1180
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1181
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
1182
0
        return false;
1183
0
    }
1184
1185
70
    if (version_pb.pending_txn_ids_size() > 0) {
1186
20
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
1187
20
        DCHECK(version_pb.pending_txn_ids_size() == 1);
1188
20
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
1189
20
                     << " db_id=" << tablet_idx_pb.db_id()
1190
20
                     << " table_id=" << tablet_idx_pb.table_id()
1191
20
                     << " partition_id=" << tablet_idx_pb.partition_id()
1192
20
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
1193
20
                     << " key=" << hex(ver_key);
1194
20
        return false;
1195
20
    }
1196
50
    return true;
1197
70
}
1198
1199
14
int InstanceRecycler::recycle_partitions() {
1200
14
    const std::string task_name = "recycle_partitions";
1201
14
    int64_t num_scanned = 0;
1202
14
    int64_t num_expired = 0;
1203
14
    int64_t num_recycled = 0;
1204
14
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1205
1206
14
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
1207
14
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
1208
14
    std::string part_key0;
1209
14
    std::string part_key1;
1210
14
    recycle_partition_key(part_key_info0, &part_key0);
1211
14
    recycle_partition_key(part_key_info1, &part_key1);
1212
1213
14
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
1214
1215
14
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1216
14
    register_recycle_task(task_name, start_time);
1217
1218
14
    DORIS_CLOUD_DEFER {
1219
14
        unregister_recycle_task(task_name);
1220
14
        int64_t cost =
1221
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1222
14
        metrics_context.finish_report();
1223
14
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1224
14
                .tag("instance_id", instance_id_)
1225
14
                .tag("num_scanned", num_scanned)
1226
14
                .tag("num_expired", num_expired)
1227
14
                .tag("num_recycled", num_recycled);
1228
14
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1218
12
    DORIS_CLOUD_DEFER {
1219
12
        unregister_recycle_task(task_name);
1220
12
        int64_t cost =
1221
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1222
12
        metrics_context.finish_report();
1223
12
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1224
12
                .tag("instance_id", instance_id_)
1225
12
                .tag("num_scanned", num_scanned)
1226
12
                .tag("num_expired", num_expired)
1227
12
                .tag("num_recycled", num_recycled);
1228
12
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1218
2
    DORIS_CLOUD_DEFER {
1219
2
        unregister_recycle_task(task_name);
1220
2
        int64_t cost =
1221
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1222
2
        metrics_context.finish_report();
1223
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1224
2
                .tag("instance_id", instance_id_)
1225
2
                .tag("num_scanned", num_scanned)
1226
2
                .tag("num_expired", num_expired)
1227
2
                .tag("num_recycled", num_recycled);
1228
2
    };
1229
1230
14
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1231
1232
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
1233
14
    std::vector<std::string_view> partition_keys;
1234
14
    std::vector<std::string> partition_version_keys;
1235
14
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1236
8
        ++num_scanned;
1237
8
        RecyclePartitionPB part_pb;
1238
8
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1239
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1240
0
            return -1;
1241
0
        }
1242
8
        int64_t current_time = ::time(nullptr);
1243
8
        if (current_time <
1244
8
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1245
0
            return 0;
1246
0
        }
1247
8
        ++num_expired;
1248
        // decode partition_id
1249
8
        auto k1 = k;
1250
8
        k1.remove_prefix(1);
1251
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1252
8
        decode_key(&k1, &out);
1253
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1254
8
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1255
8
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1256
8
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1257
8
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1258
        // Change state to RECYCLING
1259
8
        std::unique_ptr<Transaction> txn;
1260
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1261
8
        if (err != TxnErrorCode::TXN_OK) {
1262
0
            LOG_WARNING("failed to create txn").tag("err", err);
1263
0
            return -1;
1264
0
        }
1265
8
        std::string val;
1266
8
        err = txn->get(k, &val);
1267
8
        if (err ==
1268
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1269
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1270
0
            return 0;
1271
0
        }
1272
8
        if (err != TxnErrorCode::TXN_OK) {
1273
0
            LOG_WARNING("failed to get kv");
1274
0
            return -1;
1275
0
        }
1276
8
        part_pb.Clear();
1277
8
        if (!part_pb.ParseFromString(val)) {
1278
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1279
0
            return -1;
1280
0
        }
1281
        // Partitions with PREPARED state MUST have no data
1282
8
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
1283
8
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1284
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1285
7
            txn->put(k, part_pb.SerializeAsString());
1286
7
            err = txn->commit();
1287
7
            if (err != TxnErrorCode::TXN_OK) {
1288
0
                LOG_WARNING("failed to commit txn: {}", err);
1289
0
                return -1;
1290
0
            }
1291
7
        }
1292
1293
8
        int ret = 0;
1294
32
        for (int64_t index_id : part_pb.index_id()) {
1295
32
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id,
1296
32
                                is_empty_tablet) != 0) {
1297
1
                LOG_WARNING("failed to recycle tablets under partition")
1298
1
                        .tag("table_id", part_pb.table_id())
1299
1
                        .tag("instance_id", instance_id_)
1300
1
                        .tag("index_id", index_id)
1301
1
                        .tag("partition_id", partition_id);
1302
1
                ret = -1;
1303
1
            }
1304
32
        }
1305
8
        if (ret == 0 && part_pb.has_db_id()) {
1306
            // Recycle the versioned keys
1307
7
            std::unique_ptr<Transaction> txn;
1308
7
            err = txn_kv_->create_txn(&txn);
1309
7
            if (err != TxnErrorCode::TXN_OK) {
1310
0
                LOG_WARNING("failed to create txn").tag("err", err);
1311
0
                return -1;
1312
0
            }
1313
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1314
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1315
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1316
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1317
7
            versioned_remove_all(txn.get(), meta_key);
1318
7
            txn->remove(index_key);
1319
7
            txn->remove(inverted_index_key);
1320
7
            err = txn->commit();
1321
7
            if (err != TxnErrorCode::TXN_OK) {
1322
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1323
0
                return -1;
1324
0
            }
1325
7
        }
1326
1327
8
        if (ret == 0) {
1328
7
            ++num_recycled;
1329
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1330
7
            partition_keys.push_back(k);
1331
7
            if (part_pb.db_id() > 0) {
1332
7
                partition_version_keys.push_back(partition_version_key(
1333
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1334
7
            }
1335
7
            metrics_context.report();
1336
7
        }
1337
8
        return ret;
1338
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1235
6
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1236
6
        ++num_scanned;
1237
6
        RecyclePartitionPB part_pb;
1238
6
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1239
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1240
0
            return -1;
1241
0
        }
1242
6
        int64_t current_time = ::time(nullptr);
1243
6
        if (current_time <
1244
6
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1245
0
            return 0;
1246
0
        }
1247
6
        ++num_expired;
1248
        // decode partition_id
1249
6
        auto k1 = k;
1250
6
        k1.remove_prefix(1);
1251
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1252
6
        decode_key(&k1, &out);
1253
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1254
6
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1255
6
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1256
6
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1257
6
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1258
        // Change state to RECYCLING
1259
6
        std::unique_ptr<Transaction> txn;
1260
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1261
6
        if (err != TxnErrorCode::TXN_OK) {
1262
0
            LOG_WARNING("failed to create txn").tag("err", err);
1263
0
            return -1;
1264
0
        }
1265
6
        std::string val;
1266
6
        err = txn->get(k, &val);
1267
6
        if (err ==
1268
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1269
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1270
0
            return 0;
1271
0
        }
1272
6
        if (err != TxnErrorCode::TXN_OK) {
1273
0
            LOG_WARNING("failed to get kv");
1274
0
            return -1;
1275
0
        }
1276
6
        part_pb.Clear();
1277
6
        if (!part_pb.ParseFromString(val)) {
1278
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1279
0
            return -1;
1280
0
        }
1281
        // Partitions with PREPARED state MUST have no data
1282
6
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
1283
6
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1284
6
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1285
6
            txn->put(k, part_pb.SerializeAsString());
1286
6
            err = txn->commit();
1287
6
            if (err != TxnErrorCode::TXN_OK) {
1288
0
                LOG_WARNING("failed to commit txn: {}", err);
1289
0
                return -1;
1290
0
            }
1291
6
        }
1292
1293
6
        int ret = 0;
1294
30
        for (int64_t index_id : part_pb.index_id()) {
1295
30
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id,
1296
30
                                is_empty_tablet) != 0) {
1297
0
                LOG_WARNING("failed to recycle tablets under partition")
1298
0
                        .tag("table_id", part_pb.table_id())
1299
0
                        .tag("instance_id", instance_id_)
1300
0
                        .tag("index_id", index_id)
1301
0
                        .tag("partition_id", partition_id);
1302
0
                ret = -1;
1303
0
            }
1304
30
        }
1305
6
        if (ret == 0 && part_pb.has_db_id()) {
1306
            // Recycle the versioned keys
1307
6
            std::unique_ptr<Transaction> txn;
1308
6
            err = txn_kv_->create_txn(&txn);
1309
6
            if (err != TxnErrorCode::TXN_OK) {
1310
0
                LOG_WARNING("failed to create txn").tag("err", err);
1311
0
                return -1;
1312
0
            }
1313
6
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1314
6
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1315
6
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1316
6
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1317
6
            versioned_remove_all(txn.get(), meta_key);
1318
6
            txn->remove(index_key);
1319
6
            txn->remove(inverted_index_key);
1320
6
            err = txn->commit();
1321
6
            if (err != TxnErrorCode::TXN_OK) {
1322
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1323
0
                return -1;
1324
0
            }
1325
6
        }
1326
1327
6
        if (ret == 0) {
1328
6
            ++num_recycled;
1329
6
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1330
6
            partition_keys.push_back(k);
1331
6
            if (part_pb.db_id() > 0) {
1332
6
                partition_version_keys.push_back(partition_version_key(
1333
6
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1334
6
            }
1335
6
            metrics_context.report();
1336
6
        }
1337
6
        return ret;
1338
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1235
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1236
2
        ++num_scanned;
1237
2
        RecyclePartitionPB part_pb;
1238
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1239
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1240
0
            return -1;
1241
0
        }
1242
2
        int64_t current_time = ::time(nullptr);
1243
2
        if (current_time <
1244
2
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1245
0
            return 0;
1246
0
        }
1247
2
        ++num_expired;
1248
        // decode partition_id
1249
2
        auto k1 = k;
1250
2
        k1.remove_prefix(1);
1251
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1252
2
        decode_key(&k1, &out);
1253
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1254
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1255
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1256
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1257
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1258
        // Change state to RECYCLING
1259
2
        std::unique_ptr<Transaction> txn;
1260
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1261
2
        if (err != TxnErrorCode::TXN_OK) {
1262
0
            LOG_WARNING("failed to create txn").tag("err", err);
1263
0
            return -1;
1264
0
        }
1265
2
        std::string val;
1266
2
        err = txn->get(k, &val);
1267
2
        if (err ==
1268
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1269
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1270
0
            return 0;
1271
0
        }
1272
2
        if (err != TxnErrorCode::TXN_OK) {
1273
0
            LOG_WARNING("failed to get kv");
1274
0
            return -1;
1275
0
        }
1276
2
        part_pb.Clear();
1277
2
        if (!part_pb.ParseFromString(val)) {
1278
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1279
0
            return -1;
1280
0
        }
1281
        // Partitions with PREPARED state MUST have no data
1282
2
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
1283
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1284
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1285
1
            txn->put(k, part_pb.SerializeAsString());
1286
1
            err = txn->commit();
1287
1
            if (err != TxnErrorCode::TXN_OK) {
1288
0
                LOG_WARNING("failed to commit txn: {}", err);
1289
0
                return -1;
1290
0
            }
1291
1
        }
1292
1293
2
        int ret = 0;
1294
2
        for (int64_t index_id : part_pb.index_id()) {
1295
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id,
1296
2
                                is_empty_tablet) != 0) {
1297
1
                LOG_WARNING("failed to recycle tablets under partition")
1298
1
                        .tag("table_id", part_pb.table_id())
1299
1
                        .tag("instance_id", instance_id_)
1300
1
                        .tag("index_id", index_id)
1301
1
                        .tag("partition_id", partition_id);
1302
1
                ret = -1;
1303
1
            }
1304
2
        }
1305
2
        if (ret == 0 && part_pb.has_db_id()) {
1306
            // Recycle the versioned keys
1307
1
            std::unique_ptr<Transaction> txn;
1308
1
            err = txn_kv_->create_txn(&txn);
1309
1
            if (err != TxnErrorCode::TXN_OK) {
1310
0
                LOG_WARNING("failed to create txn").tag("err", err);
1311
0
                return -1;
1312
0
            }
1313
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1314
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1315
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1316
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1317
1
            versioned_remove_all(txn.get(), meta_key);
1318
1
            txn->remove(index_key);
1319
1
            txn->remove(inverted_index_key);
1320
1
            err = txn->commit();
1321
1
            if (err != TxnErrorCode::TXN_OK) {
1322
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1323
0
                return -1;
1324
0
            }
1325
1
        }
1326
1327
2
        if (ret == 0) {
1328
1
            ++num_recycled;
1329
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1330
1
            partition_keys.push_back(k);
1331
1
            if (part_pb.db_id() > 0) {
1332
1
                partition_version_keys.push_back(partition_version_key(
1333
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1334
1
            }
1335
1
            metrics_context.report();
1336
1
        }
1337
2
        return ret;
1338
2
    };
1339
1340
14
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1341
4
        if (partition_keys.empty()) return 0;
1342
3
        DORIS_CLOUD_DEFER {
1343
3
            partition_keys.clear();
1344
3
            partition_version_keys.clear();
1345
3
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1342
2
        DORIS_CLOUD_DEFER {
1343
2
            partition_keys.clear();
1344
2
            partition_version_keys.clear();
1345
2
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1342
1
        DORIS_CLOUD_DEFER {
1343
1
            partition_keys.clear();
1344
1
            partition_version_keys.clear();
1345
1
        };
1346
3
        std::unique_ptr<Transaction> txn;
1347
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1348
3
        if (err != TxnErrorCode::TXN_OK) {
1349
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1350
0
            return -1;
1351
0
        }
1352
7
        for (auto& k : partition_keys) {
1353
7
            txn->remove(k);
1354
7
        }
1355
7
        for (auto& k : partition_version_keys) {
1356
7
            txn->remove(k);
1357
7
        }
1358
3
        err = txn->commit();
1359
3
        if (err != TxnErrorCode::TXN_OK) {
1360
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1361
0
                         << " err=" << err;
1362
0
            return -1;
1363
0
        }
1364
3
        return 0;
1365
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1340
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1341
2
        if (partition_keys.empty()) return 0;
1342
2
        DORIS_CLOUD_DEFER {
1343
2
            partition_keys.clear();
1344
2
            partition_version_keys.clear();
1345
2
        };
1346
2
        std::unique_ptr<Transaction> txn;
1347
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1348
2
        if (err != TxnErrorCode::TXN_OK) {
1349
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1350
0
            return -1;
1351
0
        }
1352
6
        for (auto& k : partition_keys) {
1353
6
            txn->remove(k);
1354
6
        }
1355
6
        for (auto& k : partition_version_keys) {
1356
6
            txn->remove(k);
1357
6
        }
1358
2
        err = txn->commit();
1359
2
        if (err != TxnErrorCode::TXN_OK) {
1360
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1361
0
                         << " err=" << err;
1362
0
            return -1;
1363
0
        }
1364
2
        return 0;
1365
2
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1340
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1341
2
        if (partition_keys.empty()) return 0;
1342
1
        DORIS_CLOUD_DEFER {
1343
1
            partition_keys.clear();
1344
1
            partition_version_keys.clear();
1345
1
        };
1346
1
        std::unique_ptr<Transaction> txn;
1347
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1348
1
        if (err != TxnErrorCode::TXN_OK) {
1349
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1350
0
            return -1;
1351
0
        }
1352
1
        for (auto& k : partition_keys) {
1353
1
            txn->remove(k);
1354
1
        }
1355
1
        for (auto& k : partition_version_keys) {
1356
1
            txn->remove(k);
1357
1
        }
1358
1
        err = txn->commit();
1359
1
        if (err != TxnErrorCode::TXN_OK) {
1360
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1361
0
                         << " err=" << err;
1362
0
            return -1;
1363
0
        }
1364
1
        return 0;
1365
1
    };
1366
1367
14
    if (config::enable_recycler_stats_metrics) {
1368
0
        scan_and_statistics_partitions();
1369
0
    }
1370
    // recycle_func and loop_done for scan and recycle
1371
14
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
1372
14
}
1373
1374
12
int InstanceRecycler::recycle_versions() {
1375
12
    int64_t num_scanned = 0;
1376
12
    int64_t num_recycled = 0;
1377
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
1378
1379
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
1380
1381
12
    auto start_time = steady_clock::now();
1382
1383
12
    DORIS_CLOUD_DEFER {
1384
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1385
12
        metrics_context.finish_report();
1386
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
1387
12
                .tag("instance_id", instance_id_)
1388
12
                .tag("num_scanned", num_scanned)
1389
12
                .tag("num_recycled", num_recycled);
1390
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
1383
12
    DORIS_CLOUD_DEFER {
1384
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1385
12
        metrics_context.finish_report();
1386
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
1387
12
                .tag("instance_id", instance_id_)
1388
12
                .tag("num_scanned", num_scanned)
1389
12
                .tag("num_recycled", num_recycled);
1390
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
1391
1392
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
1393
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
1394
12
    int64_t last_scanned_table_id = 0;
1395
12
    bool is_recycled = false; // Is last scanned kv recycled
1396
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
1397
12
                         &metrics_context, this](std::string_view k, std::string_view) {
1398
2
        ++num_scanned;
1399
2
        auto k1 = k;
1400
2
        k1.remove_prefix(1);
1401
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1402
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1403
2
        decode_key(&k1, &out);
1404
2
        DCHECK_EQ(out.size(), 6) << k;
1405
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1406
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1407
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1408
0
            return 0;
1409
0
        }
1410
2
        last_scanned_table_id = table_id;
1411
2
        is_recycled = false;
1412
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1413
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1414
2
        std::unique_ptr<Transaction> txn;
1415
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1416
2
        if (err != TxnErrorCode::TXN_OK) {
1417
0
            return -1;
1418
0
        }
1419
2
        std::unique_ptr<RangeGetIterator> iter;
1420
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1421
2
        if (err != TxnErrorCode::TXN_OK) {
1422
0
            return -1;
1423
0
        }
1424
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1425
1
            return 0;
1426
1
        }
1427
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1428
        // 1. Remove all partition version kvs of this table
1429
1
        auto partition_version_key_begin =
1430
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1431
1
        auto partition_version_key_end =
1432
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1433
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1434
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1435
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1436
1
                     << " table_id=" << table_id;
1437
        // 2. Remove the table version kv of this table
1438
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1439
1
        txn->remove(tbl_version_key);
1440
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1441
        // 3. Remove mow delete bitmap update lock and tablet job lock
1442
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1443
1
        txn->remove(lock_key);
1444
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1445
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1446
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1447
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
1448
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1449
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1450
1
                     << " table_id=" << table_id;
1451
1
        err = txn->commit();
1452
1
        if (err != TxnErrorCode::TXN_OK) {
1453
0
            return -1;
1454
0
        }
1455
1
        metrics_context.total_recycled_num = ++num_recycled;
1456
1
        metrics_context.report();
1457
1
        is_recycled = true;
1458
1
        return 0;
1459
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1397
2
                         &metrics_context, this](std::string_view k, std::string_view) {
1398
2
        ++num_scanned;
1399
2
        auto k1 = k;
1400
2
        k1.remove_prefix(1);
1401
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1402
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1403
2
        decode_key(&k1, &out);
1404
2
        DCHECK_EQ(out.size(), 6) << k;
1405
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1406
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1407
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1408
0
            return 0;
1409
0
        }
1410
2
        last_scanned_table_id = table_id;
1411
2
        is_recycled = false;
1412
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1413
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1414
2
        std::unique_ptr<Transaction> txn;
1415
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1416
2
        if (err != TxnErrorCode::TXN_OK) {
1417
0
            return -1;
1418
0
        }
1419
2
        std::unique_ptr<RangeGetIterator> iter;
1420
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1421
2
        if (err != TxnErrorCode::TXN_OK) {
1422
0
            return -1;
1423
0
        }
1424
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1425
1
            return 0;
1426
1
        }
1427
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1428
        // 1. Remove all partition version kvs of this table
1429
1
        auto partition_version_key_begin =
1430
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1431
1
        auto partition_version_key_end =
1432
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1433
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1434
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1435
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1436
1
                     << " table_id=" << table_id;
1437
        // 2. Remove the table version kv of this table
1438
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1439
1
        txn->remove(tbl_version_key);
1440
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1441
        // 3. Remove mow delete bitmap update lock and tablet job lock
1442
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1443
1
        txn->remove(lock_key);
1444
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1445
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1446
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1447
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
1448
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1449
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1450
1
                     << " table_id=" << table_id;
1451
1
        err = txn->commit();
1452
1
        if (err != TxnErrorCode::TXN_OK) {
1453
0
            return -1;
1454
0
        }
1455
1
        metrics_context.total_recycled_num = ++num_recycled;
1456
1
        metrics_context.report();
1457
1
        is_recycled = true;
1458
1
        return 0;
1459
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
1460
1461
12
    if (config::enable_recycler_stats_metrics) {
1462
0
        scan_and_statistics_versions();
1463
0
    }
1464
    // recycle_func and loop_done for scan and recycle
1465
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
1466
12
}
1467
1468
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
1469
                                      RecyclerMetricsContext& metrics_context, int64_t partition_id,
1470
41
                                      bool is_empty_tablet) {
1471
41
    int64_t num_scanned = 0;
1472
41
    std::atomic_long num_recycled = 0;
1473
1474
41
    std::string tablet_key_begin, tablet_key_end;
1475
41
    std::string stats_key_begin, stats_key_end;
1476
41
    std::string job_key_begin, job_key_end;
1477
1478
41
    std::string tablet_belongs;
1479
41
    if (partition_id > 0) {
1480
        // recycle tablets in a partition belonging to the index
1481
32
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
1482
32
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
1483
32
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
1484
32
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
1485
32
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
1486
32
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
1487
32
        tablet_belongs = "partition";
1488
32
    } else {
1489
        // recycle tablets in the index
1490
9
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
1491
9
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
1492
9
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
1493
9
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
1494
9
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
1495
9
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
1496
9
        tablet_belongs = "index";
1497
9
    }
1498
1499
41
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
1500
41
            .tag("table_id", table_id)
1501
41
            .tag("index_id", index_id)
1502
41
            .tag("partition_id", partition_id);
1503
1504
41
    auto start_time = steady_clock::now();
1505
1506
41
    DORIS_CLOUD_DEFER {
1507
41
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1508
41
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1509
41
                .tag("instance_id", instance_id_)
1510
41
                .tag("table_id", table_id)
1511
41
                .tag("index_id", index_id)
1512
41
                .tag("partition_id", partition_id)
1513
41
                .tag("num_scanned", num_scanned)
1514
41
                .tag("num_recycled", num_recycled);
1515
41
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_0clEv
Line
Count
Source
1506
37
    DORIS_CLOUD_DEFER {
1507
37
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1508
37
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1509
37
                .tag("instance_id", instance_id_)
1510
37
                .tag("table_id", table_id)
1511
37
                .tag("index_id", index_id)
1512
37
                .tag("partition_id", partition_id)
1513
37
                .tag("num_scanned", num_scanned)
1514
37
                .tag("num_recycled", num_recycled);
1515
37
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_0clEv
Line
Count
Source
1506
4
    DORIS_CLOUD_DEFER {
1507
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1508
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1509
4
                .tag("instance_id", instance_id_)
1510
4
                .tag("table_id", table_id)
1511
4
                .tag("index_id", index_id)
1512
4
                .tag("partition_id", partition_id)
1513
4
                .tag("num_scanned", num_scanned)
1514
4
                .tag("num_recycled", num_recycled);
1515
4
    };
1516
1517
    // The first string_view represents the tablet key which has been recycled
1518
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
1519
41
    using TabletKeyPair = std::pair<std::string_view, bool>;
1520
41
    SyncExecutor<TabletKeyPair> sync_executor(
1521
41
            _thread_pool_group.recycle_tablet_pool,
1522
41
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
1523
41
                        index_id, partition_id),
1524
251
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1524
231
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1524
20
            [](const TabletKeyPair& k) { return k.first.empty(); });
1525
1526
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
1527
41
    std::vector<std::string> tablet_idx_keys;
1528
41
    std::vector<std::string> init_rs_keys;
1529
271
    auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int {
1530
271
        bool use_range_remove = true;
1531
271
        ++num_scanned;
1532
271
        doris::TabletMetaCloudPB tablet_meta_pb;
1533
271
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1534
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1535
0
            use_range_remove = false;
1536
0
            return -1;
1537
0
        }
1538
271
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1539
1540
271
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1541
20
            LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id();
1542
20
            return -1;
1543
20
        }
1544
1545
251
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1546
251
        if (!is_empty_tablet) {
1547
251
            sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1548
251
                               &metrics_context, k]() mutable -> TabletKeyPair {
1549
251
                if (recycle_tablet(tid, metrics_context) != 0) {
1550
0
                    LOG_WARNING("failed to recycle tablet")
1551
0
                            .tag("instance_id", instance_id_)
1552
0
                            .tag("tablet_id", tid);
1553
0
                    range_move = false;
1554
0
                    return {std::string_view(), range_move};
1555
0
                }
1556
251
                ++num_recycled;
1557
251
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1558
251
                return {k, range_move};
1559
251
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
1548
231
                               &metrics_context, k]() mutable -> TabletKeyPair {
1549
231
                if (recycle_tablet(tid, metrics_context) != 0) {
1550
0
                    LOG_WARNING("failed to recycle tablet")
1551
0
                            .tag("instance_id", instance_id_)
1552
0
                            .tag("tablet_id", tid);
1553
0
                    range_move = false;
1554
0
                    return {std::string_view(), range_move};
1555
0
                }
1556
231
                ++num_recycled;
1557
231
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1558
231
                return {k, range_move};
1559
231
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
1548
20
                               &metrics_context, k]() mutable -> TabletKeyPair {
1549
20
                if (recycle_tablet(tid, metrics_context) != 0) {
1550
0
                    LOG_WARNING("failed to recycle tablet")
1551
0
                            .tag("instance_id", instance_id_)
1552
0
                            .tag("tablet_id", tid);
1553
0
                    range_move = false;
1554
0
                    return {std::string_view(), range_move};
1555
0
                }
1556
20
                ++num_recycled;
1557
20
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1558
20
                return {k, range_move};
1559
20
            });
1560
251
        } else {
1561
            // Empty tablet only has a [0-1] init rowset
1562
0
            init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1}));
1563
0
            DCHECK([&]() {
1564
0
                std::unique_ptr<Transaction> txn;
1565
0
                if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) {
1566
0
                    LOG_ERROR("failed to create txn").tag("err", err);
1567
0
                    return false;
1568
0
                }
1569
0
                auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2});
1570
0
                auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
1571
0
                std::unique_ptr<RangeGetIterator> iter;
1572
0
                if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1);
1573
0
                    err != TxnErrorCode::TXN_OK) {
1574
0
                    LOG_ERROR("failed to get kv").tag("err", err);
1575
0
                    return false;
1576
0
                }
1577
0
                if (iter->has_next()) {
1578
0
                    LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id);
1579
0
                    return false;
1580
0
                }
1581
0
                return true;
1582
0
            }());
1583
0
            sync_executor.add([k]() mutable -> TabletKeyPair {
1584
0
                LOG_INFO("k is {}, is empty {}", k, k.empty());
1585
0
                return {k, true};
1586
0
            });
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE1_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE1_clEv
1587
0
            ++num_recycled;
1588
0
        }
1589
251
        return 0;
1590
271
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
1529
231
    auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int {
1530
231
        bool use_range_remove = true;
1531
231
        ++num_scanned;
1532
231
        doris::TabletMetaCloudPB tablet_meta_pb;
1533
231
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1534
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1535
0
            use_range_remove = false;
1536
0
            return -1;
1537
0
        }
1538
231
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1539
1540
231
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1541
0
            LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id();
1542
0
            return -1;
1543
0
        }
1544
1545
231
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1546
231
        if (!is_empty_tablet) {
1547
231
            sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1548
231
                               &metrics_context, k]() mutable -> TabletKeyPair {
1549
231
                if (recycle_tablet(tid, metrics_context) != 0) {
1550
231
                    LOG_WARNING("failed to recycle tablet")
1551
231
                            .tag("instance_id", instance_id_)
1552
231
                            .tag("tablet_id", tid);
1553
231
                    range_move = false;
1554
231
                    return {std::string_view(), range_move};
1555
231
                }
1556
231
                ++num_recycled;
1557
231
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1558
231
                return {k, range_move};
1559
231
            });
1560
231
        } else {
1561
            // Empty tablet only has a [0-1] init rowset
1562
0
            init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1}));
1563
0
            DCHECK([&]() {
1564
0
                std::unique_ptr<Transaction> txn;
1565
0
                if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) {
1566
0
                    LOG_ERROR("failed to create txn").tag("err", err);
1567
0
                    return false;
1568
0
                }
1569
0
                auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2});
1570
0
                auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
1571
0
                std::unique_ptr<RangeGetIterator> iter;
1572
0
                if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1);
1573
0
                    err != TxnErrorCode::TXN_OK) {
1574
0
                    LOG_ERROR("failed to get kv").tag("err", err);
1575
0
                    return false;
1576
0
                }
1577
0
                if (iter->has_next()) {
1578
0
                    LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id);
1579
0
                    return false;
1580
0
                }
1581
0
                return true;
1582
0
            }());
1583
0
            sync_executor.add([k]() mutable -> TabletKeyPair {
1584
0
                LOG_INFO("k is {}, is empty {}", k, k.empty());
1585
0
                return {k, true};
1586
0
            });
1587
0
            ++num_recycled;
1588
0
        }
1589
231
        return 0;
1590
231
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
1529
40
    auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int {
1530
40
        bool use_range_remove = true;
1531
40
        ++num_scanned;
1532
40
        doris::TabletMetaCloudPB tablet_meta_pb;
1533
40
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1534
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1535
0
            use_range_remove = false;
1536
0
            return -1;
1537
0
        }
1538
40
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1539
1540
40
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1541
20
            LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id();
1542
20
            return -1;
1543
20
        }
1544
1545
20
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1546
20
        if (!is_empty_tablet) {
1547
20
            sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1548
20
                               &metrics_context, k]() mutable -> TabletKeyPair {
1549
20
                if (recycle_tablet(tid, metrics_context) != 0) {
1550
20
                    LOG_WARNING("failed to recycle tablet")
1551
20
                            .tag("instance_id", instance_id_)
1552
20
                            .tag("tablet_id", tid);
1553
20
                    range_move = false;
1554
20
                    return {std::string_view(), range_move};
1555
20
                }
1556
20
                ++num_recycled;
1557
20
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1558
20
                return {k, range_move};
1559
20
            });
1560
20
        } else {
1561
            // Empty tablet only has a [0-1] init rowset
1562
0
            init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1}));
1563
0
            DCHECK([&]() {
1564
0
                std::unique_ptr<Transaction> txn;
1565
0
                if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) {
1566
0
                    LOG_ERROR("failed to create txn").tag("err", err);
1567
0
                    return false;
1568
0
                }
1569
0
                auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2});
1570
0
                auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
1571
0
                std::unique_ptr<RangeGetIterator> iter;
1572
0
                if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1);
1573
0
                    err != TxnErrorCode::TXN_OK) {
1574
0
                    LOG_ERROR("failed to get kv").tag("err", err);
1575
0
                    return false;
1576
0
                }
1577
0
                if (iter->has_next()) {
1578
0
                    LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id);
1579
0
                    return false;
1580
0
                }
1581
0
                return true;
1582
0
            }());
1583
0
            sync_executor.add([k]() mutable -> TabletKeyPair {
1584
0
                LOG_INFO("k is {}, is empty {}", k, k.empty());
1585
0
                return {k, true};
1586
0
            });
1587
0
            ++num_recycled;
1588
0
        }
1589
20
        return 0;
1590
40
    };
1591
1592
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
1593
41
    auto loop_done = [&, this]() -> int {
1594
41
        bool finished = true;
1595
41
        auto tablet_keys = sync_executor.when_all(&finished);
1596
41
        if (!finished) {
1597
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1598
0
            return -1;
1599
0
        }
1600
41
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1601
        // sort the vector using key's order
1602
39
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1603
980
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
1603
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
1603
36
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1604
39
        bool use_range_remove = true;
1605
251
        for (auto& [_, remove] : tablet_keys) {
1606
251
            if (!remove) {
1607
0
                use_range_remove = remove;
1608
0
                break;
1609
0
            }
1610
251
        }
1611
39
        DORIS_CLOUD_DEFER {
1612
39
            tablet_idx_keys.clear();
1613
39
            init_rs_keys.clear();
1614
39
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1611
37
        DORIS_CLOUD_DEFER {
1612
37
            tablet_idx_keys.clear();
1613
37
            init_rs_keys.clear();
1614
37
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1611
2
        DORIS_CLOUD_DEFER {
1612
2
            tablet_idx_keys.clear();
1613
2
            init_rs_keys.clear();
1614
2
        };
1615
39
        std::unique_ptr<Transaction> txn;
1616
39
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1617
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1618
0
            return -1;
1619
0
        }
1620
39
        std::string tablet_key_end;
1621
39
        if (!tablet_keys.empty()) {
1622
39
            if (use_range_remove) {
1623
39
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1624
39
                txn->remove(tablet_keys.front().first, tablet_key_end);
1625
39
            } else {
1626
0
                for (auto& [k, _] : tablet_keys) {
1627
0
                    txn->remove(k);
1628
0
                }
1629
0
            }
1630
39
        }
1631
251
        for (auto& k : tablet_idx_keys) {
1632
251
            txn->remove(k);
1633
251
        }
1634
39
        for (auto& k : init_rs_keys) {
1635
0
            txn->remove(k);
1636
0
        }
1637
39
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1638
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1639
0
                         << ", err=" << err;
1640
0
            return -1;
1641
0
        }
1642
39
        return 0;
1643
39
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEv
Line
Count
Source
1593
37
    auto loop_done = [&, this]() -> int {
1594
37
        bool finished = true;
1595
37
        auto tablet_keys = sync_executor.when_all(&finished);
1596
37
        if (!finished) {
1597
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1598
0
            return -1;
1599
0
        }
1600
37
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1601
        // sort the vector using key's order
1602
37
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1603
37
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1604
37
        bool use_range_remove = true;
1605
231
        for (auto& [_, remove] : tablet_keys) {
1606
231
            if (!remove) {
1607
0
                use_range_remove = remove;
1608
0
                break;
1609
0
            }
1610
231
        }
1611
37
        DORIS_CLOUD_DEFER {
1612
37
            tablet_idx_keys.clear();
1613
37
            init_rs_keys.clear();
1614
37
        };
1615
37
        std::unique_ptr<Transaction> txn;
1616
37
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1617
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1618
0
            return -1;
1619
0
        }
1620
37
        std::string tablet_key_end;
1621
37
        if (!tablet_keys.empty()) {
1622
37
            if (use_range_remove) {
1623
37
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1624
37
                txn->remove(tablet_keys.front().first, tablet_key_end);
1625
37
            } else {
1626
0
                for (auto& [k, _] : tablet_keys) {
1627
0
                    txn->remove(k);
1628
0
                }
1629
0
            }
1630
37
        }
1631
231
        for (auto& k : tablet_idx_keys) {
1632
231
            txn->remove(k);
1633
231
        }
1634
37
        for (auto& k : init_rs_keys) {
1635
0
            txn->remove(k);
1636
0
        }
1637
37
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1638
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1639
0
                         << ", err=" << err;
1640
0
            return -1;
1641
0
        }
1642
37
        return 0;
1643
37
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEv
Line
Count
Source
1593
4
    auto loop_done = [&, this]() -> int {
1594
4
        bool finished = true;
1595
4
        auto tablet_keys = sync_executor.when_all(&finished);
1596
4
        if (!finished) {
1597
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1598
0
            return -1;
1599
0
        }
1600
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1601
        // sort the vector using key's order
1602
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1603
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1604
2
        bool use_range_remove = true;
1605
20
        for (auto& [_, remove] : tablet_keys) {
1606
20
            if (!remove) {
1607
0
                use_range_remove = remove;
1608
0
                break;
1609
0
            }
1610
20
        }
1611
2
        DORIS_CLOUD_DEFER {
1612
2
            tablet_idx_keys.clear();
1613
2
            init_rs_keys.clear();
1614
2
        };
1615
2
        std::unique_ptr<Transaction> txn;
1616
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1617
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1618
0
            return -1;
1619
0
        }
1620
2
        std::string tablet_key_end;
1621
2
        if (!tablet_keys.empty()) {
1622
2
            if (use_range_remove) {
1623
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1624
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
1625
2
            } else {
1626
0
                for (auto& [k, _] : tablet_keys) {
1627
0
                    txn->remove(k);
1628
0
                }
1629
0
            }
1630
2
        }
1631
20
        for (auto& k : tablet_idx_keys) {
1632
20
            txn->remove(k);
1633
20
        }
1634
2
        for (auto& k : init_rs_keys) {
1635
0
            txn->remove(k);
1636
0
        }
1637
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1638
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1639
0
                         << ", err=" << err;
1640
0
            return -1;
1641
0
        }
1642
2
        return 0;
1643
2
    };
1644
1645
41
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
1646
41
                               std::move(loop_done));
1647
41
    if (ret != 0) {
1648
2
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
1649
2
        return ret;
1650
2
    }
1651
1652
    // directly remove tablet stats and tablet jobs of these dropped index or partition
1653
39
    std::unique_ptr<Transaction> txn;
1654
39
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1655
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
1656
0
        return -1;
1657
0
    }
1658
39
    txn->remove(stats_key_begin, stats_key_end);
1659
39
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
1660
39
                 << " end=" << hex(stats_key_end);
1661
39
    txn->remove(job_key_begin, job_key_end);
1662
39
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
1663
39
    std::string schema_key_begin, schema_key_end;
1664
39
    std::string schema_dict_key;
1665
39
    if (partition_id <= 0) {
1666
        // Delete schema kv of this index
1667
8
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
1668
8
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
1669
8
        txn->remove(schema_key_begin, schema_key_end);
1670
8
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
1671
8
                     << " end=" << hex(schema_key_end);
1672
8
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
1673
8
        txn->remove(schema_dict_key);
1674
8
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
1675
8
    }
1676
1677
39
    TxnErrorCode err = txn->commit();
1678
39
    if (err != TxnErrorCode::TXN_OK) {
1679
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
1680
0
                     << " err=" << err;
1681
0
        return -1;
1682
0
    }
1683
1684
39
    return ret;
1685
39
}
1686
1687
4.00k
int InstanceRecycler::delete_rowset_data(const doris::RowsetMetaCloudPB& rs_meta_pb) {
1688
4.00k
    int64_t num_segments = rs_meta_pb.num_segments();
1689
4.00k
    if (num_segments <= 0) return 0;
1690
4.00k
    if (!rs_meta_pb.has_tablet_schema()) {
1691
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
1692
0
                                  rs_meta_pb.rowset_id_v2());
1693
0
    }
1694
4.00k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
1695
4.00k
    if (it == accessor_map_.end()) {
1696
0
        LOG_WARNING("instance has no such resource id")
1697
0
                .tag("instance_id", instance_id_)
1698
0
                .tag("resource_id", rs_meta_pb.resource_id());
1699
0
        return -1;
1700
0
    }
1701
4.00k
    auto& accessor = it->second;
1702
4.00k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
1703
4.00k
    int64_t tablet_id = rs_meta_pb.tablet_id();
1704
    // process inverted indexes
1705
4.00k
    std::vector<std::pair<int64_t, std::string>> index_ids;
1706
4.00k
    index_ids.reserve(rs_meta_pb.tablet_schema().index_size());
1707
8.00k
    for (auto& i : rs_meta_pb.tablet_schema().index()) {
1708
8.00k
        if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
1709
8.00k
            index_ids.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
1710
8.00k
        }
1711
8.00k
    }
1712
4.00k
    std::vector<std::string> file_paths;
1713
4.00k
    auto tablet_schema = rs_meta_pb.tablet_schema();
1714
4.00k
    auto index_storage_format = InvertedIndexStorageFormatPB::V1;
1715
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
1716
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
1717
20.0k
        if (tablet_schema.has_inverted_index_storage_format()) {
1718
10.0k
            index_storage_format = tablet_schema.inverted_index_storage_format();
1719
10.0k
        }
1720
20.0k
        if (index_storage_format == InvertedIndexStorageFormatPB::V1) {
1721
40.0k
            for (const auto& index_id : index_ids) {
1722
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
1723
40.0k
                                                            index_id.second));
1724
40.0k
            }
1725
20.0k
        } else if (!index_ids.empty()) {
1726
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
1727
0
        }
1728
20.0k
    }
1729
    // TODO(AlexYue): seems could do do batch
1730
4.00k
    return accessor->delete_files(file_paths);
1731
4.00k
}
1732
1733
int InstanceRecycler::delete_rowset_data(
1734
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
1735
32
        RecyclerMetricsContext& metrics_context) {
1736
32
    int ret = 0;
1737
    // resource_id -> file_paths
1738
32
    std::map<std::string, std::vector<std::string>> resource_file_paths;
1739
    // (resource_id, tablet_id, rowset_id)
1740
32
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
1741
32
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
1742
1743
6.14k
    for (const auto& [_, rs] : rowsets) {
1744
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
1745
        // due to aborted schema change.
1746
6.14k
        if (is_formal_rowset) {
1747
3.12k
            std::lock_guard lock(recycled_tablets_mtx_);
1748
3.12k
            if (recycled_tablets_.count(rs.tablet_id())) {
1749
0
                continue; // Rowset data has already been deleted
1750
0
            }
1751
3.12k
        }
1752
1753
6.14k
        auto it = accessor_map_.find(rs.resource_id());
1754
        // possible if the accessor is not initilized correctly
1755
6.14k
        if (it == accessor_map_.end()) [[unlikely]] {
1756
1
            LOG_WARNING("instance has no such resource id")
1757
1
                    .tag("instance_id", instance_id_)
1758
1
                    .tag("resource_id", rs.resource_id());
1759
1
            ret = -1;
1760
1
            continue;
1761
1
        }
1762
1763
6.14k
        auto& file_paths = resource_file_paths[rs.resource_id()];
1764
6.14k
        const auto& rowset_id = rs.rowset_id_v2();
1765
6.14k
        int64_t tablet_id = rs.tablet_id();
1766
6.14k
        int64_t num_segments = rs.num_segments();
1767
6.14k
        if (num_segments <= 0) continue;
1768
1769
        // Process inverted indexes
1770
6.14k
        std::vector<std::pair<int64_t, std::string>> index_ids;
1771
        // default format as v1.
1772
6.14k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
1773
6.14k
        int inverted_index_get_ret = 0;
1774
6.14k
        if (rs.has_tablet_schema()) {
1775
5.54k
            for (const auto& index : rs.tablet_schema().index()) {
1776
5.54k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
1777
5.54k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
1778
5.54k
                }
1779
5.54k
            }
1780
2.59k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
1781
2.56k
                index_format = rs.tablet_schema().inverted_index_storage_format();
1782
2.56k
            }
1783
3.55k
        } else {
1784
3.55k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
1785
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
1786
0
                                "instance_id="
1787
0
                             << instance_id_ << " tablet_id=" << tablet_id
1788
0
                             << " rowset_id=" << rowset_id;
1789
0
                ret = -1;
1790
0
                continue;
1791
0
            }
1792
3.55k
            InvertedIndexInfo index_info;
1793
3.55k
            inverted_index_get_ret =
1794
3.55k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
1795
3.55k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
1796
3.55k
                                     &inverted_index_get_ret);
1797
3.55k
            if (inverted_index_get_ret == 0) {
1798
3.05k
                index_format = index_info.first;
1799
3.05k
                index_ids = index_info.second;
1800
3.05k
            } else if (inverted_index_get_ret == 1) {
1801
                // 1. Schema kv not found means tablet has been recycled
1802
                // Maybe some tablet recycle failed by some bugs
1803
                // We need to delete again to double check
1804
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
1805
                // because we are uncertain about the inverted index information.
1806
                // If there are inverted indexes, some data might not be deleted,
1807
                // but this is acceptable as we have made our best effort to delete the data.
1808
503
                LOG_INFO(
1809
503
                        "delete rowset data schema kv not found, need to delete again to double "
1810
503
                        "check")
1811
503
                        .tag("instance_id", instance_id_)
1812
503
                        .tag("tablet_id", tablet_id)
1813
503
                        .tag("rowset", rs.ShortDebugString());
1814
                // Currently index_ids is guaranteed to be empty,
1815
                // but we clear it again here as a safeguard against future code changes
1816
                // that might cause index_ids to no longer be empty
1817
503
                index_format = InvertedIndexStorageFormatPB::V2;
1818
503
                index_ids.clear();
1819
503
            } else {
1820
0
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
1821
0
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
1822
0
                ret = -1;
1823
0
                continue;
1824
0
            }
1825
3.55k
        }
1826
6.14k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
1827
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
1828
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
1829
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
1830
5
            continue;
1831
5
        }
1832
36.8k
        for (int64_t i = 0; i < num_segments; ++i) {
1833
30.6k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
1834
30.6k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
1835
59.2k
                for (const auto& index_id : index_ids) {
1836
59.2k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
1837
59.2k
                                                                index_id.first, index_id.second));
1838
59.2k
                }
1839
28.1k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
1840
                // try to recycle inverted index v2 when get_ret == 1
1841
                // we treat schema not found as if it has a v2 format inverted index
1842
                // to reduce chance of data leakage
1843
2.50k
                if (inverted_index_get_ret == 1) {
1844
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
1845
2.50k
                            .tag("instance_id", instance_id_)
1846
2.50k
                            .tag("inverted index v2 path",
1847
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
1848
2.50k
                }
1849
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
1850
2.50k
            }
1851
30.6k
        }
1852
6.13k
    }
1853
1854
32
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
1855
32
                                                 "delete_rowset_data",
1856
34
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
1856
34
                                                 [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
1857
32
    for (auto& [resource_id, file_paths] : resource_file_paths) {
1858
29
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
1859
29
            DCHECK(accessor_map_.count(*rid))
1860
0
                    << "uninitilized accessor, instance_id=" << instance_id_
1861
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
1862
29
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
1863
29
                                     &accessor_map_);
1864
29
            if (!accessor_map_.contains(*rid)) {
1865
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
1866
0
                        .tag("resource_id", resource_id)
1867
0
                        .tag("instance_id", instance_id_);
1868
0
                return -1;
1869
0
            }
1870
29
            auto& accessor = accessor_map_[*rid];
1871
29
            int ret = accessor->delete_files(*paths);
1872
29
            if (!ret) {
1873
                // deduplication of different files with the same rowset id
1874
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
1875
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
1876
29
                std::set<std::string> deleted_rowset_id;
1877
1878
29
                std::for_each(
1879
29
                        paths->begin(), paths->end(),
1880
92.3k
                        [&metrics_context, &rowsets, &deleted_rowset_id](const std::string& path) {
1881
92.3k
                            std::vector<std::string> str;
1882
92.3k
                            butil::SplitString(path, '/', &str);
1883
92.3k
                            std::string rowset_id;
1884
92.3k
                            if (auto pos = str.back().find('_'); pos != std::string::npos) {
1885
92.3k
                                rowset_id = str.back().substr(0, pos);
1886
92.3k
                            } else {
1887
0
                                LOG(WARNING) << "failed to parse rowset_id, path=" << path;
1888
0
                                return;
1889
0
                            }
1890
92.3k
                            auto rs_meta = rowsets.find(rowset_id);
1891
92.3k
                            if (rs_meta != rowsets.end() &&
1892
92.3k
                                !deleted_rowset_id.contains(rowset_id)) {
1893
6.13k
                                deleted_rowset_id.emplace(rowset_id);
1894
6.13k
                                metrics_context.total_recycled_data_size +=
1895
6.13k
                                        rs_meta->second.total_disk_size();
1896
6.13k
                                segment_metrics_context_.total_recycled_num +=
1897
6.13k
                                        rs_meta->second.num_segments();
1898
6.13k
                                segment_metrics_context_.total_recycled_data_size +=
1899
6.13k
                                        rs_meta->second.total_disk_size();
1900
6.13k
                                metrics_context.total_recycled_num++;
1901
6.13k
                            }
1902
92.3k
                        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
1880
92.3k
                        [&metrics_context, &rowsets, &deleted_rowset_id](const std::string& path) {
1881
92.3k
                            std::vector<std::string> str;
1882
92.3k
                            butil::SplitString(path, '/', &str);
1883
92.3k
                            std::string rowset_id;
1884
92.3k
                            if (auto pos = str.back().find('_'); pos != std::string::npos) {
1885
92.3k
                                rowset_id = str.back().substr(0, pos);
1886
92.3k
                            } else {
1887
0
                                LOG(WARNING) << "failed to parse rowset_id, path=" << path;
1888
0
                                return;
1889
0
                            }
1890
92.3k
                            auto rs_meta = rowsets.find(rowset_id);
1891
92.3k
                            if (rs_meta != rowsets.end() &&
1892
92.3k
                                !deleted_rowset_id.contains(rowset_id)) {
1893
6.13k
                                deleted_rowset_id.emplace(rowset_id);
1894
6.13k
                                metrics_context.total_recycled_data_size +=
1895
6.13k
                                        rs_meta->second.total_disk_size();
1896
6.13k
                                segment_metrics_context_.total_recycled_num +=
1897
6.13k
                                        rs_meta->second.num_segments();
1898
6.13k
                                segment_metrics_context_.total_recycled_data_size +=
1899
6.13k
                                        rs_meta->second.total_disk_size();
1900
6.13k
                                metrics_context.total_recycled_num++;
1901
6.13k
                            }
1902
92.3k
                        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
1903
29
                segment_metrics_context_.report();
1904
29
                metrics_context.report();
1905
29
            }
1906
29
            return ret;
1907
29
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
1858
29
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
1859
29
            DCHECK(accessor_map_.count(*rid))
1860
0
                    << "uninitilized accessor, instance_id=" << instance_id_
1861
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
1862
29
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
1863
29
                                     &accessor_map_);
1864
29
            if (!accessor_map_.contains(*rid)) {
1865
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
1866
0
                        .tag("resource_id", resource_id)
1867
0
                        .tag("instance_id", instance_id_);
1868
0
                return -1;
1869
0
            }
1870
29
            auto& accessor = accessor_map_[*rid];
1871
29
            int ret = accessor->delete_files(*paths);
1872
29
            if (!ret) {
1873
                // deduplication of different files with the same rowset id
1874
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
1875
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
1876
29
                std::set<std::string> deleted_rowset_id;
1877
1878
29
                std::for_each(
1879
29
                        paths->begin(), paths->end(),
1880
29
                        [&metrics_context, &rowsets, &deleted_rowset_id](const std::string& path) {
1881
29
                            std::vector<std::string> str;
1882
29
                            butil::SplitString(path, '/', &str);
1883
29
                            std::string rowset_id;
1884
29
                            if (auto pos = str.back().find('_'); pos != std::string::npos) {
1885
29
                                rowset_id = str.back().substr(0, pos);
1886
29
                            } else {
1887
29
                                LOG(WARNING) << "failed to parse rowset_id, path=" << path;
1888
29
                                return;
1889
29
                            }
1890
29
                            auto rs_meta = rowsets.find(rowset_id);
1891
29
                            if (rs_meta != rowsets.end() &&
1892
29
                                !deleted_rowset_id.contains(rowset_id)) {
1893
29
                                deleted_rowset_id.emplace(rowset_id);
1894
29
                                metrics_context.total_recycled_data_size +=
1895
29
                                        rs_meta->second.total_disk_size();
1896
29
                                segment_metrics_context_.total_recycled_num +=
1897
29
                                        rs_meta->second.num_segments();
1898
29
                                segment_metrics_context_.total_recycled_data_size +=
1899
29
                                        rs_meta->second.total_disk_size();
1900
29
                                metrics_context.total_recycled_num++;
1901
29
                            }
1902
29
                        });
1903
29
                segment_metrics_context_.report();
1904
29
                metrics_context.report();
1905
29
            }
1906
29
            return ret;
1907
29
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
1908
29
    }
1909
32
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
1910
5
        LOG_INFO(
1911
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
1912
5
                "resource_id={}, tablet_id={}, instance_id={}",
1913
5
                rowset_id, resource_id, tablet_id, instance_id_);
1914
5
        concurrent_delete_executor.add([&]() -> int {
1915
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
1916
5
            if (!ret) {
1917
5
                auto rs = rowsets.at(rowset_id);
1918
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
1919
5
                metrics_context.total_recycled_num++;
1920
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
1921
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
1922
5
                metrics_context.report();
1923
5
                segment_metrics_context_.report();
1924
5
            }
1925
5
            return ret;
1926
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
1914
5
        concurrent_delete_executor.add([&]() -> int {
1915
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
1916
5
            if (!ret) {
1917
5
                auto rs = rowsets.at(rowset_id);
1918
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
1919
5
                metrics_context.total_recycled_num++;
1920
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
1921
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
1922
5
                metrics_context.report();
1923
5
                segment_metrics_context_.report();
1924
5
            }
1925
5
            return ret;
1926
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
1927
5
    }
1928
1929
32
    bool finished = true;
1930
32
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
1931
34
    for (int r : rets) {
1932
34
        if (r != 0) {
1933
0
            ret = -1;
1934
0
            break;
1935
0
        }
1936
34
    }
1937
32
    ret = finished ? ret : -1;
1938
32
    return ret;
1939
32
}
1940
1941
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
1942
2.90k
                                         const std::string& rowset_id) {
1943
2.90k
    auto it = accessor_map_.find(resource_id);
1944
2.90k
    if (it == accessor_map_.end()) {
1945
0
        LOG_WARNING("instance has no such resource id")
1946
0
                .tag("instance_id", instance_id_)
1947
0
                .tag("resource_id", resource_id)
1948
0
                .tag("tablet_id", tablet_id)
1949
0
                .tag("rowset_id", rowset_id);
1950
0
        return -1;
1951
0
    }
1952
2.90k
    auto& accessor = it->second;
1953
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
1954
2.90k
}
1955
1956
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
1957
                                                  RecyclerMetricsContext& metrics_context,
1958
0
                                                  int64_t partition_id, bool is_empty_tablet) {
1959
0
    std::string tablet_key_begin, tablet_key_end;
1960
1961
0
    if (partition_id > 0) {
1962
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
1963
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
1964
0
    } else {
1965
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
1966
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
1967
0
    }
1968
    // for calculate the total num or bytes of recyled objects
1969
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
1970
0
                                                          std::string_view v) -> int {
1971
0
        doris::TabletMetaCloudPB tablet_meta_pb;
1972
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1973
0
            return 0;
1974
0
        }
1975
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1976
1977
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1978
0
            return 0;
1979
0
        }
1980
1981
0
        if (!is_empty_tablet) {
1982
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
1983
0
                return 0;
1984
0
            }
1985
0
            tablet_metrics_context_.total_need_recycle_num++;
1986
0
        }
1987
0
        return 0;
1988
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
1989
0
    return scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics),
1990
0
                            [&metrics_context]() -> int {
1991
0
                                metrics_context.report();
1992
0
                                tablet_metrics_context_.report();
1993
0
                                return 0;
1994
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_1clEv
1995
0
}
1996
1997
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
1998
0
                                                 RecyclerMetricsContext& metrics_context) {
1999
0
    int ret = 0;
2000
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
2001
0
    std::unique_ptr<Transaction> txn;
2002
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2003
0
        LOG_WARNING("failed to recycle tablet ")
2004
0
                .tag("tablet id", tablet_id)
2005
0
                .tag("instance_id", instance_id_)
2006
0
                .tag("reason", "failed to create txn");
2007
0
        ret = -1;
2008
0
    }
2009
0
    GetRowsetResponse resp;
2010
0
    std::string msg;
2011
0
    MetaServiceCode code = MetaServiceCode::OK;
2012
    // get rowsets in tablet
2013
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
2014
0
                        tablet_id, code, msg, &resp);
2015
0
    if (code != MetaServiceCode::OK) {
2016
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2017
0
                .tag("tablet id", tablet_id)
2018
0
                .tag("msg", msg)
2019
0
                .tag("code", code)
2020
0
                .tag("instance id", instance_id_);
2021
0
        ret = -1;
2022
0
    }
2023
0
    for (const auto& rs_meta : resp.rowset_meta()) {
2024
        /*
2025
        * For compatibility, we skip the loop for [0-1] here. 
2026
        * The purpose of this loop is to delete object files,
2027
        * and since [0-1] only has meta and doesn't have object files, 
2028
        * skipping it doesn't affect system correctness. 
2029
        *
2030
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below 
2031
        * would return error -1 directly, causing the recycle operation to fail.
2032
        *
2033
        * [0-1] doesn't have resource id is a bug.
2034
        * In the future, we will fix this problem, after that,
2035
        * we can remove this if statement.
2036
        *
2037
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
2038
        */
2039
2040
0
        if (rs_meta.end_version() == 1) {
2041
            // Assert that [0-1] has no resource_id to make sure
2042
            // this if statement will not be forgetted to remove
2043
            // when the resource id bug is fixed
2044
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2045
0
            continue;
2046
0
        }
2047
0
        if (!rs_meta.has_resource_id()) {
2048
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2049
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
2050
0
                    .tag("instance_id", instance_id_)
2051
0
                    .tag("tablet_id", tablet_id);
2052
0
            continue;
2053
0
        }
2054
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2055
0
        auto it = accessor_map_.find(rs_meta.resource_id());
2056
        // possible if the accessor is not initilized correctly
2057
0
        if (it == accessor_map_.end()) [[unlikely]] {
2058
0
            LOG_WARNING(
2059
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2060
0
                    "recycle process")
2061
0
                    .tag("tablet id", tablet_id)
2062
0
                    .tag("instance_id", instance_id_)
2063
0
                    .tag("resource_id", rs_meta.resource_id())
2064
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2065
0
            continue;
2066
0
        }
2067
2068
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
2069
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
2070
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
2071
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
2072
0
    }
2073
0
    return ret;
2074
0
}
2075
2076
254
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
2077
254
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
2078
254
            .tag("instance_id", instance_id_)
2079
254
            .tag("tablet_id", tablet_id);
2080
2081
254
    int ret = 0;
2082
254
    auto start_time = steady_clock::now();
2083
2084
254
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
2085
2086
    // collect resource ids
2087
234
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
2088
234
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
2089
234
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
2090
234
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
2091
2092
234
    std::set<std::string> resource_ids;
2093
234
    int64_t recycle_rowsets_number = 0;
2094
234
    int64_t recycle_segments_number = 0;
2095
234
    int64_t recycle_rowsets_data_size = 0;
2096
234
    int64_t recycle_rowsets_index_size = 0;
2097
234
    int64_t max_rowset_version = 0;
2098
234
    int64_t min_rowset_creation_time = INT64_MAX;
2099
234
    int64_t max_rowset_creation_time = 0;
2100
234
    int64_t min_rowset_expiration_time = INT64_MAX;
2101
234
    int64_t max_rowset_expiration_time = 0;
2102
2103
234
    DORIS_CLOUD_DEFER {
2104
234
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2105
234
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2106
234
                .tag("instance_id", instance_id_)
2107
234
                .tag("tablet_id", tablet_id)
2108
234
                .tag("recycle rowsets number", recycle_rowsets_number)
2109
234
                .tag("recycle segments number", recycle_segments_number)
2110
234
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2111
234
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2112
234
                .tag("max rowset version", max_rowset_version)
2113
234
                .tag("min rowset creation time", min_rowset_creation_time)
2114
234
                .tag("max rowset creation time", max_rowset_creation_time)
2115
234
                .tag("min rowset expiration time", min_rowset_expiration_time)
2116
234
                .tag("max rowset expiration time", max_rowset_expiration_time)
2117
234
                .tag("ret", ret);
2118
234
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
2103
234
    DORIS_CLOUD_DEFER {
2104
234
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2105
234
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2106
234
                .tag("instance_id", instance_id_)
2107
234
                .tag("tablet_id", tablet_id)
2108
234
                .tag("recycle rowsets number", recycle_rowsets_number)
2109
234
                .tag("recycle segments number", recycle_segments_number)
2110
234
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2111
234
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2112
234
                .tag("max rowset version", max_rowset_version)
2113
234
                .tag("min rowset creation time", min_rowset_creation_time)
2114
234
                .tag("max rowset creation time", max_rowset_creation_time)
2115
234
                .tag("min rowset expiration time", min_rowset_expiration_time)
2116
234
                .tag("max rowset expiration time", max_rowset_expiration_time)
2117
234
                .tag("ret", ret);
2118
234
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
2119
2120
234
    std::unique_ptr<Transaction> txn;
2121
234
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2122
0
        LOG_WARNING("failed to recycle tablet ")
2123
0
                .tag("tablet id", tablet_id)
2124
0
                .tag("instance_id", instance_id_)
2125
0
                .tag("reason", "failed to create txn");
2126
0
        ret = -1;
2127
0
    }
2128
234
    GetRowsetResponse resp;
2129
234
    std::string msg;
2130
234
    MetaServiceCode code = MetaServiceCode::OK;
2131
    // get rowsets in tablet
2132
234
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
2133
234
                        tablet_id, code, msg, &resp);
2134
234
    if (code != MetaServiceCode::OK) {
2135
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2136
0
                .tag("tablet id", tablet_id)
2137
0
                .tag("msg", msg)
2138
0
                .tag("code", code)
2139
0
                .tag("instance id", instance_id_);
2140
0
        ret = -1;
2141
0
    }
2142
234
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
2143
2144
2.50k
    for (const auto& rs_meta : resp.rowset_meta()) {
2145
        /*
2146
        * For compatibility, we skip the loop for [0-1] here. 
2147
        * The purpose of this loop is to delete object files,
2148
        * and since [0-1] only has meta and doesn't have object files, 
2149
        * skipping it doesn't affect system correctness. 
2150
        *
2151
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below 
2152
        * would return error -1 directly, causing the recycle operation to fail.
2153
        *
2154
        * [0-1] doesn't have resource id is a bug.
2155
        * In the future, we will fix this problem, after that,
2156
        * we can remove this if statement.
2157
        *
2158
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
2159
        */
2160
2161
2.50k
        if (rs_meta.end_version() == 1) {
2162
            // Assert that [0-1] has no resource_id to make sure
2163
            // this if statement will not be forgetted to remove
2164
            // when the resource id bug is fixed
2165
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2166
0
            recycle_rowsets_number += 1;
2167
0
            continue;
2168
0
        }
2169
2.50k
        if (!rs_meta.has_resource_id()) {
2170
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2171
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
2172
1
                    .tag("instance_id", instance_id_)
2173
1
                    .tag("tablet_id", tablet_id);
2174
1
            return -1;
2175
1
        }
2176
2.50k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2177
2.50k
        auto it = accessor_map_.find(rs_meta.resource_id());
2178
        // possible if the accessor is not initilized correctly
2179
2.50k
        if (it == accessor_map_.end()) [[unlikely]] {
2180
1
            LOG_WARNING(
2181
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2182
1
                    "recycle process")
2183
1
                    .tag("tablet id", tablet_id)
2184
1
                    .tag("instance_id", instance_id_)
2185
1
                    .tag("resource_id", rs_meta.resource_id())
2186
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2187
1
            return -1;
2188
1
        }
2189
2.50k
        recycle_rowsets_number += 1;
2190
2.50k
        recycle_segments_number += rs_meta.num_segments();
2191
2.50k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
2192
2.50k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
2193
2.50k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
2194
2.50k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
2195
2.50k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
2196
2.50k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
2197
2.50k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
2198
2.50k
        resource_ids.emplace(rs_meta.resource_id());
2199
2.50k
    }
2200
2201
232
    LOG_INFO("recycle tablet start to delete object")
2202
232
            .tag("instance id", instance_id_)
2203
232
            .tag("tablet id", tablet_id)
2204
232
            .tag("recycle tablet resource ids are",
2205
232
                 std::accumulate(resource_ids.begin(), resource_ids.begin(), std::string(),
2206
232
                                 [](std::string rs_id, const auto& it) {
2207
0
                                     return rs_id.empty() ? it : rs_id + ", " + it;
2208
0
                                 }));
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
2209
2210
232
    SyncExecutor<int> concurrent_delete_executor(
2211
232
            _thread_pool_group.s3_producer_pool,
2212
232
            fmt::format("delete tablet {} s3 rowset", tablet_id),
2213
232
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKi
Line
Count
Source
2213
203
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKi
2214
2215
    // delete all rowset data in this tablet
2216
    // ATTN: there may be data leak if not all accessor initilized successfully
2217
    //       partial data deleted if the tablet is stored cross-storage vault
2218
    //       vault id is not attached to TabletMeta...
2219
232
    for (const auto& resource_id : resource_ids) {
2220
203
        concurrent_delete_executor.add([&, rs_id = resource_id,
2221
203
                                        accessor_ptr = accessor_map_[resource_id]]() {
2222
203
            std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2223
203
                g_bvar_recycler_vault_recycle_task_concurrency.put(
2224
203
                        {instance_id_, metrics_context.operation_type, rs_id}, -1);
2225
203
                metrics_context.report();
2226
203
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEvENKUlPiE_clES5_
Line
Count
Source
2222
203
            std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2223
203
                g_bvar_recycler_vault_recycle_task_concurrency.put(
2224
203
                        {instance_id_, metrics_context.operation_type, rs_id}, -1);
2225
203
                metrics_context.report();
2226
203
            });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEvENKUlPiE_clES5_
2227
203
            g_bvar_recycler_vault_recycle_task_concurrency.put(
2228
203
                    {instance_id_, metrics_context.operation_type, rs_id}, 1);
2229
203
            int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
2230
203
            if (res != 0) {
2231
1
                LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
2232
1
                             << " path=" << accessor_ptr->uri();
2233
1
                g_bvar_recycler_vault_recycle_status.put({instance_id_, rs_id, "abnormal"}, 1);
2234
1
                return -1;
2235
1
            }
2236
202
            g_bvar_recycler_vault_recycle_status.put({instance_id_, rs_id, "normal"}, 1);
2237
202
            return 0;
2238
203
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv
Line
Count
Source
2221
203
                                        accessor_ptr = accessor_map_[resource_id]]() {
2222
203
            std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2223
203
                g_bvar_recycler_vault_recycle_task_concurrency.put(
2224
203
                        {instance_id_, metrics_context.operation_type, rs_id}, -1);
2225
203
                metrics_context.report();
2226
203
            });
2227
203
            g_bvar_recycler_vault_recycle_task_concurrency.put(
2228
203
                    {instance_id_, metrics_context.operation_type, rs_id}, 1);
2229
203
            int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
2230
203
            if (res != 0) {
2231
1
                LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
2232
1
                             << " path=" << accessor_ptr->uri();
2233
1
                g_bvar_recycler_vault_recycle_status.put({instance_id_, rs_id, "abnormal"}, 1);
2234
1
                return -1;
2235
1
            }
2236
202
            g_bvar_recycler_vault_recycle_status.put({instance_id_, rs_id, "normal"}, 1);
2237
202
            return 0;
2238
203
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv
2239
203
    }
2240
2241
232
    bool finished = true;
2242
232
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2243
232
    for (int r : rets) {
2244
203
        if (r != 0) {
2245
1
            ret = -1;
2246
1
        }
2247
203
    }
2248
2249
232
    ret = finished ? ret : -1;
2250
2251
232
    if (ret != 0) { // failed recycle tablet data
2252
1
        LOG_WARNING("ret!=0")
2253
1
                .tag("finished", finished)
2254
1
                .tag("ret", ret)
2255
1
                .tag("instance_id", instance_id_)
2256
1
                .tag("tablet_id", tablet_id);
2257
1
        return ret;
2258
1
    }
2259
2260
231
    tablet_metrics_context_.total_recycled_data_size +=
2261
231
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2262
231
    tablet_metrics_context_.total_recycled_num += 1;
2263
231
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
2264
231
    segment_metrics_context_.total_recycled_data_size +=
2265
231
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2266
231
    metrics_context.total_recycled_data_size +=
2267
231
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2268
231
    tablet_metrics_context_.report();
2269
231
    segment_metrics_context_.report();
2270
231
    metrics_context.report();
2271
2272
231
    txn.reset();
2273
231
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2274
0
        LOG_WARNING("failed to recycle tablet ")
2275
0
                .tag("tablet id", tablet_id)
2276
0
                .tag("instance_id", instance_id_)
2277
0
                .tag("reason", "failed to create txn");
2278
0
        ret = -1;
2279
0
    }
2280
    // delete all rowset kv in this tablet
2281
231
    txn->remove(rs_key0, rs_key1);
2282
231
    txn->remove(recyc_rs_key0, recyc_rs_key1);
2283
2284
    // remove delete bitmap for MoW table
2285
231
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
2286
231
    txn->remove(pending_key);
2287
231
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
2288
231
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
2289
231
    txn->remove(delete_bitmap_start, delete_bitmap_end);
2290
2291
231
    TxnErrorCode err = txn->commit();
2292
231
    if (err != TxnErrorCode::TXN_OK) {
2293
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
2294
0
        ret = -1;
2295
0
    }
2296
2297
231
    if (ret == 0) {
2298
        // All object files under tablet have been deleted
2299
231
        std::lock_guard lock(recycled_tablets_mtx_);
2300
231
        recycled_tablets_.insert(tablet_id);
2301
231
    }
2302
2303
231
    return ret;
2304
232
}
2305
2306
13
int InstanceRecycler::recycle_rowsets() {
2307
13
    const std::string task_name = "recycle_rowsets";
2308
13
    int64_t num_scanned = 0;
2309
13
    int64_t num_expired = 0;
2310
13
    int64_t num_prepare = 0;
2311
13
    int64_t num_compacted = 0;
2312
13
    int64_t num_empty_rowset = 0;
2313
13
    size_t total_rowset_key_size = 0;
2314
13
    size_t total_rowset_value_size = 0;
2315
13
    size_t expired_rowset_size = 0;
2316
13
    std::atomic_long num_recycled = 0;
2317
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2318
2319
13
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
2320
13
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
2321
13
    std::string recyc_rs_key0;
2322
13
    std::string recyc_rs_key1;
2323
13
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
2324
13
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
2325
2326
13
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
2327
2328
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2329
13
    register_recycle_task(task_name, start_time);
2330
2331
13
    DORIS_CLOUD_DEFER {
2332
13
        unregister_recycle_task(task_name);
2333
13
        int64_t cost =
2334
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2335
13
        metrics_context.finish_report();
2336
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
2337
13
                .tag("instance_id", instance_id_)
2338
13
                .tag("num_scanned", num_scanned)
2339
13
                .tag("num_expired", num_expired)
2340
13
                .tag("num_recycled", num_recycled)
2341
13
                .tag("num_recycled.prepare", num_prepare)
2342
13
                .tag("num_recycled.compacted", num_compacted)
2343
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
2344
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2345
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2346
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
2347
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
2331
13
    DORIS_CLOUD_DEFER {
2332
13
        unregister_recycle_task(task_name);
2333
13
        int64_t cost =
2334
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2335
13
        metrics_context.finish_report();
2336
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
2337
13
                .tag("instance_id", instance_id_)
2338
13
                .tag("num_scanned", num_scanned)
2339
13
                .tag("num_expired", num_expired)
2340
13
                .tag("num_recycled", num_recycled)
2341
13
                .tag("num_recycled.prepare", num_prepare)
2342
13
                .tag("num_recycled.compacted", num_compacted)
2343
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
2344
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2345
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2346
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
2347
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
2348
2349
13
    std::vector<std::string> rowset_keys;
2350
    // rowset_id -> rowset_meta
2351
    // store rowset id and meta for statistics rs size when delete
2352
13
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
2353
2354
    // Store keys of rowset recycled by background workers
2355
13
    std::mutex async_recycled_rowset_keys_mutex;
2356
13
    std::vector<std::string> async_recycled_rowset_keys;
2357
13
    auto worker_pool = std::make_unique<SimpleThreadPool>(
2358
13
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
2359
13
    worker_pool->start();
2360
13
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
2361
900
                                            int64_t tablet_id, const std::string& rowset_id) {
2362
        // Try to delete rowset data in background thread
2363
900
        int ret = worker_pool->submit_with_timeout(
2364
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
2365
781
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2366
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2367
0
                        return;
2368
0
                    }
2369
781
                    std::vector<std::string> keys;
2370
781
                    {
2371
781
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
2372
781
                        async_recycled_rowset_keys.push_back(std::move(key));
2373
781
                        if (async_recycled_rowset_keys.size() > 100) {
2374
7
                            keys.swap(async_recycled_rowset_keys);
2375
7
                        }
2376
781
                    }
2377
781
                    if (keys.empty()) return;
2378
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
2379
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
2380
0
                                     << instance_id_;
2381
7
                    } else {
2382
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
2383
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
2384
7
                                           num_recycled, start_time);
2385
7
                    }
2386
7
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
2364
781
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
2365
781
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2366
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2367
0
                        return;
2368
0
                    }
2369
781
                    std::vector<std::string> keys;
2370
781
                    {
2371
781
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
2372
781
                        async_recycled_rowset_keys.push_back(std::move(key));
2373
781
                        if (async_recycled_rowset_keys.size() > 100) {
2374
7
                            keys.swap(async_recycled_rowset_keys);
2375
7
                        }
2376
781
                    }
2377
781
                    if (keys.empty()) return;
2378
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
2379
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
2380
0
                                     << instance_id_;
2381
7
                    } else {
2382
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
2383
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
2384
7
                                           num_recycled, start_time);
2385
7
                    }
2386
7
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
2387
900
                0);
2388
900
        if (ret == 0) return 0;
2389
        // Submit task failed, delete rowset data in current thread
2390
119
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2391
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2392
0
            return -1;
2393
0
        }
2394
119
        rowset_keys.push_back(std::move(key));
2395
119
        return 0;
2396
119
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
2361
900
                                            int64_t tablet_id, const std::string& rowset_id) {
2362
        // Try to delete rowset data in background thread
2363
900
        int ret = worker_pool->submit_with_timeout(
2364
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
2365
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2366
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2367
900
                        return;
2368
900
                    }
2369
900
                    std::vector<std::string> keys;
2370
900
                    {
2371
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
2372
900
                        async_recycled_rowset_keys.push_back(std::move(key));
2373
900
                        if (async_recycled_rowset_keys.size() > 100) {
2374
900
                            keys.swap(async_recycled_rowset_keys);
2375
900
                        }
2376
900
                    }
2377
900
                    if (keys.empty()) return;
2378
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
2379
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
2380
900
                                     << instance_id_;
2381
900
                    } else {
2382
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
2383
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
2384
900
                                           num_recycled, start_time);
2385
900
                    }
2386
900
                },
2387
900
                0);
2388
900
        if (ret == 0) return 0;
2389
        // Submit task failed, delete rowset data in current thread
2390
119
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2391
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2392
0
            return -1;
2393
0
        }
2394
119
        rowset_keys.push_back(std::move(key));
2395
119
        return 0;
2396
119
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
2397
2398
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2399
2400
4.00k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
2401
4.00k
        ++num_scanned;
2402
4.00k
        total_rowset_key_size += k.size();
2403
4.00k
        total_rowset_value_size += v.size();
2404
4.00k
        RecycleRowsetPB rowset;
2405
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2406
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2407
0
            return -1;
2408
0
        }
2409
2410
4.00k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
2411
2412
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2413
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
2414
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
2415
4.00k
        int64_t current_time = ::time(nullptr);
2416
4.00k
        if (current_time < final_expiration) { // not expired
2417
0
            return 0;
2418
0
        }
2419
4.00k
        ++num_expired;
2420
4.00k
        expired_rowset_size += v.size();
2421
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
2422
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
2423
                // in old version, keep this key-value pair and it needs to be checked manually
2424
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2425
0
                return -1;
2426
0
            }
2427
250
            if (rowset.resource_id().empty()) [[unlikely]] {
2428
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2429
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2430
0
                          << hex(k) << " value=" << proto_to_json(rowset);
2431
0
                rowset_keys.emplace_back(k);
2432
0
                return -1;
2433
0
            }
2434
            // decode rowset_id
2435
250
            auto k1 = k;
2436
250
            k1.remove_prefix(1);
2437
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2438
250
            decode_key(&k1, &out);
2439
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2440
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2441
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2442
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
2443
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
2444
250
                                             rowset.tablet_id(), rowset_id) != 0) {
2445
0
                return -1;
2446
0
            }
2447
250
            return 0;
2448
250
        }
2449
        // TODO(plat1ko): check rowset not referenced
2450
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
2451
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
2452
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
2453
0
                LOG_INFO("recycle rowset that has empty resource id");
2454
0
            } else {
2455
                // other situations, keep this key-value pair and it needs to be checked manually
2456
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2457
0
                return -1;
2458
0
            }
2459
0
        }
2460
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2461
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
2462
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
2463
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
2464
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
2465
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
2466
3.75k
                  << " rowset_meta_size=" << v.size()
2467
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
2468
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
2469
            // unable to calculate file path, can only be deleted by rowset id prefix
2470
650
            num_prepare += 1;
2471
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
2472
650
                                             rowset_meta->tablet_id(),
2473
650
                                             rowset_meta->rowset_id_v2()) != 0) {
2474
0
                return -1;
2475
0
            }
2476
3.10k
        } else {
2477
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
2478
3.10k
            rowset_keys.emplace_back(k);
2479
3.10k
            if (rowset_meta->num_segments() > 0) { // Skip empty rowset
2480
3.10k
                rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
2481
3.10k
            } else {
2482
0
                ++num_empty_rowset;
2483
0
            }
2484
3.10k
        }
2485
3.75k
        return 0;
2486
3.75k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2400
4.00k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
2401
4.00k
        ++num_scanned;
2402
4.00k
        total_rowset_key_size += k.size();
2403
4.00k
        total_rowset_value_size += v.size();
2404
4.00k
        RecycleRowsetPB rowset;
2405
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2406
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2407
0
            return -1;
2408
0
        }
2409
2410
4.00k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
2411
2412
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2413
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
2414
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
2415
4.00k
        int64_t current_time = ::time(nullptr);
2416
4.00k
        if (current_time < final_expiration) { // not expired
2417
0
            return 0;
2418
0
        }
2419
4.00k
        ++num_expired;
2420
4.00k
        expired_rowset_size += v.size();
2421
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
2422
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
2423
                // in old version, keep this key-value pair and it needs to be checked manually
2424
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2425
0
                return -1;
2426
0
            }
2427
250
            if (rowset.resource_id().empty()) [[unlikely]] {
2428
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2429
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2430
0
                          << hex(k) << " value=" << proto_to_json(rowset);
2431
0
                rowset_keys.emplace_back(k);
2432
0
                return -1;
2433
0
            }
2434
            // decode rowset_id
2435
250
            auto k1 = k;
2436
250
            k1.remove_prefix(1);
2437
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2438
250
            decode_key(&k1, &out);
2439
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2440
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2441
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2442
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
2443
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
2444
250
                                             rowset.tablet_id(), rowset_id) != 0) {
2445
0
                return -1;
2446
0
            }
2447
250
            return 0;
2448
250
        }
2449
        // TODO(plat1ko): check rowset not referenced
2450
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
2451
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
2452
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
2453
0
                LOG_INFO("recycle rowset that has empty resource id");
2454
0
            } else {
2455
                // other situations, keep this key-value pair and it needs to be checked manually
2456
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2457
0
                return -1;
2458
0
            }
2459
0
        }
2460
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2461
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
2462
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
2463
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
2464
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
2465
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
2466
3.75k
                  << " rowset_meta_size=" << v.size()
2467
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
2468
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
2469
            // unable to calculate file path, can only be deleted by rowset id prefix
2470
650
            num_prepare += 1;
2471
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
2472
650
                                             rowset_meta->tablet_id(),
2473
650
                                             rowset_meta->rowset_id_v2()) != 0) {
2474
0
                return -1;
2475
0
            }
2476
3.10k
        } else {
2477
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
2478
3.10k
            rowset_keys.emplace_back(k);
2479
3.10k
            if (rowset_meta->num_segments() > 0) { // Skip empty rowset
2480
3.10k
                rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
2481
3.10k
            } else {
2482
0
                ++num_empty_rowset;
2483
0
            }
2484
3.10k
        }
2485
3.75k
        return 0;
2486
3.75k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
2487
2488
21
    auto loop_done = [&]() -> int {
2489
21
        std::vector<std::string> rowset_keys_to_delete;
2490
        // rowset_id -> rowset_meta
2491
        // store rowset id and meta for statistics rs size when delete
2492
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
2493
21
        rowset_keys_to_delete.swap(rowset_keys);
2494
21
        rowsets_to_delete.swap(rowsets);
2495
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
2496
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2497
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
2498
21
                                   metrics_context) != 0) {
2499
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2500
0
                return;
2501
0
            }
2502
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2503
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2504
0
                return;
2505
0
            }
2506
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2507
21
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
2496
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2497
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
2498
21
                                   metrics_context) != 0) {
2499
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2500
0
                return;
2501
0
            }
2502
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2503
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2504
0
                return;
2505
0
            }
2506
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2507
21
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
2508
21
        return 0;
2509
21
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
2488
21
    auto loop_done = [&]() -> int {
2489
21
        std::vector<std::string> rowset_keys_to_delete;
2490
        // rowset_id -> rowset_meta
2491
        // store rowset id and meta for statistics rs size when delete
2492
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
2493
21
        rowset_keys_to_delete.swap(rowset_keys);
2494
21
        rowsets_to_delete.swap(rowsets);
2495
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
2496
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2497
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
2498
21
                                   metrics_context) != 0) {
2499
21
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2500
21
                return;
2501
21
            }
2502
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2503
21
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2504
21
                return;
2505
21
            }
2506
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2507
21
        });
2508
21
        return 0;
2509
21
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
2510
2511
13
    if (config::enable_recycler_stats_metrics) {
2512
0
        scan_and_statistics_rowsets();
2513
0
    }
2514
    // recycle_func and loop_done for scan and recycle
2515
13
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
2516
13
                               std::move(loop_done));
2517
2518
13
    worker_pool->stop();
2519
2520
13
    if (!async_recycled_rowset_keys.empty()) {
2521
2
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
2522
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2523
0
            return -1;
2524
2
        } else {
2525
2
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
2526
2
        }
2527
2
    }
2528
13
    return ret;
2529
13
}
2530
2531
17
int InstanceRecycler::recycle_tmp_rowsets() {
2532
17
    const std::string task_name = "recycle_tmp_rowsets";
2533
17
    int64_t num_scanned = 0;
2534
17
    int64_t num_expired = 0;
2535
17
    int64_t num_recycled = 0;
2536
17
    size_t expired_rowset_size = 0;
2537
17
    size_t total_rowset_key_size = 0;
2538
17
    size_t total_rowset_value_size = 0;
2539
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2540
2541
17
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
2542
17
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
2543
17
    std::string tmp_rs_key0;
2544
17
    std::string tmp_rs_key1;
2545
17
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
2546
17
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
2547
2548
17
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
2549
2550
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2551
17
    register_recycle_task(task_name, start_time);
2552
2553
17
    DORIS_CLOUD_DEFER {
2554
17
        unregister_recycle_task(task_name);
2555
17
        int64_t cost =
2556
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2557
17
        metrics_context.finish_report();
2558
17
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
2559
17
                .tag("instance_id", instance_id_)
2560
17
                .tag("num_scanned", num_scanned)
2561
17
                .tag("num_expired", num_expired)
2562
17
                .tag("num_recycled", num_recycled)
2563
17
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2564
17
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2565
17
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
2566
17
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
2553
13
    DORIS_CLOUD_DEFER {
2554
13
        unregister_recycle_task(task_name);
2555
13
        int64_t cost =
2556
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2557
13
        metrics_context.finish_report();
2558
13
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
2559
13
                .tag("instance_id", instance_id_)
2560
13
                .tag("num_scanned", num_scanned)
2561
13
                .tag("num_expired", num_expired)
2562
13
                .tag("num_recycled", num_recycled)
2563
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2564
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2565
13
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
2566
13
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
2553
4
    DORIS_CLOUD_DEFER {
2554
4
        unregister_recycle_task(task_name);
2555
4
        int64_t cost =
2556
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2557
4
        metrics_context.finish_report();
2558
4
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
2559
4
                .tag("instance_id", instance_id_)
2560
4
                .tag("num_scanned", num_scanned)
2561
4
                .tag("num_expired", num_expired)
2562
4
                .tag("num_recycled", num_recycled)
2563
4
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2564
4
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2565
4
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
2566
4
    };
2567
2568
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
2569
17
    std::vector<std::string_view> tmp_rowset_keys;
2570
    // rowset_id -> rowset_meta
2571
    // store tmp_rowset id and meta for statistics rs size when delete
2572
17
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
2573
2574
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2575
2576
17
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
2577
17
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
2578
3.05k
                             &earlest_ts, this](std::string_view k, std::string_view v) -> int {
2579
3.05k
        ++num_scanned;
2580
3.05k
        total_rowset_key_size += k.size();
2581
3.05k
        total_rowset_value_size += v.size();
2582
3.05k
        doris::RowsetMetaCloudPB rowset;
2583
3.05k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2584
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
2585
0
            return -1;
2586
0
        }
2587
3.05k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
2588
3.05k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2589
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2590
0
                   << " txn_expiration=" << rowset.txn_expiration()
2591
0
                   << " rowset_creation_time=" << rowset.creation_time();
2592
3.05k
        int64_t current_time = ::time(nullptr);
2593
3.05k
        if (current_time < expiration) { // not expired
2594
0
            return 0;
2595
0
        }
2596
2597
3.05k
        DCHECK_GT(rowset.txn_id(), 0)
2598
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
2599
3.05k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
2600
10
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
2601
10
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
2602
10
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
2603
10
                      << rowset.start_version() << '-' << rowset.end_version()
2604
10
                      << "] txn_id=" << rowset.txn_id()
2605
10
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
2606
10
                      << " txn_expiration=" << rowset.txn_expiration();
2607
10
            return 0;
2608
10
        }
2609
2610
3.04k
        ++num_expired;
2611
3.04k
        expired_rowset_size += v.size();
2612
3.04k
        if (!rowset.has_resource_id()) {
2613
20
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
2614
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
2615
0
                return -1;
2616
0
            }
2617
            // might be a delete pred rowset
2618
20
            tmp_rowset_keys.push_back(k);
2619
20
            return 0;
2620
20
        }
2621
        // TODO(plat1ko): check rowset not referenced
2622
3.02k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2623
3.02k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
2624
3.02k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
2625
3.02k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
2626
3.02k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
2627
3.02k
                  << " num_expired=" << num_expired;
2628
2629
3.02k
        tmp_rowset_keys.push_back(k);
2630
3.02k
        if (rowset.num_segments() > 0) { // Skip empty rowset
2631
3.02k
            tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
2632
3.02k
        }
2633
3.02k
        return 0;
2634
3.04k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2578
3.02k
                             &earlest_ts, this](std::string_view k, std::string_view v) -> int {
2579
3.02k
        ++num_scanned;
2580
3.02k
        total_rowset_key_size += k.size();
2581
3.02k
        total_rowset_value_size += v.size();
2582
3.02k
        doris::RowsetMetaCloudPB rowset;
2583
3.02k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2584
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
2585
0
            return -1;
2586
0
        }
2587
3.02k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
2588
3.02k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2589
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2590
0
                   << " txn_expiration=" << rowset.txn_expiration()
2591
0
                   << " rowset_creation_time=" << rowset.creation_time();
2592
3.02k
        int64_t current_time = ::time(nullptr);
2593
3.02k
        if (current_time < expiration) { // not expired
2594
0
            return 0;
2595
0
        }
2596
2597
3.02k
        DCHECK_GT(rowset.txn_id(), 0)
2598
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
2599
3.02k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
2600
0
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
2601
0
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
2602
0
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
2603
0
                      << rowset.start_version() << '-' << rowset.end_version()
2604
0
                      << "] txn_id=" << rowset.txn_id()
2605
0
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
2606
0
                      << " txn_expiration=" << rowset.txn_expiration();
2607
0
            return 0;
2608
0
        }
2609
2610
3.02k
        ++num_expired;
2611
3.02k
        expired_rowset_size += v.size();
2612
3.02k
        if (!rowset.has_resource_id()) {
2613
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
2614
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
2615
0
                return -1;
2616
0
            }
2617
            // might be a delete pred rowset
2618
0
            tmp_rowset_keys.push_back(k);
2619
0
            return 0;
2620
0
        }
2621
        // TODO(plat1ko): check rowset not referenced
2622
3.02k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2623
3.02k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
2624
3.02k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
2625
3.02k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
2626
3.02k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
2627
3.02k
                  << " num_expired=" << num_expired;
2628
2629
3.02k
        tmp_rowset_keys.push_back(k);
2630
3.02k
        if (rowset.num_segments() > 0) { // Skip empty rowset
2631
3.02k
            tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
2632
3.02k
        }
2633
3.02k
        return 0;
2634
3.02k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2578
30
                             &earlest_ts, this](std::string_view k, std::string_view v) -> int {
2579
30
        ++num_scanned;
2580
30
        total_rowset_key_size += k.size();
2581
30
        total_rowset_value_size += v.size();
2582
30
        doris::RowsetMetaCloudPB rowset;
2583
30
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2584
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
2585
0
            return -1;
2586
0
        }
2587
30
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
2588
30
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2589
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2590
0
                   << " txn_expiration=" << rowset.txn_expiration()
2591
0
                   << " rowset_creation_time=" << rowset.creation_time();
2592
30
        int64_t current_time = ::time(nullptr);
2593
30
        if (current_time < expiration) { // not expired
2594
0
            return 0;
2595
0
        }
2596
2597
30
        DCHECK_GT(rowset.txn_id(), 0)
2598
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
2599
30
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
2600
10
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
2601
10
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
2602
10
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
2603
10
                      << rowset.start_version() << '-' << rowset.end_version()
2604
10
                      << "] txn_id=" << rowset.txn_id()
2605
10
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
2606
10
                      << " txn_expiration=" << rowset.txn_expiration();
2607
10
            return 0;
2608
10
        }
2609
2610
20
        ++num_expired;
2611
20
        expired_rowset_size += v.size();
2612
20
        if (!rowset.has_resource_id()) {
2613
20
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
2614
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
2615
0
                return -1;
2616
0
            }
2617
            // might be a delete pred rowset
2618
20
            tmp_rowset_keys.push_back(k);
2619
20
            return 0;
2620
20
        }
2621
        // TODO(plat1ko): check rowset not referenced
2622
0
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2623
0
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
2624
0
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
2625
0
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
2626
0
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
2627
0
                  << " num_expired=" << num_expired;
2628
2629
0
        tmp_rowset_keys.push_back(k);
2630
0
        if (rowset.num_segments() > 0) { // Skip empty rowset
2631
0
            tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
2632
0
        }
2633
0
        return 0;
2634
20
    };
2635
2636
17
    auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, &metrics_context,
2637
17
                      this]() -> int {
2638
6
        DORIS_CLOUD_DEFER {
2639
6
            tmp_rowset_keys.clear();
2640
6
            tmp_rowsets.clear();
2641
6
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2638
3
        DORIS_CLOUD_DEFER {
2639
3
            tmp_rowset_keys.clear();
2640
3
            tmp_rowsets.clear();
2641
3
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2638
3
        DORIS_CLOUD_DEFER {
2639
3
            tmp_rowset_keys.clear();
2640
3
            tmp_rowsets.clear();
2641
3
        };
2642
6
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET, metrics_context) !=
2643
6
            0) {
2644
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
2645
0
            return -1;
2646
0
        }
2647
6
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
2648
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
2649
0
            return -1;
2650
0
        }
2651
6
        num_recycled += tmp_rowset_keys.size();
2652
6
        return 0;
2653
6
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
2637
3
                      this]() -> int {
2638
3
        DORIS_CLOUD_DEFER {
2639
3
            tmp_rowset_keys.clear();
2640
3
            tmp_rowsets.clear();
2641
3
        };
2642
3
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET, metrics_context) !=
2643
3
            0) {
2644
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
2645
0
            return -1;
2646
0
        }
2647
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
2648
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
2649
0
            return -1;
2650
0
        }
2651
3
        num_recycled += tmp_rowset_keys.size();
2652
3
        return 0;
2653
3
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
2637
3
                      this]() -> int {
2638
3
        DORIS_CLOUD_DEFER {
2639
3
            tmp_rowset_keys.clear();
2640
3
            tmp_rowsets.clear();
2641
3
        };
2642
3
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET, metrics_context) !=
2643
3
            0) {
2644
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
2645
0
            return -1;
2646
0
        }
2647
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
2648
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
2649
0
            return -1;
2650
0
        }
2651
3
        num_recycled += tmp_rowset_keys.size();
2652
3
        return 0;
2653
3
    };
2654
2655
17
    if (config::enable_recycler_stats_metrics) {
2656
0
        scan_and_statistics_tmp_rowsets();
2657
0
    }
2658
    // recycle_func and loop_done for scan and recycle
2659
17
    return scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
2660
17
                            std::move(loop_done));
2661
17
}
2662
2663
int InstanceRecycler::scan_and_recycle(
2664
        std::string begin, std::string_view end,
2665
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
2666
179
        std::function<int()> loop_done) {
2667
179
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
2668
179
    int ret = 0;
2669
179
    int64_t cnt = 0;
2670
179
    int get_range_retried = 0;
2671
179
    std::string err;
2672
179
    DORIS_CLOUD_DEFER_COPY(begin, end) {
2673
179
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
2674
179
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
2675
179
                  << " ret=" << ret << " err=" << err;
2676
179
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
2672
160
    DORIS_CLOUD_DEFER_COPY(begin, end) {
2673
160
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
2674
160
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
2675
160
                  << " ret=" << ret << " err=" << err;
2676
160
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
2672
19
    DORIS_CLOUD_DEFER_COPY(begin, end) {
2673
19
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
2674
19
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
2675
19
                  << " ret=" << ret << " err=" << err;
2676
19
    };
2677
2678
179
    std::unique_ptr<RangeGetIterator> it;
2679
202
    do {
2680
202
        if (get_range_retried > 1000) {
2681
0
            err = "txn_get exceeds max retry, may not scan all keys";
2682
0
            ret = -1;
2683
0
            return -1;
2684
0
        }
2685
202
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
2686
202
        if (get_ret != 0) { // txn kv may complain "Request for future version"
2687
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
2688
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
2689
0
                         << " get_range_retried=" << get_range_retried;
2690
0
            ++get_range_retried;
2691
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
2692
0
            continue; // try again
2693
0
        }
2694
202
        if (!it->has_next()) {
2695
95
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
2696
95
            break; // scan finished
2697
95
        }
2698
37.4k
        while (it->has_next()) {
2699
37.3k
            ++cnt;
2700
            // recycle corresponding resources
2701
37.3k
            auto [k, v] = it->next();
2702
37.3k
            if (!it->has_next()) {
2703
107
                begin = k;
2704
107
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
2705
107
            }
2706
            // if we want to continue scanning, the recycle_func should not return non-zero
2707
37.3k
            if (recycle_func(k, v) != 0) {
2708
22
                err = "recycle_func error";
2709
22
                ret = -1;
2710
22
            }
2711
37.3k
        }
2712
107
        begin.push_back('\x00'); // Update to next smallest key for iteration
2713
        // if we want to continue scanning, the recycle_func should not return non-zero
2714
107
        if (loop_done && loop_done() != 0) {
2715
2
            err = "loop_done error";
2716
2
            ret = -1;
2717
2
        }
2718
107
    } while (it->more() && !stopped());
2719
179
    return ret;
2720
179
}
2721
2722
20
int InstanceRecycler::abort_timeout_txn() {
2723
20
    const std::string task_name = "abort_timeout_txn";
2724
20
    int64_t num_scanned = 0;
2725
20
    int64_t num_timeout = 0;
2726
20
    int64_t num_abort = 0;
2727
20
    int64_t num_advance = 0;
2728
20
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2729
2730
20
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
2731
20
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
2732
20
    std::string begin_txn_running_key;
2733
20
    std::string end_txn_running_key;
2734
20
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
2735
20
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
2736
2737
20
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
2738
2739
20
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2740
20
    register_recycle_task(task_name, start_time);
2741
2742
20
    DORIS_CLOUD_DEFER {
2743
20
        unregister_recycle_task(task_name);
2744
20
        int64_t cost =
2745
20
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2746
20
        metrics_context.finish_report();
2747
20
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
2748
20
                .tag("instance_id", instance_id_)
2749
20
                .tag("num_scanned", num_scanned)
2750
20
                .tag("num_timeout", num_timeout)
2751
20
                .tag("num_abort", num_abort)
2752
20
                .tag("num_advance", num_advance);
2753
20
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
2742
16
    DORIS_CLOUD_DEFER {
2743
16
        unregister_recycle_task(task_name);
2744
16
        int64_t cost =
2745
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2746
16
        metrics_context.finish_report();
2747
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
2748
16
                .tag("instance_id", instance_id_)
2749
16
                .tag("num_scanned", num_scanned)
2750
16
                .tag("num_timeout", num_timeout)
2751
16
                .tag("num_abort", num_abort)
2752
16
                .tag("num_advance", num_advance);
2753
16
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
2742
4
    DORIS_CLOUD_DEFER {
2743
4
        unregister_recycle_task(task_name);
2744
4
        int64_t cost =
2745
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2746
4
        metrics_context.finish_report();
2747
4
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
2748
4
                .tag("instance_id", instance_id_)
2749
4
                .tag("num_scanned", num_scanned)
2750
4
                .tag("num_timeout", num_timeout)
2751
4
                .tag("num_abort", num_abort)
2752
4
                .tag("num_advance", num_advance);
2753
4
    };
2754
2755
20
    int64_t current_time =
2756
20
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2757
2758
20
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
2759
20
                                  &current_time, &metrics_context,
2760
20
                                  this](std::string_view k, std::string_view v) -> int {
2761
10
        ++num_scanned;
2762
2763
10
        std::unique_ptr<Transaction> txn;
2764
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2765
10
        if (err != TxnErrorCode::TXN_OK) {
2766
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2767
0
            return -1;
2768
0
        }
2769
10
        std::string_view k1 = k;
2770
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
2771
10
        k1.remove_prefix(1); // Remove key space
2772
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2773
10
        if (decode_key(&k1, &out) != 0) {
2774
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
2775
0
            return -1;
2776
0
        }
2777
10
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2778
10
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2779
10
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2780
        // Update txn_info
2781
10
        std::string txn_inf_key, txn_inf_val;
2782
10
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
2783
10
        err = txn->get(txn_inf_key, &txn_inf_val);
2784
10
        if (err != TxnErrorCode::TXN_OK) {
2785
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
2786
0
            return -1;
2787
0
        }
2788
10
        TxnInfoPB txn_info;
2789
10
        if (!txn_info.ParseFromString(txn_inf_val)) {
2790
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
2791
0
            return -1;
2792
0
        }
2793
2794
10
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
2795
4
            txn.reset();
2796
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
2797
4
            std::shared_ptr<TxnLazyCommitTask> task =
2798
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
2799
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
2800
4
            if (ret.first != MetaServiceCode::OK) {
2801
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
2802
0
                             << "msg=" << ret.second;
2803
0
                return -1;
2804
0
            }
2805
4
            ++num_advance;
2806
4
            return 0;
2807
6
        } else {
2808
6
            TxnRunningPB txn_running_pb;
2809
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
2810
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2811
0
                return -1;
2812
0
            }
2813
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
2814
4
                return 0;
2815
4
            }
2816
2
            ++num_timeout;
2817
2818
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
2819
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
2820
2
            txn_info.set_finish_time(current_time);
2821
2
            txn_info.set_reason("timeout");
2822
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
2823
2
            txn_inf_val.clear();
2824
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
2825
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
2826
0
                return -1;
2827
0
            }
2828
2
            txn->put(txn_inf_key, txn_inf_val);
2829
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
2830
            // Put recycle txn key
2831
2
            std::string recyc_txn_key, recyc_txn_val;
2832
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
2833
2
            RecycleTxnPB recycle_txn_pb;
2834
2
            recycle_txn_pb.set_creation_time(current_time);
2835
2
            recycle_txn_pb.set_label(txn_info.label());
2836
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
2837
0
                LOG_WARNING("failed to serialize txn recycle info")
2838
0
                        .tag("key", hex(k))
2839
0
                        .tag("db_id", db_id)
2840
0
                        .tag("txn_id", txn_id);
2841
0
                return -1;
2842
0
            }
2843
2
            txn->put(recyc_txn_key, recyc_txn_val);
2844
            // Remove txn running key
2845
2
            txn->remove(k);
2846
2
            err = txn->commit();
2847
2
            if (err != TxnErrorCode::TXN_OK) {
2848
0
                LOG_WARNING("failed to commit txn err={}", err)
2849
0
                        .tag("key", hex(k))
2850
0
                        .tag("db_id", db_id)
2851
0
                        .tag("txn_id", txn_id);
2852
0
                return -1;
2853
0
            }
2854
2
            metrics_context.total_recycled_num = ++num_abort;
2855
2
            metrics_context.report();
2856
2
        }
2857
2858
2
        return 0;
2859
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2760
6
                                  this](std::string_view k, std::string_view v) -> int {
2761
6
        ++num_scanned;
2762
2763
6
        std::unique_ptr<Transaction> txn;
2764
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2765
6
        if (err != TxnErrorCode::TXN_OK) {
2766
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2767
0
            return -1;
2768
0
        }
2769
6
        std::string_view k1 = k;
2770
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
2771
6
        k1.remove_prefix(1); // Remove key space
2772
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2773
6
        if (decode_key(&k1, &out) != 0) {
2774
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
2775
0
            return -1;
2776
0
        }
2777
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2778
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2779
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2780
        // Update txn_info
2781
6
        std::string txn_inf_key, txn_inf_val;
2782
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
2783
6
        err = txn->get(txn_inf_key, &txn_inf_val);
2784
6
        if (err != TxnErrorCode::TXN_OK) {
2785
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
2786
0
            return -1;
2787
0
        }
2788
6
        TxnInfoPB txn_info;
2789
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
2790
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
2791
0
            return -1;
2792
0
        }
2793
2794
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
2795
0
            txn.reset();
2796
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
2797
0
            std::shared_ptr<TxnLazyCommitTask> task =
2798
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
2799
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
2800
0
            if (ret.first != MetaServiceCode::OK) {
2801
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
2802
0
                             << "msg=" << ret.second;
2803
0
                return -1;
2804
0
            }
2805
0
            ++num_advance;
2806
0
            return 0;
2807
6
        } else {
2808
6
            TxnRunningPB txn_running_pb;
2809
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
2810
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2811
0
                return -1;
2812
0
            }
2813
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
2814
4
                return 0;
2815
4
            }
2816
2
            ++num_timeout;
2817
2818
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
2819
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
2820
2
            txn_info.set_finish_time(current_time);
2821
2
            txn_info.set_reason("timeout");
2822
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
2823
2
            txn_inf_val.clear();
2824
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
2825
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
2826
0
                return -1;
2827
0
            }
2828
2
            txn->put(txn_inf_key, txn_inf_val);
2829
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
2830
            // Put recycle txn key
2831
2
            std::string recyc_txn_key, recyc_txn_val;
2832
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
2833
2
            RecycleTxnPB recycle_txn_pb;
2834
2
            recycle_txn_pb.set_creation_time(current_time);
2835
2
            recycle_txn_pb.set_label(txn_info.label());
2836
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
2837
0
                LOG_WARNING("failed to serialize txn recycle info")
2838
0
                        .tag("key", hex(k))
2839
0
                        .tag("db_id", db_id)
2840
0
                        .tag("txn_id", txn_id);
2841
0
                return -1;
2842
0
            }
2843
2
            txn->put(recyc_txn_key, recyc_txn_val);
2844
            // Remove txn running key
2845
2
            txn->remove(k);
2846
2
            err = txn->commit();
2847
2
            if (err != TxnErrorCode::TXN_OK) {
2848
0
                LOG_WARNING("failed to commit txn err={}", err)
2849
0
                        .tag("key", hex(k))
2850
0
                        .tag("db_id", db_id)
2851
0
                        .tag("txn_id", txn_id);
2852
0
                return -1;
2853
0
            }
2854
2
            metrics_context.total_recycled_num = ++num_abort;
2855
2
            metrics_context.report();
2856
2
        }
2857
2858
2
        return 0;
2859
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2760
4
                                  this](std::string_view k, std::string_view v) -> int {
2761
4
        ++num_scanned;
2762
2763
4
        std::unique_ptr<Transaction> txn;
2764
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2765
4
        if (err != TxnErrorCode::TXN_OK) {
2766
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2767
0
            return -1;
2768
0
        }
2769
4
        std::string_view k1 = k;
2770
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
2771
4
        k1.remove_prefix(1); // Remove key space
2772
4
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2773
4
        if (decode_key(&k1, &out) != 0) {
2774
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
2775
0
            return -1;
2776
0
        }
2777
4
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2778
4
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2779
4
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2780
        // Update txn_info
2781
4
        std::string txn_inf_key, txn_inf_val;
2782
4
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
2783
4
        err = txn->get(txn_inf_key, &txn_inf_val);
2784
4
        if (err != TxnErrorCode::TXN_OK) {
2785
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
2786
0
            return -1;
2787
0
        }
2788
4
        TxnInfoPB txn_info;
2789
4
        if (!txn_info.ParseFromString(txn_inf_val)) {
2790
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
2791
0
            return -1;
2792
0
        }
2793
2794
4
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
2795
4
            txn.reset();
2796
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
2797
4
            std::shared_ptr<TxnLazyCommitTask> task =
2798
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
2799
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
2800
4
            if (ret.first != MetaServiceCode::OK) {
2801
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
2802
0
                             << "msg=" << ret.second;
2803
0
                return -1;
2804
0
            }
2805
4
            ++num_advance;
2806
4
            return 0;
2807
4
        } else {
2808
0
            TxnRunningPB txn_running_pb;
2809
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
2810
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2811
0
                return -1;
2812
0
            }
2813
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
2814
0
                return 0;
2815
0
            }
2816
0
            ++num_timeout;
2817
2818
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
2819
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
2820
0
            txn_info.set_finish_time(current_time);
2821
0
            txn_info.set_reason("timeout");
2822
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
2823
0
            txn_inf_val.clear();
2824
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
2825
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
2826
0
                return -1;
2827
0
            }
2828
0
            txn->put(txn_inf_key, txn_inf_val);
2829
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
2830
            // Put recycle txn key
2831
0
            std::string recyc_txn_key, recyc_txn_val;
2832
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
2833
0
            RecycleTxnPB recycle_txn_pb;
2834
0
            recycle_txn_pb.set_creation_time(current_time);
2835
0
            recycle_txn_pb.set_label(txn_info.label());
2836
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
2837
0
                LOG_WARNING("failed to serialize txn recycle info")
2838
0
                        .tag("key", hex(k))
2839
0
                        .tag("db_id", db_id)
2840
0
                        .tag("txn_id", txn_id);
2841
0
                return -1;
2842
0
            }
2843
0
            txn->put(recyc_txn_key, recyc_txn_val);
2844
            // Remove txn running key
2845
0
            txn->remove(k);
2846
0
            err = txn->commit();
2847
0
            if (err != TxnErrorCode::TXN_OK) {
2848
0
                LOG_WARNING("failed to commit txn err={}", err)
2849
0
                        .tag("key", hex(k))
2850
0
                        .tag("db_id", db_id)
2851
0
                        .tag("txn_id", txn_id);
2852
0
                return -1;
2853
0
            }
2854
0
            metrics_context.total_recycled_num = ++num_abort;
2855
0
            metrics_context.report();
2856
0
        }
2857
2858
0
        return 0;
2859
4
    };
2860
2861
20
    if (config::enable_recycler_stats_metrics) {
2862
0
        scan_and_statistics_abort_timeout_txn();
2863
0
    }
2864
    // recycle_func and loop_done for scan and recycle
2865
20
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
2866
20
                            std::move(handle_txn_running_kv));
2867
20
}
2868
2869
19
int InstanceRecycler::recycle_expired_txn_label() {
2870
19
    const std::string task_name = "recycle_expired_txn_label";
2871
19
    int64_t num_scanned = 0;
2872
19
    int64_t num_expired = 0;
2873
19
    int64_t num_recycled = 0;
2874
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2875
19
    int ret = 0;
2876
2877
19
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
2878
19
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
2879
19
    std::string begin_recycle_txn_key;
2880
19
    std::string end_recycle_txn_key;
2881
19
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
2882
19
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
2883
19
    std::vector<std::string> recycle_txn_info_keys;
2884
2885
19
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
2886
2887
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2888
19
    register_recycle_task(task_name, start_time);
2889
19
    DORIS_CLOUD_DEFER {
2890
19
        unregister_recycle_task(task_name);
2891
19
        int64_t cost =
2892
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2893
19
        metrics_context.finish_report();
2894
19
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
2895
19
                .tag("instance_id", instance_id_)
2896
19
                .tag("num_scanned", num_scanned)
2897
19
                .tag("num_expired", num_expired)
2898
19
                .tag("num_recycled", num_recycled);
2899
19
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
2889
16
    DORIS_CLOUD_DEFER {
2890
16
        unregister_recycle_task(task_name);
2891
16
        int64_t cost =
2892
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2893
16
        metrics_context.finish_report();
2894
16
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
2895
16
                .tag("instance_id", instance_id_)
2896
16
                .tag("num_scanned", num_scanned)
2897
16
                .tag("num_expired", num_expired)
2898
16
                .tag("num_recycled", num_recycled);
2899
16
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
2889
3
    DORIS_CLOUD_DEFER {
2890
3
        unregister_recycle_task(task_name);
2891
3
        int64_t cost =
2892
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2893
3
        metrics_context.finish_report();
2894
3
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
2895
3
                .tag("instance_id", instance_id_)
2896
3
                .tag("num_scanned", num_scanned)
2897
3
                .tag("num_expired", num_expired)
2898
3
                .tag("num_recycled", num_recycled);
2899
3
    };
2900
2901
19
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2902
2903
19
    SyncExecutor<int> concurrent_delete_executor(
2904
19
            _thread_pool_group.s3_producer_pool,
2905
19
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
2906
23.0k
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
2906
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
2906
3
            [](const int& ret) { return ret != 0; });
2907
2908
19
    int64_t current_time_ms =
2909
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2910
2911
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
2912
30.0k
        ++num_scanned;
2913
30.0k
        RecycleTxnPB recycle_txn_pb;
2914
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
2915
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2916
0
            return -1;
2917
0
        }
2918
30.0k
        if ((config::force_immediate_recycle) ||
2919
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
2920
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
2921
30.0k
             current_time_ms)) {
2922
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
2923
23.0k
            num_expired++;
2924
23.0k
            recycle_txn_info_keys.emplace_back(k);
2925
23.0k
        }
2926
30.0k
        return 0;
2927
30.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2911
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
2912
30.0k
        ++num_scanned;
2913
30.0k
        RecycleTxnPB recycle_txn_pb;
2914
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
2915
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2916
0
            return -1;
2917
0
        }
2918
30.0k
        if ((config::force_immediate_recycle) ||
2919
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
2920
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
2921
30.0k
             current_time_ms)) {
2922
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
2923
23.0k
            num_expired++;
2924
23.0k
            recycle_txn_info_keys.emplace_back(k);
2925
23.0k
        }
2926
30.0k
        return 0;
2927
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2911
3
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
2912
3
        ++num_scanned;
2913
3
        RecycleTxnPB recycle_txn_pb;
2914
3
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
2915
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2916
0
            return -1;
2917
0
        }
2918
3
        if ((config::force_immediate_recycle) ||
2919
3
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
2920
3
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
2921
3
             current_time_ms)) {
2922
3
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
2923
3
            num_expired++;
2924
3
            recycle_txn_info_keys.emplace_back(k);
2925
3
        }
2926
3
        return 0;
2927
3
    };
2928
2929
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
2930
23.0k
        std::string_view k1 = k;
2931
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
2932
23.0k
        k1.remove_prefix(1); // Remove key space
2933
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2934
23.0k
        int ret = decode_key(&k1, &out);
2935
23.0k
        if (ret != 0) {
2936
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
2937
0
            return -1;
2938
0
        }
2939
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2940
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2941
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2942
23.0k
        std::unique_ptr<Transaction> txn;
2943
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2944
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2945
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2946
0
            return -1;
2947
0
        }
2948
        // Remove txn index kv
2949
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
2950
23.0k
        txn->remove(index_key);
2951
        // Remove txn info kv
2952
23.0k
        std::string info_key, info_val;
2953
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
2954
23.0k
        err = txn->get(info_key, &info_val);
2955
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2956
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
2957
0
            return -1;
2958
0
        }
2959
23.0k
        TxnInfoPB txn_info;
2960
23.0k
        if (!txn_info.ParseFromString(info_val)) {
2961
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
2962
0
            return -1;
2963
0
        }
2964
23.0k
        txn->remove(info_key);
2965
        // Remove sub txn index kvs
2966
23.0k
        std::vector<std::string> sub_txn_index_keys;
2967
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
2968
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
2969
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
2970
22.9k
        }
2971
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
2972
22.9k
            txn->remove(sub_txn_index_key);
2973
22.9k
        }
2974
        // Update txn label
2975
23.0k
        std::string label_key, label_val;
2976
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
2977
23.0k
        err = txn->get(label_key, &label_val);
2978
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2979
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
2980
0
                         << " err=" << err;
2981
0
            return -1;
2982
0
        }
2983
23.0k
        TxnLabelPB txn_label;
2984
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
2985
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
2986
0
            return -1;
2987
0
        }
2988
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
2989
23.0k
        if (it != txn_label.txn_ids().end()) {
2990
23.0k
            txn_label.mutable_txn_ids()->erase(it);
2991
23.0k
        }
2992
23.0k
        if (txn_label.txn_ids().empty()) {
2993
23.0k
            txn->remove(label_key);
2994
23.0k
        } else {
2995
0
            if (!txn_label.SerializeToString(&label_val)) {
2996
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
2997
0
                return -1;
2998
0
            }
2999
0
            txn->atomic_set_ver_value(label_key, label_val);
3000
0
        }
3001
        // Remove recycle txn kv
3002
23.0k
        txn->remove(k);
3003
23.0k
        err = txn->commit();
3004
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3005
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
3006
0
            return -1;
3007
0
        }
3008
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
3009
23.0k
        metrics_context.report();
3010
3011
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
3012
23.0k
        return 0;
3013
23.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
2929
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
2930
23.0k
        std::string_view k1 = k;
2931
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
2932
23.0k
        k1.remove_prefix(1); // Remove key space
2933
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2934
23.0k
        int ret = decode_key(&k1, &out);
2935
23.0k
        if (ret != 0) {
2936
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
2937
0
            return -1;
2938
0
        }
2939
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2940
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2941
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2942
23.0k
        std::unique_ptr<Transaction> txn;
2943
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2944
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2945
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2946
0
            return -1;
2947
0
        }
2948
        // Remove txn index kv
2949
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
2950
23.0k
        txn->remove(index_key);
2951
        // Remove txn info kv
2952
23.0k
        std::string info_key, info_val;
2953
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
2954
23.0k
        err = txn->get(info_key, &info_val);
2955
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2956
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
2957
0
            return -1;
2958
0
        }
2959
23.0k
        TxnInfoPB txn_info;
2960
23.0k
        if (!txn_info.ParseFromString(info_val)) {
2961
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
2962
0
            return -1;
2963
0
        }
2964
23.0k
        txn->remove(info_key);
2965
        // Remove sub txn index kvs
2966
23.0k
        std::vector<std::string> sub_txn_index_keys;
2967
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
2968
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
2969
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
2970
22.9k
        }
2971
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
2972
22.9k
            txn->remove(sub_txn_index_key);
2973
22.9k
        }
2974
        // Update txn label
2975
23.0k
        std::string label_key, label_val;
2976
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
2977
23.0k
        err = txn->get(label_key, &label_val);
2978
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2979
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
2980
0
                         << " err=" << err;
2981
0
            return -1;
2982
0
        }
2983
23.0k
        TxnLabelPB txn_label;
2984
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
2985
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
2986
0
            return -1;
2987
0
        }
2988
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
2989
23.0k
        if (it != txn_label.txn_ids().end()) {
2990
23.0k
            txn_label.mutable_txn_ids()->erase(it);
2991
23.0k
        }
2992
23.0k
        if (txn_label.txn_ids().empty()) {
2993
23.0k
            txn->remove(label_key);
2994
23.0k
        } else {
2995
0
            if (!txn_label.SerializeToString(&label_val)) {
2996
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
2997
0
                return -1;
2998
0
            }
2999
0
            txn->atomic_set_ver_value(label_key, label_val);
3000
0
        }
3001
        // Remove recycle txn kv
3002
23.0k
        txn->remove(k);
3003
23.0k
        err = txn->commit();
3004
23.0k
        if (err != TxnErrorCode::TXN_OK) {
3005
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
3006
0
            return -1;
3007
0
        }
3008
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
3009
23.0k
        metrics_context.report();
3010
3011
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
3012
23.0k
        return 0;
3013
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
2929
3
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
2930
3
        std::string_view k1 = k;
2931
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
2932
3
        k1.remove_prefix(1); // Remove key space
2933
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2934
3
        int ret = decode_key(&k1, &out);
2935
3
        if (ret != 0) {
2936
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
2937
0
            return -1;
2938
0
        }
2939
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2940
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2941
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2942
3
        std::unique_ptr<Transaction> txn;
2943
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2944
3
        if (err != TxnErrorCode::TXN_OK) {
2945
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2946
0
            return -1;
2947
0
        }
2948
        // Remove txn index kv
2949
3
        auto index_key = txn_index_key({instance_id_, txn_id});
2950
3
        txn->remove(index_key);
2951
        // Remove txn info kv
2952
3
        std::string info_key, info_val;
2953
3
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
2954
3
        err = txn->get(info_key, &info_val);
2955
3
        if (err != TxnErrorCode::TXN_OK) {
2956
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
2957
0
            return -1;
2958
0
        }
2959
3
        TxnInfoPB txn_info;
2960
3
        if (!txn_info.ParseFromString(info_val)) {
2961
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
2962
0
            return -1;
2963
0
        }
2964
3
        txn->remove(info_key);
2965
        // Remove sub txn index kvs
2966
3
        std::vector<std::string> sub_txn_index_keys;
2967
3
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
2968
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
2969
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
2970
0
        }
2971
3
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
2972
0
            txn->remove(sub_txn_index_key);
2973
0
        }
2974
        // Update txn label
2975
3
        std::string label_key, label_val;
2976
3
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
2977
3
        err = txn->get(label_key, &label_val);
2978
3
        if (err != TxnErrorCode::TXN_OK) {
2979
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
2980
0
                         << " err=" << err;
2981
0
            return -1;
2982
0
        }
2983
3
        TxnLabelPB txn_label;
2984
3
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
2985
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
2986
0
            return -1;
2987
0
        }
2988
3
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
2989
3
        if (it != txn_label.txn_ids().end()) {
2990
3
            txn_label.mutable_txn_ids()->erase(it);
2991
3
        }
2992
3
        if (txn_label.txn_ids().empty()) {
2993
3
            txn->remove(label_key);
2994
3
        } else {
2995
0
            if (!txn_label.SerializeToString(&label_val)) {
2996
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
2997
0
                return -1;
2998
0
            }
2999
0
            txn->atomic_set_ver_value(label_key, label_val);
3000
0
        }
3001
        // Remove recycle txn kv
3002
3
        txn->remove(k);
3003
3
        err = txn->commit();
3004
3
        if (err != TxnErrorCode::TXN_OK) {
3005
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
3006
0
            return -1;
3007
0
        }
3008
3
        metrics_context.total_recycled_num = ++num_recycled;
3009
3
        metrics_context.report();
3010
3011
3
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
3012
3
        return 0;
3013
3
    };
3014
3015
19
    auto loop_done = [&]() -> int {
3016
10
        DORIS_CLOUD_DEFER {
3017
10
            recycle_txn_info_keys.clear();
3018
10
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3016
7
        DORIS_CLOUD_DEFER {
3017
7
            recycle_txn_info_keys.clear();
3018
7
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3016
3
        DORIS_CLOUD_DEFER {
3017
3
            recycle_txn_info_keys.clear();
3018
3
        };
3019
10
        TEST_SYNC_POINT_CALLBACK(
3020
10
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
3021
10
                &recycle_txn_info_keys);
3022
23.0k
        for (const auto& k : recycle_txn_info_keys) {
3023
23.0k
            concurrent_delete_executor.add([&]() {
3024
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
3025
0
                    LOG_WARNING("failed to delete recycle txn kv")
3026
0
                            .tag("instance id", instance_id_)
3027
0
                            .tag("key", hex(k));
3028
0
                    return -1;
3029
0
                }
3030
23.0k
                return 0;
3031
23.0k
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
3023
23.0k
            concurrent_delete_executor.add([&]() {
3024
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
3025
0
                    LOG_WARNING("failed to delete recycle txn kv")
3026
0
                            .tag("instance id", instance_id_)
3027
0
                            .tag("key", hex(k));
3028
0
                    return -1;
3029
0
                }
3030
23.0k
                return 0;
3031
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
3023
3
            concurrent_delete_executor.add([&]() {
3024
3
                if (delete_recycle_txn_kv(k) != 0) {
3025
0
                    LOG_WARNING("failed to delete recycle txn kv")
3026
0
                            .tag("instance id", instance_id_)
3027
0
                            .tag("key", hex(k));
3028
0
                    return -1;
3029
0
                }
3030
3
                return 0;
3031
3
            });
3032
23.0k
        }
3033
10
        bool finished = true;
3034
10
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3035
23.0k
        for (int r : rets) {
3036
23.0k
            if (r != 0) {
3037
0
                ret = -1;
3038
0
            }
3039
23.0k
        }
3040
3041
10
        ret = finished ? ret : -1;
3042
3043
10
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
3044
3045
10
        if (ret != 0) {
3046
2
            LOG_WARNING("recycle txn kv ret!=0")
3047
2
                    .tag("finished", finished)
3048
2
                    .tag("ret", ret)
3049
2
                    .tag("instance_id", instance_id_);
3050
2
            return ret;
3051
2
        }
3052
8
        return ret;
3053
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
3015
7
    auto loop_done = [&]() -> int {
3016
7
        DORIS_CLOUD_DEFER {
3017
7
            recycle_txn_info_keys.clear();
3018
7
        };
3019
7
        TEST_SYNC_POINT_CALLBACK(
3020
7
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
3021
7
                &recycle_txn_info_keys);
3022
23.0k
        for (const auto& k : recycle_txn_info_keys) {
3023
23.0k
            concurrent_delete_executor.add([&]() {
3024
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
3025
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
3026
23.0k
                            .tag("instance id", instance_id_)
3027
23.0k
                            .tag("key", hex(k));
3028
23.0k
                    return -1;
3029
23.0k
                }
3030
23.0k
                return 0;
3031
23.0k
            });
3032
23.0k
        }
3033
7
        bool finished = true;
3034
7
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3035
23.0k
        for (int r : rets) {
3036
23.0k
            if (r != 0) {
3037
0
                ret = -1;
3038
0
            }
3039
23.0k
        }
3040
3041
7
        ret = finished ? ret : -1;
3042
3043
7
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
3044
3045
7
        if (ret != 0) {
3046
2
            LOG_WARNING("recycle txn kv ret!=0")
3047
2
                    .tag("finished", finished)
3048
2
                    .tag("ret", ret)
3049
2
                    .tag("instance_id", instance_id_);
3050
2
            return ret;
3051
2
        }
3052
5
        return ret;
3053
7
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
3015
3
    auto loop_done = [&]() -> int {
3016
3
        DORIS_CLOUD_DEFER {
3017
3
            recycle_txn_info_keys.clear();
3018
3
        };
3019
3
        TEST_SYNC_POINT_CALLBACK(
3020
3
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
3021
3
                &recycle_txn_info_keys);
3022
3
        for (const auto& k : recycle_txn_info_keys) {
3023
3
            concurrent_delete_executor.add([&]() {
3024
3
                if (delete_recycle_txn_kv(k) != 0) {
3025
3
                    LOG_WARNING("failed to delete recycle txn kv")
3026
3
                            .tag("instance id", instance_id_)
3027
3
                            .tag("key", hex(k));
3028
3
                    return -1;
3029
3
                }
3030
3
                return 0;
3031
3
            });
3032
3
        }
3033
3
        bool finished = true;
3034
3
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3035
3
        for (int r : rets) {
3036
3
            if (r != 0) {
3037
0
                ret = -1;
3038
0
            }
3039
3
        }
3040
3041
3
        ret = finished ? ret : -1;
3042
3043
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
3044
3045
3
        if (ret != 0) {
3046
0
            LOG_WARNING("recycle txn kv ret!=0")
3047
0
                    .tag("finished", finished)
3048
0
                    .tag("ret", ret)
3049
0
                    .tag("instance_id", instance_id_);
3050
0
            return ret;
3051
0
        }
3052
3
        return ret;
3053
3
    };
3054
3055
19
    if (config::enable_recycler_stats_metrics) {
3056
0
        scan_and_statistics_expired_txn_label();
3057
0
    }
3058
    // recycle_func and loop_done for scan and recycle
3059
19
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
3060
19
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
3061
19
}
3062
3063
// Identifiers of one copy job, passed to BatchObjStoreAccessor::add(), which destructures
// the members positionally (so their order matters) to build the per-job log-trace string
// and the copy-file keys.
struct CopyJobIdTuple {
3064
    // Used in the log trace and as part of each copy-file key.
    std::string instance_id;
3065
    // Used in the log trace and as part of each copy-file key.
    std::string stage_id;
3066
    // Used in the log trace and as part of each copy-file key.
    long table_id;
3067
    // Only used in the log trace, where it is printed as "query_id".
    std::string copy_id;
3068
    // Only used in the log trace, where it is printed as "path".
    std::string stage_path;
3069
};
3070
struct BatchObjStoreAccessor {
3071
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
3072
                          TxnKv* txn_kv)
3073
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
3074
3
    ~BatchObjStoreAccessor() {
3075
3
        if (!paths_.empty()) {
3076
3
            consume();
3077
3
        }
3078
3
    }
3079
3080
    /**
3081
    * To implicitely do batch work and submit the batch delete task to s3
3082
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
3083
    *
3084
    * @param copy_job The protubuf struct consists of the copy job files.
3085
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
3086
    *            it would last until we finish the delete task, here we need pass one string value
3087
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
3088
    */
3089
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
3090
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
3091
5
        auto& file_keys = copy_file_keys_[key];
3092
5
        file_keys.log_trace =
3093
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
3094
5
                            instance_id, stage_id, table_id, copy_id, path);
3095
5
        std::string_view log_trace = file_keys.log_trace;
3096
2.03k
        for (const auto& file : copy_job.object_files()) {
3097
2.03k
            auto relative_path = file.relative_path();
3098
2.03k
            paths_.push_back(relative_path);
3099
2.03k
            file_keys.keys.push_back(copy_file_key(
3100
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
3101
2.03k
            LOG_INFO(log_trace)
3102
2.03k
                    .tag("relative_path", relative_path)
3103
2.03k
                    .tag("batch_count", batch_count_);
3104
2.03k
        }
3105
5
        LOG_INFO(log_trace)
3106
5
                .tag("objects_num", copy_job.object_files().size())
3107
5
                .tag("batch_count", batch_count_);
3108
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
3109
        // recommend using delete objects when objects num is less than 10)
3110
5
        if (paths_.size() < 1000) {
3111
3
            return;
3112
3
        }
3113
2
        consume();
3114
2
    }
3115
3116
private:
3117
5
    void consume() {
3118
5
        DORIS_CLOUD_DEFER {
3119
5
            paths_.clear();
3120
5
            copy_file_keys_.clear();
3121
5
            batch_count_++;
3122
3123
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
3124
5
                        batch_count_);
3125
5
        };
3126
3127
5
        StopWatch sw;
3128
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
3129
5
        if (0 != accessor_->delete_files(paths_)) {
3130
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
3131
2
                        paths_.size(), batch_count_, sw.elapsed_us());
3132
2
            return;
3133
2
        }
3134
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
3135
3
                    paths_.size(), batch_count_, sw.elapsed_us());
3136
        // delete fdb's keys
3137
3
        for (auto& file_keys : copy_file_keys_) {
3138
3
            auto& [log_trace, keys] = file_keys.second;
3139
3
            std::unique_ptr<Transaction> txn;
3140
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
3141
0
                LOG(WARNING) << "failed to create txn";
3142
0
                continue;
3143
0
            }
3144
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
3145
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
3146
            // limited, should not cause the txn commit failed.
3147
1.02k
            for (const auto& key : keys) {
3148
1.02k
                txn->remove(key);
3149
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
3150
1.02k
            }
3151
3
            txn->remove(file_keys.first);
3152
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
3153
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
3154
0
                continue;
3155
0
            }
3156
3
        }
3157
3
    }
3158
    std::shared_ptr<StorageVaultAccessor> accessor_;
3159
    // the path of the s3 files to be deleted
3160
    std::vector<std::string> paths_;
3161
    struct CopyFiles {
3162
        std::string log_trace;
3163
        std::vector<std::string> keys;
3164
    };
3165
    // pair<std::string, std::vector<std::string>>
3166
    // first: instance_id_ stage_id table_id query_id
3167
    // second: keys to be deleted
3168
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
3169
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
3170
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
3171
    // which can together uniquely identifies different tasks for tracing log
3172
    uint64_t& batch_count_;
3173
    TxnKv* txn_kv_;
3174
};
3175
3176
13
int InstanceRecycler::recycle_copy_jobs() {
3177
13
    int64_t num_scanned = 0;
3178
13
    int64_t num_finished = 0;
3179
13
    int64_t num_expired = 0;
3180
13
    int64_t num_recycled = 0;
3181
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
3182
13
    uint64_t batch_count = 0;
3183
13
    const std::string task_name = "recycle_copy_jobs";
3184
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3185
3186
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
3187
3188
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3189
13
    register_recycle_task(task_name, start_time);
3190
3191
13
    DORIS_CLOUD_DEFER {
3192
13
        unregister_recycle_task(task_name);
3193
13
        int64_t cost =
3194
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3195
13
        metrics_context.finish_report();
3196
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
3197
13
                .tag("instance_id", instance_id_)
3198
13
                .tag("num_scanned", num_scanned)
3199
13
                .tag("num_finished", num_finished)
3200
13
                .tag("num_expired", num_expired)
3201
13
                .tag("num_recycled", num_recycled);
3202
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
3191
13
    DORIS_CLOUD_DEFER {
3192
13
        unregister_recycle_task(task_name);
3193
13
        int64_t cost =
3194
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3195
13
        metrics_context.finish_report();
3196
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
3197
13
                .tag("instance_id", instance_id_)
3198
13
                .tag("num_scanned", num_scanned)
3199
13
                .tag("num_finished", num_finished)
3200
13
                .tag("num_expired", num_expired)
3201
13
                .tag("num_recycled", num_recycled);
3202
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
3203
3204
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
3205
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
3206
13
    std::string key0;
3207
13
    std::string key1;
3208
13
    copy_job_key(key_info0, &key0);
3209
13
    copy_job_key(key_info1, &key1);
3210
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
3211
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
3212
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
3213
16
                         this](std::string_view k, std::string_view v) -> int {
3214
16
        ++num_scanned;
3215
16
        CopyJobPB copy_job;
3216
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
3217
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
3218
0
            return -1;
3219
0
        }
3220
3221
        // decode copy job key
3222
16
        auto k1 = k;
3223
16
        k1.remove_prefix(1);
3224
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3225
16
        decode_key(&k1, &out);
3226
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
3227
        // -> CopyJobPB
3228
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
3229
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
3230
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
3231
3232
16
        bool check_storage = true;
3233
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
3234
12
            ++num_finished;
3235
3236
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
3237
7
                auto it = stage_accessor_map.find(stage_id);
3238
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
3239
7
                std::string_view path;
3240
7
                if (it != stage_accessor_map.end()) {
3241
2
                    accessor = it->second;
3242
5
                } else {
3243
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
3244
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
3245
5
                                                      &inner_accessor);
3246
5
                    if (ret < 0) { // error
3247
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
3248
0
                        return -1;
3249
5
                    } else if (ret == 0) {
3250
3
                        path = inner_accessor->uri();
3251
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
3252
3
                                inner_accessor, batch_count, txn_kv_.get());
3253
3
                        stage_accessor_map.emplace(stage_id, accessor);
3254
3
                    } else { // stage not found, skip check storage
3255
2
                        check_storage = false;
3256
2
                    }
3257
5
                }
3258
7
                if (check_storage) {
3259
                    // TODO delete objects with key and etag is not supported
3260
5
                    accessor->add(std::move(copy_job), std::string(k),
3261
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
3262
5
                    return 0;
3263
5
                }
3264
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
3265
5
                int64_t current_time =
3266
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
3267
5
                if (copy_job.finish_time_ms() > 0) {
3268
2
                    if (!config::force_immediate_recycle &&
3269
2
                        current_time < copy_job.finish_time_ms() +
3270
2
                                               config::copy_job_max_retention_second * 1000) {
3271
1
                        return 0;
3272
1
                    }
3273
3
                } else {
3274
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
3275
3
                    if (!config::force_immediate_recycle &&
3276
3
                        current_time < copy_job.start_time_ms() +
3277
3
                                               config::copy_job_max_retention_second * 1000) {
3278
1
                        return 0;
3279
1
                    }
3280
3
                }
3281
5
            }
3282
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
3283
4
            int64_t current_time =
3284
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
3285
            // if copy job is timeout: delete all copy file kvs and copy job kv
3286
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
3287
2
                return 0;
3288
2
            }
3289
2
            ++num_expired;
3290
2
        }
3291
3292
        // delete all copy files
3293
7
        std::vector<std::string> copy_file_keys;
3294
70
        for (auto& file : copy_job.object_files()) {
3295
70
            copy_file_keys.push_back(copy_file_key(
3296
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
3297
70
        }
3298
7
        std::unique_ptr<Transaction> txn;
3299
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3300
0
            LOG(WARNING) << "failed to create txn";
3301
0
            return -1;
3302
0
        }
3303
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
3304
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
3305
        // limited, should not cause the txn commit failed.
3306
70
        for (const auto& key : copy_file_keys) {
3307
70
            txn->remove(key);
3308
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
3309
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
3310
70
                      << ", query_id=" << copy_id;
3311
70
        }
3312
7
        txn->remove(k);
3313
7
        TxnErrorCode err = txn->commit();
3314
7
        if (err != TxnErrorCode::TXN_OK) {
3315
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
3316
0
            return -1;
3317
0
        }
3318
3319
7
        metrics_context.total_recycled_num = ++num_recycled;
3320
7
        metrics_context.report();
3321
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
3322
7
        return 0;
3323
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3213
16
                         this](std::string_view k, std::string_view v) -> int {
3214
16
        ++num_scanned;
3215
16
        CopyJobPB copy_job;
3216
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
3217
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
3218
0
            return -1;
3219
0
        }
3220
3221
        // decode copy job key
3222
16
        auto k1 = k;
3223
16
        k1.remove_prefix(1);
3224
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3225
16
        decode_key(&k1, &out);
3226
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
3227
        // -> CopyJobPB
3228
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
3229
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
3230
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
3231
3232
16
        bool check_storage = true;
3233
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
3234
12
            ++num_finished;
3235
3236
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
3237
7
                auto it = stage_accessor_map.find(stage_id);
3238
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
3239
7
                std::string_view path;
3240
7
                if (it != stage_accessor_map.end()) {
3241
2
                    accessor = it->second;
3242
5
                } else {
3243
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
3244
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
3245
5
                                                      &inner_accessor);
3246
5
                    if (ret < 0) { // error
3247
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
3248
0
                        return -1;
3249
5
                    } else if (ret == 0) {
3250
3
                        path = inner_accessor->uri();
3251
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
3252
3
                                inner_accessor, batch_count, txn_kv_.get());
3253
3
                        stage_accessor_map.emplace(stage_id, accessor);
3254
3
                    } else { // stage not found, skip check storage
3255
2
                        check_storage = false;
3256
2
                    }
3257
5
                }
3258
7
                if (check_storage) {
3259
                    // TODO delete objects with key and etag is not supported
3260
5
                    accessor->add(std::move(copy_job), std::string(k),
3261
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
3262
5
                    return 0;
3263
5
                }
3264
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
3265
5
                int64_t current_time =
3266
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
3267
5
                if (copy_job.finish_time_ms() > 0) {
3268
2
                    if (!config::force_immediate_recycle &&
3269
2
                        current_time < copy_job.finish_time_ms() +
3270
2
                                               config::copy_job_max_retention_second * 1000) {
3271
1
                        return 0;
3272
1
                    }
3273
3
                } else {
3274
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
3275
3
                    if (!config::force_immediate_recycle &&
3276
3
                        current_time < copy_job.start_time_ms() +
3277
3
                                               config::copy_job_max_retention_second * 1000) {
3278
1
                        return 0;
3279
1
                    }
3280
3
                }
3281
5
            }
3282
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
3283
4
            int64_t current_time =
3284
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
3285
            // if copy job is timeout: delete all copy file kvs and copy job kv
3286
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
3287
2
                return 0;
3288
2
            }
3289
2
            ++num_expired;
3290
2
        }
3291
3292
        // delete all copy files
3293
7
        std::vector<std::string> copy_file_keys;
3294
70
        for (auto& file : copy_job.object_files()) {
3295
70
            copy_file_keys.push_back(copy_file_key(
3296
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
3297
70
        }
3298
7
        std::unique_ptr<Transaction> txn;
3299
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3300
0
            LOG(WARNING) << "failed to create txn";
3301
0
            return -1;
3302
0
        }
3303
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
3304
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
3305
        // limited, should not cause the txn commit failed.
3306
70
        for (const auto& key : copy_file_keys) {
3307
70
            txn->remove(key);
3308
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
3309
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
3310
70
                      << ", query_id=" << copy_id;
3311
70
        }
3312
7
        txn->remove(k);
3313
7
        TxnErrorCode err = txn->commit();
3314
7
        if (err != TxnErrorCode::TXN_OK) {
3315
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
3316
0
            return -1;
3317
0
        }
3318
3319
7
        metrics_context.total_recycled_num = ++num_recycled;
3320
7
        metrics_context.report();
3321
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
3322
7
        return 0;
3323
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
3324
3325
13
    if (config::enable_recycler_stats_metrics) {
3326
0
        scan_and_statistics_copy_jobs();
3327
0
    }
3328
    // recycle_func and loop_done for scan and recycle
3329
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
3330
13
}
3331
3332
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
3333
                                             const StagePB::StageType& stage_type,
3334
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
3335
5
#ifdef UNIT_TEST
3336
    // In unit test, external use the same accessor as the internal stage
3337
5
    auto it = accessor_map_.find(stage_id);
3338
5
    if (it != accessor_map_.end()) {
3339
3
        *accessor = it->second;
3340
3
    } else {
3341
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
3342
2
        return 1;
3343
2
    }
3344
#else
3345
    // init s3 accessor and add to accessor map
3346
    auto stage_it =
3347
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
3348
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
3349
3350
    if (stage_it == instance_info_.stages().end()) {
3351
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
3352
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
3353
        return 1;
3354
    }
3355
3356
    const auto& object_store_info = stage_it->obj_info();
3357
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
3358
3359
    S3Conf s3_conf;
3360
    if (stage_type == StagePB::EXTERNAL) {
3361
        if (stage_access_type == StagePB::AKSK) {
3362
            auto conf = S3Conf::from_obj_store_info(object_store_info);
3363
            if (!conf) {
3364
                return -1;
3365
            }
3366
3367
            s3_conf = std::move(*conf);
3368
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
3369
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
3370
            if (!conf) {
3371
                return -1;
3372
            }
3373
3374
            s3_conf = std::move(*conf);
3375
            if (instance_info_.ram_user().has_encryption_info()) {
3376
                AkSkPair plain_ak_sk_pair;
3377
                int ret = decrypt_ak_sk_helper(
3378
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
3379
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
3380
                if (ret != 0) {
3381
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
3382
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
3383
                    return -1;
3384
                }
3385
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
3386
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
3387
            } else {
3388
                s3_conf.ak = instance_info_.ram_user().ak();
3389
                s3_conf.sk = instance_info_.ram_user().sk();
3390
            }
3391
        } else {
3392
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
3393
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
3394
            return -1;
3395
        }
3396
    } else if (stage_type == StagePB::INTERNAL) {
3397
        int idx = stoi(object_store_info.id());
3398
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3399
            LOG(WARNING) << "invalid idx: " << idx;
3400
            return -1;
3401
        }
3402
3403
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
3404
        auto conf = S3Conf::from_obj_store_info(old_obj);
3405
        if (!conf) {
3406
            return -1;
3407
        }
3408
3409
        s3_conf = std::move(*conf);
3410
        s3_conf.prefix = object_store_info.prefix();
3411
    } else {
3412
        LOG(WARNING) << "unknown stage type " << stage_type;
3413
        return -1;
3414
    }
3415
3416
    std::shared_ptr<S3Accessor> s3_accessor;
3417
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
3418
    if (ret != 0) {
3419
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
3420
        return -1;
3421
    }
3422
3423
    *accessor = std::move(s3_accessor);
3424
#endif
3425
3
    return 0;
3426
5
}
3427
3428
11
int InstanceRecycler::recycle_stage() {
3429
11
    int64_t num_scanned = 0;
3430
11
    int64_t num_recycled = 0;
3431
11
    const std::string task_name = "recycle_stage";
3432
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3433
3434
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
3435
3436
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3437
11
    register_recycle_task(task_name, start_time);
3438
3439
11
    DORIS_CLOUD_DEFER {
3440
11
        unregister_recycle_task(task_name);
3441
11
        int64_t cost =
3442
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3443
11
        metrics_context.finish_report();
3444
11
        LOG_WARNING("recycle stage, cost={}s", cost)
3445
11
                .tag("instance_id", instance_id_)
3446
11
                .tag("num_scanned", num_scanned)
3447
11
                .tag("num_recycled", num_recycled);
3448
11
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
3439
11
    DORIS_CLOUD_DEFER {
3440
11
        unregister_recycle_task(task_name);
3441
11
        int64_t cost =
3442
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3443
11
        metrics_context.finish_report();
3444
11
        LOG_WARNING("recycle stage, cost={}s", cost)
3445
11
                .tag("instance_id", instance_id_)
3446
11
                .tag("num_scanned", num_scanned)
3447
11
                .tag("num_recycled", num_recycled);
3448
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
3449
3450
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
3451
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
3452
11
    std::string key0 = recycle_stage_key(key_info0);
3453
11
    std::string key1 = recycle_stage_key(key_info1);
3454
3455
11
    std::vector<std::string_view> stage_keys;
3456
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
3457
11
                         this](std::string_view k, std::string_view v) -> int {
3458
1
        ++num_scanned;
3459
1
        RecycleStagePB recycle_stage;
3460
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
3461
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
3462
0
            return -1;
3463
0
        }
3464
3465
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
3466
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3467
0
            LOG(WARNING) << "invalid idx: " << idx;
3468
0
            return -1;
3469
0
        }
3470
3471
1
        std::shared_ptr<StorageVaultAccessor> accessor;
3472
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
3473
1
                [&] {
3474
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
3475
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
3476
1
                    if (!s3_conf) {
3477
1
                        return -1;
3478
1
                    }
3479
3480
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
3481
1
                    std::shared_ptr<S3Accessor> s3_accessor;
3482
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
3483
1
                    if (ret != 0) {
3484
1
                        return -1;
3485
1
                    }
3486
3487
1
                    accessor = std::move(s3_accessor);
3488
1
                    return 0;
3489
1
                }(),
3490
1
                "recycle_stage:get_accessor", &accessor);
3491
3492
1
        if (ret != 0) {
3493
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
3494
0
            return ret;
3495
0
        }
3496
3497
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
3498
1
                .tag("instance_id", instance_id_)
3499
1
                .tag("stage_id", recycle_stage.stage().stage_id())
3500
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
3501
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
3502
1
                .tag("obj_info_id", idx)
3503
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
3504
1
        ret = accessor->delete_all();
3505
1
        if (ret != 0) {
3506
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
3507
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
3508
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
3509
0
                         << ", ret=" << ret;
3510
0
            return -1;
3511
0
        }
3512
1
        metrics_context.total_recycled_num = ++num_recycled;
3513
1
        metrics_context.report();
3514
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
3515
1
        stage_keys.push_back(k);
3516
1
        return 0;
3517
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3457
1
                         this](std::string_view k, std::string_view v) -> int {
3458
1
        ++num_scanned;
3459
1
        RecycleStagePB recycle_stage;
3460
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
3461
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
3462
0
            return -1;
3463
0
        }
3464
3465
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
3466
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3467
0
            LOG(WARNING) << "invalid idx: " << idx;
3468
0
            return -1;
3469
0
        }
3470
3471
1
        std::shared_ptr<StorageVaultAccessor> accessor;
3472
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
3473
1
                [&] {
3474
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
3475
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
3476
1
                    if (!s3_conf) {
3477
1
                        return -1;
3478
1
                    }
3479
3480
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
3481
1
                    std::shared_ptr<S3Accessor> s3_accessor;
3482
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
3483
1
                    if (ret != 0) {
3484
1
                        return -1;
3485
1
                    }
3486
3487
1
                    accessor = std::move(s3_accessor);
3488
1
                    return 0;
3489
1
                }(),
3490
1
                "recycle_stage:get_accessor", &accessor);
3491
3492
1
        if (ret != 0) {
3493
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
3494
0
            return ret;
3495
0
        }
3496
3497
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
3498
1
                .tag("instance_id", instance_id_)
3499
1
                .tag("stage_id", recycle_stage.stage().stage_id())
3500
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
3501
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
3502
1
                .tag("obj_info_id", idx)
3503
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
3504
1
        ret = accessor->delete_all();
3505
1
        if (ret != 0) {
3506
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
3507
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
3508
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
3509
0
                         << ", ret=" << ret;
3510
0
            return -1;
3511
0
        }
3512
1
        metrics_context.total_recycled_num = ++num_recycled;
3513
1
        metrics_context.report();
3514
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
3515
1
        stage_keys.push_back(k);
3516
1
        return 0;
3517
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
3518
3519
11
    auto loop_done = [&stage_keys, this]() -> int {
3520
1
        if (stage_keys.empty()) return 0;
3521
1
        DORIS_CLOUD_DEFER {
3522
1
            stage_keys.clear();
3523
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3521
1
        DORIS_CLOUD_DEFER {
3522
1
            stage_keys.clear();
3523
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
3524
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
3525
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
3526
0
            return -1;
3527
0
        }
3528
1
        return 0;
3529
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
3519
1
    auto loop_done = [&stage_keys, this]() -> int {
3520
1
        if (stage_keys.empty()) return 0;
3521
1
        DORIS_CLOUD_DEFER {
3522
1
            stage_keys.clear();
3523
1
        };
3524
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
3525
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
3526
0
            return -1;
3527
0
        }
3528
1
        return 0;
3529
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
3530
11
    if (config::enable_recycler_stats_metrics) {
3531
0
        scan_and_statistics_stage();
3532
0
    }
3533
    // recycle_func and loop_done for scan and recycle
3534
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
3535
11
}
3536
3537
10
int InstanceRecycler::recycle_expired_stage_objects() {
3538
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
3539
3540
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3541
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
3542
3543
10
    DORIS_CLOUD_DEFER {
3544
10
        int64_t cost =
3545
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3546
10
        metrics_context.finish_report();
3547
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
3548
10
                .tag("instance_id", instance_id_);
3549
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
3543
10
    DORIS_CLOUD_DEFER {
3544
10
        int64_t cost =
3545
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3546
10
        metrics_context.finish_report();
3547
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
3548
10
                .tag("instance_id", instance_id_);
3549
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
3550
3551
10
    int ret = 0;
3552
3553
10
    if (config::enable_recycler_stats_metrics) {
3554
0
        scan_and_statistics_expired_stage_objects();
3555
0
    }
3556
3557
10
    for (const auto& stage : instance_info_.stages()) {
3558
0
        std::stringstream ss;
3559
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
3560
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
3561
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
3562
0
           << ", prefix=" << stage.obj_info().prefix();
3563
3564
0
        if (stopped()) {
3565
0
            break;
3566
0
        }
3567
0
        if (stage.type() == StagePB::EXTERNAL) {
3568
0
            continue;
3569
0
        }
3570
0
        int idx = stoi(stage.obj_info().id());
3571
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3572
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
3573
0
            continue;
3574
0
        }
3575
3576
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
3577
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
3578
0
        if (!s3_conf) {
3579
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
3580
0
            continue;
3581
0
        }
3582
3583
0
        s3_conf->prefix = stage.obj_info().prefix();
3584
0
        std::shared_ptr<S3Accessor> accessor;
3585
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
3586
0
        if (ret1 != 0) {
3587
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
3588
0
            ret = -1;
3589
0
            continue;
3590
0
        }
3591
3592
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
3593
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
3594
0
            ret = -1;
3595
0
            continue;
3596
0
        }
3597
3598
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
3599
0
        int64_t expiration_time =
3600
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
3601
0
                config::internal_stage_objects_expire_time_second;
3602
0
        if (config::force_immediate_recycle) {
3603
0
            expiration_time = INT64_MAX;
3604
0
        }
3605
0
        ret1 = accessor->delete_all(expiration_time);
3606
0
        if (ret1 != 0) {
3607
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
3608
0
                         << ss.str();
3609
0
            ret = -1;
3610
0
            continue;
3611
0
        }
3612
0
        metrics_context.total_recycled_num++;
3613
0
        metrics_context.report();
3614
0
    }
3615
10
    return ret;
3616
10
}
3617
3618
121
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
3619
121
    std::lock_guard lock(recycle_tasks_mutex);
3620
121
    running_recycle_tasks[task_name] = start_time;
3621
121
}
3622
3623
121
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
3624
121
    std::lock_guard lock(recycle_tasks_mutex);
3625
121
    DCHECK(running_recycle_tasks[task_name] > 0);
3626
121
    running_recycle_tasks.erase(task_name);
3627
121
}
3628
3629
21
bool InstanceRecycler::check_recycle_tasks() {
3630
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
3631
21
    {
3632
21
        std::lock_guard lock(recycle_tasks_mutex);
3633
21
        tmp_running_recycle_tasks = running_recycle_tasks;
3634
21
    }
3635
3636
21
    bool found = false;
3637
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3638
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
3639
20
        int64_t cost = now - start_time;
3640
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
3641
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
3642
20
                    .tag("instance_id", instance_id_)
3643
20
                    .tag("task", task_name);
3644
20
            found = true;
3645
20
        }
3646
20
    }
3647
3648
21
    return found;
3649
21
}
3650
3651
// Scan and statistics indexes that need to be recycled
3652
0
int InstanceRecycler::scan_and_statistics_indexes() {
3653
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
3654
3655
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
3656
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
3657
0
    std::string index_key0;
3658
0
    std::string index_key1;
3659
0
    recycle_index_key(index_key_info0, &index_key0);
3660
0
    recycle_index_key(index_key_info1, &index_key1);
3661
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3662
3663
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
3664
0
        RecycleIndexPB index_pb;
3665
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
3666
0
            return 0;
3667
0
        }
3668
0
        int64_t current_time = ::time(nullptr);
3669
0
        if (current_time <
3670
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
3671
0
            return 0;
3672
0
        }
3673
        // decode index_id
3674
0
        auto k1 = k;
3675
0
        k1.remove_prefix(1);
3676
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3677
0
        decode_key(&k1, &out);
3678
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
3679
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
3680
0
        std::unique_ptr<Transaction> txn;
3681
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3682
0
        if (err != TxnErrorCode::TXN_OK) {
3683
0
            return 0;
3684
0
        }
3685
0
        std::string val;
3686
0
        err = txn->get(k, &val);
3687
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3688
0
            return 0;
3689
0
        }
3690
0
        if (err != TxnErrorCode::TXN_OK) {
3691
0
            return 0;
3692
0
        }
3693
0
        index_pb.Clear();
3694
0
        if (!index_pb.ParseFromString(val)) {
3695
0
            return 0;
3696
0
        }
3697
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
3698
0
            return 0;
3699
0
        }
3700
0
        metrics_context.total_need_recycle_num++;
3701
0
        return 0;
3702
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
3703
3704
0
    return scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv),
3705
0
                            [&metrics_context]() -> int {
3706
0
                                metrics_context.report(true);
3707
0
                                segment_metrics_context_.report(true);
3708
0
                                tablet_metrics_context_.report(true);
3709
0
                                return 0;
3710
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_1clEv
3711
0
}
3712
3713
// Scan and statistics partitions that need to be recycled
3714
0
int InstanceRecycler::scan_and_statistics_partitions() {
3715
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
3716
3717
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
3718
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
3719
0
    std::string part_key0;
3720
0
    std::string part_key1;
3721
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3722
3723
0
    recycle_partition_key(part_key_info0, &part_key0);
3724
0
    recycle_partition_key(part_key_info1, &part_key1);
3725
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
3726
0
        RecyclePartitionPB part_pb;
3727
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
3728
0
            return 0;
3729
0
        }
3730
0
        int64_t current_time = ::time(nullptr);
3731
0
        if (current_time <
3732
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
3733
0
            return 0;
3734
0
        }
3735
        // decode partition_id
3736
0
        auto k1 = k;
3737
0
        k1.remove_prefix(1);
3738
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3739
0
        decode_key(&k1, &out);
3740
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
3741
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
3742
        // Change state to RECYCLING
3743
0
        std::unique_ptr<Transaction> txn;
3744
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3745
0
        if (err != TxnErrorCode::TXN_OK) {
3746
0
            return 0;
3747
0
        }
3748
0
        std::string val;
3749
0
        err = txn->get(k, &val);
3750
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3751
0
            return 0;
3752
0
        }
3753
0
        if (err != TxnErrorCode::TXN_OK) {
3754
0
            return 0;
3755
0
        }
3756
0
        part_pb.Clear();
3757
0
        if (!part_pb.ParseFromString(val)) {
3758
0
            return 0;
3759
0
        }
3760
        // Partitions with PREPARED state MUST have no data
3761
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
3762
0
        int ret = 0;
3763
0
        for (int64_t index_id : part_pb.index_id()) {
3764
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
3765
0
                                            partition_id, is_empty_tablet) != 0) {
3766
0
                ret = 0;
3767
0
            }
3768
0
        }
3769
0
        metrics_context.total_need_recycle_num++;
3770
0
        return ret;
3771
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
3772
0
    return scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv),
3773
0
                            [&metrics_context]() -> int {
3774
0
                                metrics_context.report(true);
3775
0
                                segment_metrics_context_.report(true);
3776
0
                                tablet_metrics_context_.report(true);
3777
0
                                return 0;
3778
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_1clEv
3779
0
}
3780
3781
// Scan and statistics rowsets that need to be recycled
3782
0
int InstanceRecycler::scan_and_statistics_rowsets() {
3783
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
3784
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
3785
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
3786
0
    std::string recyc_rs_key0;
3787
0
    std::string recyc_rs_key1;
3788
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
3789
0
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
3790
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3791
3792
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
3793
0
        RecycleRowsetPB rowset;
3794
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3795
0
            return 0;
3796
0
        }
3797
0
        int64_t current_time = ::time(nullptr);
3798
0
        if (current_time <
3799
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
3800
0
            return 0;
3801
0
        }
3802
0
        if (!rowset.has_type()) {
3803
0
            if (!rowset.has_resource_id()) [[unlikely]] {
3804
0
                return 0;
3805
0
            }
3806
0
            if (rowset.resource_id().empty()) [[unlikely]] {
3807
0
                return 0;
3808
0
            }
3809
0
            return 0;
3810
0
        }
3811
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
3812
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
3813
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
3814
0
                return 0;
3815
0
            }
3816
0
        }
3817
0
        if (rowset.type() != RecycleRowsetPB::PREPARE) {
3818
0
            if (rowset_meta->num_segments() > 0) {
3819
0
                metrics_context.total_need_recycle_num++;
3820
0
                segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
3821
0
                segment_metrics_context_.total_need_recycle_data_size +=
3822
0
                        rowset_meta->total_disk_size();
3823
0
                metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
3824
0
            }
3825
0
        }
3826
0
        return 0;
3827
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
3828
0
    return scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
3829
0
                            [&metrics_context]() -> int {
3830
0
                                metrics_context.report(true);
3831
0
                                segment_metrics_context_.report(true);
3832
0
                                return 0;
3833
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_1clEv
3834
0
}
3835
3836
// Scan and statistics tmp_rowsets that need to be recycled
3837
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
3838
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
3839
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
3840
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
3841
0
    std::string tmp_rs_key0;
3842
0
    std::string tmp_rs_key1;
3843
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
3844
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
3845
3846
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3847
3848
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
3849
0
        doris::RowsetMetaCloudPB rowset;
3850
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3851
0
            return 0;
3852
0
        }
3853
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3854
0
        int64_t current_time = ::time(nullptr);
3855
0
        if (current_time < expiration) {
3856
0
            return 0;
3857
0
        }
3858
3859
0
        DCHECK_GT(rowset.txn_id(), 0)
3860
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
3861
0
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
3862
0
            return 0;
3863
0
        }
3864
3865
0
        if (!rowset.has_resource_id()) {
3866
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
3867
0
                return 0;
3868
0
            }
3869
0
            metrics_context.total_need_recycle_num++;
3870
0
            return 0;
3871
0
        }
3872
3873
0
        metrics_context.total_need_recycle_num++;
3874
0
        if (rowset.num_segments() > 0) {
3875
0
            metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
3876
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
3877
0
            segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
3878
0
        }
3879
0
        return 0;
3880
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
3881
0
    return scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv),
3882
0
                            [&metrics_context]() -> int {
3883
0
                                metrics_context.report(true);
3884
0
                                segment_metrics_context_.report(true);
3885
0
                                return 0;
3886
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_1clEv
3887
0
}
3888
3889
// Scan and statistics abort_timeout_txn that need to be recycled
3890
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
3891
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
3892
3893
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
3894
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
3895
0
    std::string begin_txn_running_key;
3896
0
    std::string end_txn_running_key;
3897
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
3898
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
3899
3900
0
    int64_t current_time =
3901
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
3902
3903
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
3904
0
                                               std::string_view k, std::string_view v) -> int {
3905
0
        std::unique_ptr<Transaction> txn;
3906
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3907
0
        if (err != TxnErrorCode::TXN_OK) {
3908
0
            return 0;
3909
0
        }
3910
0
        std::string_view k1 = k;
3911
0
        k1.remove_prefix(1);
3912
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3913
0
        if (decode_key(&k1, &out) != 0) {
3914
0
            return 0;
3915
0
        }
3916
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3917
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3918
        // Update txn_info
3919
0
        std::string txn_inf_key, txn_inf_val;
3920
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
3921
0
        err = txn->get(txn_inf_key, &txn_inf_val);
3922
0
        if (err != TxnErrorCode::TXN_OK) {
3923
0
            return 0;
3924
0
        }
3925
0
        TxnInfoPB txn_info;
3926
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
3927
0
            return 0;
3928
0
        }
3929
3930
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
3931
0
            TxnRunningPB txn_running_pb;
3932
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
3933
0
                return 0;
3934
0
            }
3935
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
3936
0
                return 0;
3937
0
            }
3938
0
            metrics_context.total_need_recycle_num++;
3939
0
        }
3940
0
        return 0;
3941
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
3942
0
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
3943
0
                            std::move(handle_abort_timeout_txn_kv), [&metrics_context]() -> int {
3944
0
                                metrics_context.report(true);
3945
0
                                return 0;
3946
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_1clEv
3947
0
}
3948
3949
// Scan and statistics expired_txn_label that need to be recycled
3950
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
3951
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
3952
3953
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
3954
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
3955
0
    std::string begin_recycle_txn_key;
3956
0
    std::string end_recycle_txn_key;
3957
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
3958
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
3959
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3960
0
    int64_t current_time_ms =
3961
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
3962
3963
    // for calculate the total num or bytes of recyled objects
3964
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
3965
0
        RecycleTxnPB recycle_txn_pb;
3966
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
3967
0
            return 0;
3968
0
        }
3969
0
        if ((config::force_immediate_recycle) ||
3970
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
3971
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
3972
0
             current_time_ms)) {
3973
0
            metrics_context.total_need_recycle_num++;
3974
0
        }
3975
0
        return 0;
3976
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
3977
0
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
3978
0
                            std::move(handle_expired_txn_label_kv), [&metrics_context]() -> int {
3979
0
                                metrics_context.report(true);
3980
0
                                return 0;
3981
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_1clEv
3982
0
}
3983
3984
// Scan and statistics copy_jobs that need to be recycled
3985
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
3986
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
3987
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
3988
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
3989
0
    std::string key0;
3990
0
    std::string key1;
3991
0
    copy_job_key(key_info0, &key0);
3992
0
    copy_job_key(key_info1, &key1);
3993
3994
    // for calculate the total num or bytes of recyled objects
3995
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
3996
0
        CopyJobPB copy_job;
3997
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
3998
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
3999
0
            return 0;
4000
0
        }
4001
4002
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
4003
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
4004
0
                int64_t current_time =
4005
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4006
0
                if (copy_job.finish_time_ms() > 0) {
4007
0
                    if (!config::force_immediate_recycle &&
4008
0
                        current_time < copy_job.finish_time_ms() +
4009
0
                                               config::copy_job_max_retention_second * 1000) {
4010
0
                        return 0;
4011
0
                    }
4012
0
                } else {
4013
0
                    if (!config::force_immediate_recycle &&
4014
0
                        current_time < copy_job.start_time_ms() +
4015
0
                                               config::copy_job_max_retention_second * 1000) {
4016
0
                        return 0;
4017
0
                    }
4018
0
                }
4019
0
            }
4020
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
4021
0
            int64_t current_time =
4022
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4023
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
4024
0
                return 0;
4025
0
            }
4026
0
        }
4027
0
        metrics_context.total_need_recycle_num++;
4028
0
        return 0;
4029
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4030
4031
0
    return scan_and_recycle(key0, key1, std::move(scan_and_statistics),
4032
0
                            [&metrics_context]() -> int {
4033
0
                                metrics_context.report(true);
4034
0
                                return 0;
4035
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_1clEv
4036
0
}
4037
4038
// Scan and statistics stage that need to be recycled
4039
0
int InstanceRecycler::scan_and_statistics_stage() {
4040
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
4041
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
4042
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
4043
0
    std::string key0 = recycle_stage_key(key_info0);
4044
0
    std::string key1 = recycle_stage_key(key_info1);
4045
4046
    // for calculate the total num or bytes of recyled objects
4047
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
4048
0
                                                        std::string_view v) -> int {
4049
0
        RecycleStagePB recycle_stage;
4050
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
4051
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
4052
0
            return 0;
4053
0
        }
4054
4055
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
4056
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4057
0
            LOG(WARNING) << "invalid idx: " << idx;
4058
0
            return 0;
4059
0
        }
4060
4061
0
        std::shared_ptr<StorageVaultAccessor> accessor;
4062
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
4063
0
                [&] {
4064
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
4065
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4066
0
                    if (!s3_conf) {
4067
0
                        return 0;
4068
0
                    }
4069
4070
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
4071
0
                    std::shared_ptr<S3Accessor> s3_accessor;
4072
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
4073
0
                    if (ret != 0) {
4074
0
                        return 0;
4075
0
                    }
4076
4077
0
                    accessor = std::move(s3_accessor);
4078
0
                    return 0;
4079
0
                }(),
4080
0
                "recycle_stage:get_accessor", &accessor);
4081
4082
0
        if (ret != 0) {
4083
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
4084
0
            return 0;
4085
0
        }
4086
4087
0
        metrics_context.total_need_recycle_num++;
4088
0
        return 0;
4089
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4090
4091
0
    return scan_and_recycle(key0, key1, std::move(scan_and_statistics),
4092
0
                            [&metrics_context]() -> int {
4093
0
                                metrics_context.report(true);
4094
0
                                return 0;
4095
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_1clEv
4096
0
}
4097
4098
// Scan and statistics expired_stage_objects that need to be recycled
4099
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
4100
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
4101
4102
    // for calculate the total num or bytes of recyled objects
4103
0
    auto scan_and_statistics = [&metrics_context, this]() {
4104
0
        for (const auto& stage : instance_info_.stages()) {
4105
0
            if (stopped()) {
4106
0
                break;
4107
0
            }
4108
0
            if (stage.type() == StagePB::EXTERNAL) {
4109
0
                continue;
4110
0
            }
4111
0
            int idx = stoi(stage.obj_info().id());
4112
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
4113
0
                continue;
4114
0
            }
4115
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
4116
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4117
0
            if (!s3_conf) {
4118
0
                continue;
4119
0
            }
4120
0
            s3_conf->prefix = stage.obj_info().prefix();
4121
0
            std::shared_ptr<S3Accessor> accessor;
4122
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
4123
0
            if (ret1 != 0) {
4124
0
                continue;
4125
0
            }
4126
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
4127
0
                continue;
4128
0
            }
4129
0
            metrics_context.total_need_recycle_num++;
4130
0
        }
4131
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
4132
4133
0
    scan_and_statistics();
4134
0
    metrics_context.report(true);
4135
0
    return 0;
4136
0
}
4137
4138
// Scan and statistics versions that need to be recycled
4139
0
int InstanceRecycler::scan_and_statistics_versions() {
4140
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
4141
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
4142
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
4143
4144
0
    int64_t last_scanned_table_id = 0;
4145
0
    bool is_recycled = false; // Is last scanned kv recycled
4146
    // for calculate the total num or bytes of recyled objects
4147
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
4148
0
                                       std::string_view k, std::string_view) {
4149
0
        auto k1 = k;
4150
0
        k1.remove_prefix(1);
4151
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
4152
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4153
0
        decode_key(&k1, &out);
4154
0
        DCHECK_EQ(out.size(), 6) << k;
4155
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
4156
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
4157
0
            metrics_context.total_need_recycle_num +=
4158
0
                    is_recycled; // Version kv of this table has been recycled
4159
0
            return 0;
4160
0
        }
4161
0
        last_scanned_table_id = table_id;
4162
0
        is_recycled = false;
4163
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
4164
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
4165
0
        std::unique_ptr<Transaction> txn;
4166
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4167
0
        if (err != TxnErrorCode::TXN_OK) {
4168
0
            return 0;
4169
0
        }
4170
0
        std::unique_ptr<RangeGetIterator> iter;
4171
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
4172
0
        if (err != TxnErrorCode::TXN_OK) {
4173
0
            return 0;
4174
0
        }
4175
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
4176
0
            return 0;
4177
0
        }
4178
0
        metrics_context.total_need_recycle_num++;
4179
0
        is_recycled = true;
4180
0
        return 0;
4181
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4182
4183
0
    return scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics),
4184
0
                            [&metrics_context]() -> int {
4185
0
                                metrics_context.report(true);
4186
0
                                return 0;
4187
0
                            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_1clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_1clEv
4188
0
}
4189
4190
} // namespace doris::cloud