Coverage Report

Created: 2025-06-19 00:56

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <bvar/status.h>
24
#include <gen_cpp/cloud.pb.h>
25
#include <gen_cpp/olap_file.pb.h>
26
27
#include <atomic>
28
#include <chrono>
29
#include <cstddef>
30
#include <cstdint>
31
#include <deque>
32
#include <initializer_list>
33
#include <numeric>
34
#include <string>
35
#include <string_view>
36
#include <utility>
37
38
#include "common/stopwatch.h"
39
#include "meta-service/meta_service.h"
40
#include "meta-service/meta_service_helper.h"
41
#include "meta-service/meta_service_schema.h"
42
#include "meta-service/txn_kv.h"
43
#include "meta-service/txn_kv_error.h"
44
#include "recycler/checker.h"
45
#include "recycler/hdfs_accessor.h"
46
#include "recycler/s3_accessor.h"
47
#include "recycler/storage_vault_accessor.h"
48
#ifdef UNIT_TEST
49
#include "../test/mock_accessor.h"
50
#endif
51
#include "common/bvars.h"
52
#include "common/config.h"
53
#include "common/encryption_util.h"
54
#include "common/logging.h"
55
#include "common/simple_thread_pool.h"
56
#include "common/util.h"
57
#include "cpp/sync_point.h"
58
#include "meta-service/keys.h"
59
#include "recycler/recycler_service.h"
60
#include "recycler/sync_executor.h"
61
#include "recycler/util.h"
62
63
namespace doris::cloud {
64
65
using namespace std::chrono;
66
67
// return 0 for success get a key, 1 for key not found, negative for error
68
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
69
0
    std::unique_ptr<Transaction> txn;
70
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
71
0
    if (err != TxnErrorCode::TXN_OK) {
72
0
        return -1;
73
0
    }
74
0
    switch (txn->get(key, &val, true)) {
75
0
    case TxnErrorCode::TXN_OK:
76
0
        return 0;
77
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
78
0
        return 1;
79
0
    default:
80
0
        return -1;
81
0
    };
82
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
83
84
// 0 for success, negative for error
85
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
86
199
                   std::unique_ptr<RangeGetIterator>& it) {
87
199
    std::unique_ptr<Transaction> txn;
88
199
    TxnErrorCode err = txn_kv->create_txn(&txn);
89
199
    if (err != TxnErrorCode::TXN_OK) {
90
0
        return -1;
91
0
    }
92
199
    switch (txn->get(begin, end, &it, true)) {
93
199
    case TxnErrorCode::TXN_OK:
94
199
        return 0;
95
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
96
0
        return 1;
97
0
    default:
98
0
        return -1;
99
199
    };
100
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
86
180
                   std::unique_ptr<RangeGetIterator>& it) {
87
180
    std::unique_ptr<Transaction> txn;
88
180
    TxnErrorCode err = txn_kv->create_txn(&txn);
89
180
    if (err != TxnErrorCode::TXN_OK) {
90
0
        return -1;
91
0
    }
92
180
    switch (txn->get(begin, end, &it, true)) {
93
180
    case TxnErrorCode::TXN_OK:
94
180
        return 0;
95
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
96
0
        return 1;
97
0
    default:
98
0
        return -1;
99
180
    };
100
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
86
19
                   std::unique_ptr<RangeGetIterator>& it) {
87
19
    std::unique_ptr<Transaction> txn;
88
19
    TxnErrorCode err = txn_kv->create_txn(&txn);
89
19
    if (err != TxnErrorCode::TXN_OK) {
90
0
        return -1;
91
0
    }
92
19
    switch (txn->get(begin, end, &it, true)) {
93
19
    case TxnErrorCode::TXN_OK:
94
19
        return 0;
95
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
96
0
        return 1;
97
0
    default:
98
0
        return -1;
99
19
    };
100
0
}
101
102
// return 0 for success otherwise error
103
10
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
104
10
    std::unique_ptr<Transaction> txn;
105
10
    TxnErrorCode err = txn_kv->create_txn(&txn);
106
10
    if (err != TxnErrorCode::TXN_OK) {
107
0
        return -1;
108
0
    }
109
3.04k
    for (auto k : keys) {
110
3.04k
        txn->remove(k);
111
3.04k
    }
112
10
    switch (txn->commit()) {
113
10
    case TxnErrorCode::TXN_OK:
114
10
        return 0;
115
0
    case TxnErrorCode::TXN_CONFLICT:
116
0
        return -1;
117
0
    default:
118
0
        return -1;
119
10
    }
120
10
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
103
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
104
6
    std::unique_ptr<Transaction> txn;
105
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
106
6
    if (err != TxnErrorCode::TXN_OK) {
107
0
        return -1;
108
0
    }
109
3.02k
    for (auto k : keys) {
110
3.02k
        txn->remove(k);
111
3.02k
    }
112
6
    switch (txn->commit()) {
113
6
    case TxnErrorCode::TXN_OK:
114
6
        return 0;
115
0
    case TxnErrorCode::TXN_CONFLICT:
116
0
        return -1;
117
0
    default:
118
0
        return -1;
119
6
    }
120
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
103
4
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
104
4
    std::unique_ptr<Transaction> txn;
105
4
    TxnErrorCode err = txn_kv->create_txn(&txn);
106
4
    if (err != TxnErrorCode::TXN_OK) {
107
0
        return -1;
108
0
    }
109
21
    for (auto k : keys) {
110
21
        txn->remove(k);
111
21
    }
112
4
    switch (txn->commit()) {
113
4
    case TxnErrorCode::TXN_OK:
114
4
        return 0;
115
0
    case TxnErrorCode::TXN_CONFLICT:
116
0
        return -1;
117
0
    default:
118
0
        return -1;
119
4
    }
120
4
}
121
122
// return 0 for success otherwise error
123
30
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
124
30
    std::unique_ptr<Transaction> txn;
125
30
    TxnErrorCode err = txn_kv->create_txn(&txn);
126
30
    if (err != TxnErrorCode::TXN_OK) {
127
0
        return -1;
128
0
    }
129
4.00k
    for (auto& k : keys) {
130
4.00k
        txn->remove(k);
131
4.00k
    }
132
30
    switch (txn->commit()) {
133
30
    case TxnErrorCode::TXN_OK:
134
30
        return 0;
135
0
    case TxnErrorCode::TXN_CONFLICT:
136
0
        return -1;
137
0
    default:
138
0
        return -1;
139
30
    }
140
30
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
123
30
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
124
30
    std::unique_ptr<Transaction> txn;
125
30
    TxnErrorCode err = txn_kv->create_txn(&txn);
126
30
    if (err != TxnErrorCode::TXN_OK) {
127
0
        return -1;
128
0
    }
129
4.00k
    for (auto& k : keys) {
130
4.00k
        txn->remove(k);
131
4.00k
    }
132
30
    switch (txn->commit()) {
133
30
    case TxnErrorCode::TXN_OK:
134
30
        return 0;
135
0
    case TxnErrorCode::TXN_CONFLICT:
136
0
        return -1;
137
0
    default:
138
0
        return -1;
139
30
    }
140
30
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
141
142
// return 0 for success otherwise error
143
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
144
0
                                       std::string_view end) {
145
0
    std::unique_ptr<Transaction> txn;
146
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
147
0
    if (err != TxnErrorCode::TXN_OK) {
148
0
        return -1;
149
0
    }
150
0
    txn->remove(begin, end);
151
0
    switch (txn->commit()) {
152
0
    case TxnErrorCode::TXN_OK:
153
0
        return 0;
154
0
    case TxnErrorCode::TXN_CONFLICT:
155
0
        return -1;
156
0
    default:
157
0
        return -1;
158
0
    }
159
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
160
161
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
162
                                      int64_t num_scanned, int64_t num_recycled,
163
29
                                      int64_t start_time) {
164
29
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
165
0
        int64_t cost =
166
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
167
0
        if (cost > config::recycle_task_threshold_seconds) {
168
0
            LOG_INFO("recycle task cost too much time cost={}s", cost)
169
0
                    .tag("instance_id", instance_id)
170
0
                    .tag("task", task_name)
171
0
                    .tag("num_scanned", num_scanned)
172
0
                    .tag("num_recycled", num_recycled);
173
0
        }
174
0
    }
175
29
    return;
176
29
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
163
27
                                      int64_t start_time) {
164
27
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
165
0
        int64_t cost =
166
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
167
0
        if (cost > config::recycle_task_threshold_seconds) {
168
0
            LOG_INFO("recycle task cost too much time cost={}s", cost)
169
0
                    .tag("instance_id", instance_id)
170
0
                    .tag("task", task_name)
171
0
                    .tag("num_scanned", num_scanned)
172
0
                    .tag("num_recycled", num_recycled);
173
0
        }
174
0
    }
175
27
    return;
176
27
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
163
2
                                      int64_t start_time) {
164
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
165
0
        int64_t cost =
166
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
167
0
        if (cost > config::recycle_task_threshold_seconds) {
168
0
            LOG_INFO("recycle task cost too much time cost={}s", cost)
169
0
                    .tag("instance_id", instance_id)
170
0
                    .tag("task", task_name)
171
0
                    .tag("num_scanned", num_scanned)
172
0
                    .tag("num_recycled", num_recycled);
173
0
        }
174
0
    }
175
2
    return;
176
2
}
177
178
4
// Builds the recycler: records this node's "ip:port", creates and starts the
// three shared thread pools (each constructed with
// config::recycle_pool_parallelism) and creates the lazy txn committer.
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);

    // Creates a named pool and starts it before handing it back.
    auto launch_pool = [](const char* pool_name) {
        auto pool =
                std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, pool_name);
        pool->start();
        return pool;
    };

    // Pools are created and started one after another, in this order.
    auto s3_producer_pool = launch_pool("s3_producer_pool");
    auto recycle_tablet_pool = launch_pool("recycle_tablet_pool");
    auto group_recycle_function_pool = launch_pool("group_recycle_function_pool");

    _thread_pool_group =
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
                                    std::move(group_recycle_function_pool));

    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_);
}
196
197
4
// Makes sure the background workers are stopped before the object goes away.
Recycler::~Recycler() {
    if (stopped()) {
        return;
    }
    stop();
}
202
203
4
void Recycler::instance_scanner_callback() {
204
    // sleep 60 seconds before scheduling for the launch procedure to complete:
205
    // some bad hdfs connection may cause some log to stdout stderr
206
    // which may pollute .out file and affect the script to check success
207
4
    std::this_thread::sleep_for(
208
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
209
8
    while (!stopped()) {
210
4
        std::vector<InstanceInfoPB> instances;
211
4
        get_all_instances(txn_kv_.get(), instances);
212
        // TODO(plat1ko): delete job recycle kv of non-existent instances
213
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
214
4
            std::stringstream ss;
215
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
216
4
            return ss.str();
217
4
        }();
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
213
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
214
4
            std::stringstream ss;
215
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
216
4
            return ss.str();
217
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
218
4
        if (!instances.empty()) {
219
            // enqueue instances
220
3
            std::lock_guard lock(mtx_);
221
30
            for (auto& instance : instances) {
222
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
223
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
224
                // skip instance already in pending queue
225
30
                if (success) {
226
30
                    pending_instance_queue_.push_back(std::move(instance));
227
30
                }
228
30
            }
229
3
            pending_instance_cond_.notify_all();
230
3
        }
231
4
        {
232
4
            std::unique_lock lock(mtx_);
233
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
234
7
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
234
7
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
235
4
        }
236
4
    }
237
4
}
238
239
8
void Recycler::recycle_callback() {
240
37
    while (!stopped()) {
241
35
        InstanceInfoPB instance;
242
35
        {
243
35
            std::unique_lock lock(mtx_);
244
35
            pending_instance_cond_.wait(
245
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
245
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
246
35
            if (stopped()) {
247
6
                return;
248
6
            }
249
29
            instance = std::move(pending_instance_queue_.front());
250
29
            pending_instance_queue_.pop_front();
251
29
            pending_instance_set_.erase(instance.instance_id());
252
29
        }
253
0
        auto& instance_id = instance.instance_id();
254
29
        {
255
29
            std::lock_guard lock(mtx_);
256
            // skip instance in recycling
257
29
            if (recycling_instance_map_.count(instance_id)) continue;
258
29
        }
259
29
        auto instance_recycler = std::make_shared<InstanceRecycler>(
260
29
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
261
262
29
        if (int r = instance_recycler->init(); r != 0) {
263
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
264
0
                         << " ret=" << r;
265
0
            continue;
266
0
        }
267
29
        std::string recycle_job_key;
268
29
        job_recycle_key({instance_id}, &recycle_job_key);
269
29
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
270
29
                                               ip_port_, config::recycle_interval_seconds * 1000);
271
29
        if (ret != 0) { // Prepare failed
272
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
273
20
                         << " ret=" << ret;
274
20
            continue;
275
20
        } else {
276
9
            std::lock_guard lock(mtx_);
277
9
            recycling_instance_map_.emplace(instance_id, instance_recycler);
278
9
        }
279
9
        if (stopped()) return;
280
9
        LOG_INFO("begin to recycle instance").tag("instance_id", instance_id);
281
9
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
282
9
        g_bvar_recycler_task_concurrency << 1;
283
9
        g_bvar_recycler_instance_running.put({instance_id}, 1);
284
9
        g_bvar_recycler_instance_recycle_times.put({instance_id}, std::make_pair(ctime_ms, -1));
285
9
        ret = instance_recycler->do_recycle();
286
9
        g_bvar_recycler_task_concurrency << -1;
287
9
        g_bvar_recycler_instance_running.put({instance_id}, -1);
288
        // If instance recycler has been aborted, don't finish this job
289
10
        if (!instance_recycler->stopped()) {
290
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
291
10
                                        ret == 0, ctime_ms);
292
10
        }
293
9
        {
294
9
            std::lock_guard lock(mtx_);
295
9
            recycling_instance_map_.erase(instance_id);
296
9
        }
297
9
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
298
9
        auto elpased_ms = now - ctime_ms;
299
9
        g_bvar_recycler_instance_recycle_times.put({instance_id}, std::make_pair(ctime_ms, now));
300
9
        g_bvar_recycler_instance_last_recycle_duration.put({instance_id}, elpased_ms);
301
9
        g_bvar_recycler_instance_next_time.put({instance_id},
302
9
                                               now + config::recycle_interval_seconds * 1000);
303
9
        LOG(INFO) << "recycle instance done, "
304
9
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
305
9
                  << " now: " << now;
306
307
9
        g_bvar_recycler_instance_recycle_last_success_times.put({instance_id}, now);
308
309
9
        LOG_INFO("finish recycle instance")
310
9
                .tag("instance_id", instance_id)
311
9
                .tag("cost_ms", elpased_ms);
312
9
    }
313
8
}
314
315
4
void Recycler::lease_recycle_jobs() {
316
54
    while (!stopped()) {
317
50
        std::vector<std::string> instances;
318
50
        instances.reserve(recycling_instance_map_.size());
319
50
        {
320
50
            std::lock_guard lock(mtx_);
321
50
            for (auto& [id, _] : recycling_instance_map_) {
322
30
                instances.push_back(id);
323
30
            }
324
50
        }
325
50
        for (auto& i : instances) {
326
30
            std::string recycle_job_key;
327
30
            job_recycle_key({i}, &recycle_job_key);
328
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
329
30
            if (ret == 1) {
330
0
                std::lock_guard lock(mtx_);
331
0
                if (auto it = recycling_instance_map_.find(i);
332
0
                    it != recycling_instance_map_.end()) {
333
0
                    it->second->stop();
334
0
                }
335
0
            }
336
30
        }
337
50
        {
338
50
            std::unique_lock lock(mtx_);
339
50
            notifier_.wait_for(lock,
340
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
341
100
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
341
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
342
50
        }
343
50
    }
344
4
}
345
346
4
void Recycler::check_recycle_tasks() {
347
7
    while (!stopped()) {
348
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
349
3
        {
350
3
            std::lock_guard lock(mtx_);
351
3
            recycling_instance_map = recycling_instance_map_;
352
3
        }
353
3
        for (auto& entry : recycling_instance_map) {
354
0
            entry.second->check_recycle_tasks();
355
0
        }
356
357
3
        std::unique_lock lock(mtx_);
358
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
359
6
                           [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
359
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
360
3
    }
361
4
}
362
363
4
// Starts the recycler: configures the instance filter, optionally starts the
// checker, registers the recycler service on `server` (when non-null) and
// spawns the scanner, the recycle workers, the lease thread and the watchdog.
// Returns 0 on success, or the checker's non-zero start code on failure.
int Recycler::start(brpc::Server* server) {
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);

    if (config::enable_checker) {
        checker_ = std::make_unique<Checker>(txn_kv_);
        if (int ret = checker_->start(); ret != 0) {
            const std::string msg = "failed to start checker";
            LOG(ERROR) << msg;
            std::cerr << msg << std::endl;
            return ret;
        }
        const std::string msg = "checker started";
        LOG(INFO) << msg;
        std::cout << msg << std::endl;
    }

    if (server != nullptr) {
        // Add service; it is registered with brpc::SERVER_OWNS_SERVICE.
        auto* recycler_service =
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
        server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE);
    }

    workers_.emplace_back([this] { instance_scanner_callback(); });
    for (int worker = 0; worker < config::recycle_concurrency; ++worker) {
        workers_.emplace_back([this] { recycle_callback(); });
    }
    workers_.emplace_back([this] { lease_recycle_jobs(); });
    workers_.emplace_back([this] { check_recycle_tasks(); });
    return 0;
}
398
399
4
void Recycler::stop() {
400
4
    stopped_ = true;
401
4
    notifier_.notify_all();
402
4
    pending_instance_cond_.notify_all();
403
4
    {
404
4
        std::lock_guard lock(mtx_);
405
4
        for (auto& [_, recycler] : recycling_instance_map_) {
406
0
            recycler->stop();
407
0
        }
408
4
    }
409
20
    for (auto& w : workers_) {
410
20
        if (w.joinable()) w.join();
411
20
    }
412
4
    if (checker_) {
413
0
        checker_->stop();
414
0
    }
415
4
}
416
417
// Per-instance cache of inverted index information, keyed by
// <index_id, schema_version>. Schemas that turned out to have no inverted
// index are remembered separately so they are not fetched again.
// All cache containers are guarded by `mtx_`.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Looks up the inverted index info of <index_id, schema_version>, reading
    // the schema from the KV store on a cache miss and caching the outcome.
    // `res` is left untouched when the schema is known to have no inverted index.
    // Return 0 if success, 1 if schema kv not found, negative for error
    // (a failed schema read returns the TxnErrorCode cast to int).
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // Storage format defaults to V1 when the schema does not carry one.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            for (auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Caches `index_info` for <index_id, schema_version>.
    // An `index_info` with no index entries means this schema has no inverted index.
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        // Mixes both components of the key. Hashing only `schema_version` (as a
        // plain overwrite of the seed would do) makes every index_id with the
        // same schema version land in the same bucket.
        size_t operator()(const Key& key) const {
            const size_t first_hash = std::hash<int64_t> {}(key.first);
            const size_t second_hash = std::hash<int32_t> {}(key.second);
            return first_hash ^
                   (second_hash + 0x9e3779b9 + (first_hash << 6) + (first_hash >> 2));
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
504
505
// Builds a recycler for one instance. `instance` is copied into
// `instance_info_`, and its id into `instance_id_`.
// The inverted index cache is constructed from the already-initialized members
// `instance_id_` and `txn_kv_`, not from the constructor parameters.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)) {
}
514
515
73
// Defaulted destructor, defined in this translation unit.
// NOTE(review): presumably kept out of line so that InvertedIndexIdCache
// (defined above in this file) is a complete type where the
// `inverted_index_id_cache_` member is destroyed -- confirm against the header.
InstanceRecycler::~InstanceRecycler() = default;
516
517
73
int InstanceRecycler::init_obj_store_accessors() {
518
73
    for (const auto& obj_info : instance_info_.obj_info()) {
519
54
#ifdef UNIT_TEST
520
54
        auto accessor = std::make_shared<MockAccessor>();
521
#else
522
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
523
        if (!s3_conf) {
524
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
525
            return -1;
526
        }
527
528
        std::shared_ptr<S3Accessor> accessor;
529
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
530
        if (ret != 0) {
531
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
532
                         << " resource_id=" << obj_info.id();
533
            return ret;
534
        }
535
#endif
536
54
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
537
54
    }
538
539
73
    return 0;
540
73
}
541
542
73
int InstanceRecycler::init_storage_vault_accessors() {
543
73
    if (instance_info_.resource_ids().empty()) {
544
66
        return 0;
545
66
    }
546
547
7
    FullRangeGetIteratorOptions opts(txn_kv_);
548
7
    opts.prefetch = true;
549
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
550
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
551
552
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
553
18
        auto [k, v] = *kv;
554
18
        StorageVaultPB vault;
555
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
556
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
557
0
            return -1;
558
0
        }
559
18
        std::string recycler_storage_vault_white_list = accumulate(
560
18
                config::recycler_storage_vault_white_list.begin(),
561
18
                config::recycler_storage_vault_white_list.end(), std::string(),
562
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
562
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
563
18
        LOG_INFO("config::recycler_storage_vault_white_list")
564
18
                .tag("", recycler_storage_vault_white_list);
565
18
        if (!config::recycler_storage_vault_white_list.empty()) {
566
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
567
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
568
8
                it == config::recycler_storage_vault_white_list.end()) {
569
2
                LOG_WARNING(
570
2
                        "failed to init accessor for vault because this vault is not in "
571
2
                        "config::recycler_storage_vault_white_list. ")
572
2
                        .tag(" vault name:", vault.name())
573
2
                        .tag(" config::recycler_storage_vault_white_list:",
574
2
                             recycler_storage_vault_white_list);
575
2
                continue;
576
2
            }
577
8
        }
578
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
579
16
                                 &accessor_map_, &vault);
580
16
        if (vault.has_hdfs_info()) {
581
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
582
9
            int ret = accessor->init();
583
9
            if (ret != 0) {
584
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
585
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
586
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
587
4
                continue;
588
4
            }
589
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
590
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
591
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
592
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
593
7
        } else if (vault.has_obj_info()) {
594
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
595
7
            if (!s3_conf) {
596
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
597
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
598
1
                continue;
599
1
            }
600
601
6
            std::shared_ptr<S3Accessor> accessor;
602
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
603
6
            if (ret != 0) {
604
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
605
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
606
0
                             << " ret=" << ret
607
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
608
0
                continue;
609
0
            }
610
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
611
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
612
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
613
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
614
6
        }
615
16
    }
616
617
7
    if (!it->is_valid()) {
618
0
        LOG_WARNING("failed to get storage vault kv");
619
0
        return -1;
620
0
    }
621
622
7
    if (accessor_map_.empty()) {
623
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
624
1
        return -2;
625
1
    }
626
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
627
6
             instance_id_);
628
629
6
    return 0;
630
7
}
631
632
73
int InstanceRecycler::init() {
633
73
    int ret = init_obj_store_accessors();
634
73
    if (ret != 0) {
635
0
        return ret;
636
0
    }
637
638
73
    return init_storage_vault_accessors();
639
73
}
640
641
template <typename... Func>
642
80
auto task_wrapper(Func... funcs) -> std::function<int()> {
643
80
    return [funcs...]() {
644
80
        return [](std::initializer_list<int> ret_vals) {
645
80
            int i = 0;
646
100
            for (int ret : ret_vals) {
647
100
                if (ret != 0) {
648
0
                    i = ret;
649
0
                }
650
100
            }
651
80
            return i;
652
80
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
20
            for (int ret : ret_vals) {
647
20
                if (ret != 0) {
648
0
                    i = ret;
649
0
                }
650
20
            }
651
10
            return i;
652
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
0
                    i = ret;
649
0
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
0
                    i = ret;
649
0
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
20
            for (int ret : ret_vals) {
647
20
                if (ret != 0) {
648
0
                    i = ret;
649
0
                }
650
20
            }
651
10
            return i;
652
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
0
                    i = ret;
649
0
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
0
                    i = ret;
649
0
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
0
                    i = ret;
649
0
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
0
                    i = ret;
649
0
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
653
80
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
654
80
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
642
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
654
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
642
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
654
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_
Line
Count
Source
642
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
654
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
642
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
654
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
642
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
654
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
642
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
654
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Line
Count
Source
642
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
654
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
642
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
643
10
    return [funcs...]() {
644
10
        return [](std::initializer_list<int> ret_vals) {
645
10
            int i = 0;
646
10
            for (int ret : ret_vals) {
647
10
                if (ret != 0) {
648
10
                    i = ret;
649
10
                }
650
10
            }
651
10
            return i;
652
10
        }({funcs()...});
653
10
    };
654
10
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
655
656
10
int InstanceRecycler::do_recycle() {
657
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
658
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
659
0
        return recycle_deleted_instance();
660
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
661
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
662
10
                                        fmt::format("instance id {}", instance_id_),
663
80
                                        [](int r) { return r != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi
Line
Count
Source
663
80
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi
664
10
        sync_executor
665
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
666
                                   // because they may both recycle the same set of tablets
667
                        // recycle dropped table or indexes (mv, rollup)
668
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv
Line
Count
Source
668
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv
669
                        // recycle dropped partitions
670
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
670
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
671
10
                .add(task_wrapper(
672
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
672
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
673
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
673
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
674
10
                .add(task_wrapper(
675
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
675
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
676
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
676
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
677
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
677
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
678
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
678
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
679
10
                .add(task_wrapper(
680
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
680
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
681
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
681
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
682
10
        bool finished = true;
683
10
        std::vector<int> rets = sync_executor.when_all(&finished);
684
80
        for (int ret : rets) {
685
80
            if (ret != 0) {
686
0
                return ret;
687
0
            }
688
80
        }
689
10
        return finished ? 0 : -1;
690
10
    } else {
691
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
692
0
                     << " instance_id=" << instance_id_;
693
0
        return -1;
694
0
    }
695
10
}
696
697
/**
698
 * 1. delete all remote data
699
 * 2. delete all kv
700
 * 3. remove instance kv
701
 */
702
1
int InstanceRecycler::recycle_deleted_instance() {
703
1
    LOG_INFO("begin to recycle deleted instance").tag("instance_id", instance_id_);
704
705
1
    int ret = 0;
706
1
    auto start_time = steady_clock::now();
707
708
1
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
709
1
        auto cost = duration<float>(steady_clock::now() - start_time).count();
710
1
        LOG(INFO) << (ret == 0 ? "successfully" : "failed to")
711
1
                  << " recycle deleted instance, cost=" << cost
712
1
                  << "s, instance_id=" << instance_id_;
713
1
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEPi
Line
Count
Source
708
1
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
709
1
        auto cost = duration<float>(steady_clock::now() - start_time).count();
710
1
        LOG(INFO) << (ret == 0 ? "successfully" : "failed to")
711
1
                  << " recycle deleted instance, cost=" << cost
712
1
                  << "s, instance_id=" << instance_id_;
713
1
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEPi
714
715
    // delete all remote data
716
2
    for (auto& [_, accessor] : accessor_map_) {
717
2
        if (stopped()) {
718
0
            return ret;
719
0
        }
720
721
2
        LOG(INFO) << "begin to delete all objects in " << accessor->uri();
722
2
        int del_ret = accessor->delete_all();
723
2
        if (del_ret == 0) {
724
2
            LOG(INFO) << "successfully delete all objects in " << accessor->uri();
725
2
        } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
726
            // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
727
            // so the recycling has been successful.
728
0
            ret = -1;
729
0
        }
730
2
    }
731
732
1
    if (ret != 0) {
733
0
        LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
734
0
        return ret;
735
0
    }
736
737
    // delete all kv
738
1
    std::unique_ptr<Transaction> txn;
739
1
    TxnErrorCode err = txn_kv_->create_txn(&txn);
740
1
    if (err != TxnErrorCode::TXN_OK) {
741
0
        LOG(WARNING) << "failed to create txn";
742
0
        ret = -1;
743
0
        return -1;
744
0
    }
745
1
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
746
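    // delete kv before deleting objects to prevent the checker from misjudging data loss (NOTE(review): the delete_all() loop above already removed remote objects before this point -- confirm this ordering comment is still accurate)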
747
1
    std::string start_txn_key = txn_key_prefix(instance_id_);
748
1
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
749
1
    txn->remove(start_txn_key, end_txn_key);
750
1
    std::string start_version_key = version_key_prefix(instance_id_);
751
1
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
752
1
    txn->remove(start_version_key, end_version_key);
753
1
    std::string start_meta_key = meta_key_prefix(instance_id_);
754
1
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
755
1
    txn->remove(start_meta_key, end_meta_key);
756
1
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
757
1
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
758
1
    txn->remove(start_recycle_key, end_recycle_key);
759
1
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
760
1
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
761
1
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
762
1
    std::string start_copy_key = copy_key_prefix(instance_id_);
763
1
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
764
1
    txn->remove(start_copy_key, end_copy_key);
765
    // should not remove job key range, because we need to reserve job recycle kv
766
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
767
1
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
768
1
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
769
1
    txn->remove(start_job_tablet_key, end_job_tablet_key);
770
1
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
771
1
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
772
1
    std::string start_vault_key = storage_vault_key(key_info0);
773
1
    std::string end_vault_key = storage_vault_key(key_info1);
774
1
    txn->remove(start_vault_key, end_vault_key);
775
1
    err = txn->commit();
776
1
    if (err != TxnErrorCode::TXN_OK) {
777
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
778
0
        ret = -1;
779
0
    }
780
781
1
    if (ret == 0) {
782
        // remove instance kv
783
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
784
1
        err = txn_kv_->create_txn(&txn);
785
1
        if (err != TxnErrorCode::TXN_OK) {
786
0
            LOG(WARNING) << "failed to create txn";
787
0
            ret = -1;
788
0
            return ret;
789
0
        }
790
1
        std::string key;
791
1
        instance_key({instance_id_}, &key);
792
1
        txn->remove(key);
793
1
        err = txn->commit();
794
1
        if (err != TxnErrorCode::TXN_OK) {
795
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
796
0
                         << " err=" << err;
797
0
            ret = -1;
798
0
        }
799
1
    }
800
1
    return ret;
801
1
}
802
803
14
int InstanceRecycler::recycle_indexes() {
804
14
    const std::string task_name = "recycle_indexes";
805
14
    int64_t num_scanned = 0;
806
14
    int64_t num_expired = 0;
807
14
    int64_t num_recycled = 0;
808
809
14
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
810
14
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
811
14
    std::string index_key0;
812
14
    std::string index_key1;
813
14
    recycle_index_key(index_key_info0, &index_key0);
814
14
    recycle_index_key(index_key_info1, &index_key1);
815
816
14
    LOG_INFO("begin to recycle indexes").tag("instance_id", instance_id_);
817
818
14
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
819
14
    register_recycle_task(task_name, start_time);
820
821
14
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
822
14
        unregister_recycle_task(task_name);
823
14
        int64_t cost =
824
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
825
14
        LOG_INFO("recycle indexes finished, cost={}s", cost)
826
14
                .tag("instance_id", instance_id_)
827
14
                .tag("num_scanned", num_scanned)
828
14
                .tag("num_expired", num_expired)
829
14
                .tag("num_recycled", num_recycled);
830
14
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEPi
Line
Count
Source
821
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
822
12
        unregister_recycle_task(task_name);
823
12
        int64_t cost =
824
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
825
12
        LOG_INFO("recycle indexes finished, cost={}s", cost)
826
12
                .tag("instance_id", instance_id_)
827
12
                .tag("num_scanned", num_scanned)
828
12
                .tag("num_expired", num_expired)
829
12
                .tag("num_recycled", num_recycled);
830
12
    });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEPi
Line
Count
Source
821
2
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
822
2
        unregister_recycle_task(task_name);
823
2
        int64_t cost =
824
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
825
2
        LOG_INFO("recycle indexes finished, cost={}s", cost)
826
2
                .tag("instance_id", instance_id_)
827
2
                .tag("num_scanned", num_scanned)
828
2
                .tag("num_expired", num_expired)
829
2
                .tag("num_recycled", num_recycled);
830
2
    });
831
832
14
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
833
834
14
    auto calc_expiration = [&earlest_ts, this](const RecycleIndexPB& index) {
835
8
        if (config::force_immediate_recycle) {
836
2
            return 0L;
837
2
        }
838
6
        int64_t expiration = index.expiration() > 0 ? index.expiration() : index.creation_time();
839
6
        int64_t retention_seconds = config::retention_seconds;
840
6
        if (index.state() == RecycleIndexPB::DROPPED) {
841
6
            retention_seconds =
842
6
                    std::min(config::dropped_index_retention_seconds, retention_seconds);
843
6
        }
844
6
        int64_t final_expiration = expiration + retention_seconds;
845
6
        if (earlest_ts > final_expiration) {
846
2
            earlest_ts = final_expiration;
847
2
            g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, earlest_ts);
848
2
        }
849
6
        return final_expiration;
850
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_3clERKNS0_14RecycleIndexPBE
Line
Count
Source
834
6
    auto calc_expiration = [&earlest_ts, this](const RecycleIndexPB& index) {
835
6
        if (config::force_immediate_recycle) {
836
0
            return 0L;
837
0
        }
838
6
        int64_t expiration = index.expiration() > 0 ? index.expiration() : index.creation_time();
839
6
        int64_t retention_seconds = config::retention_seconds;
840
6
        if (index.state() == RecycleIndexPB::DROPPED) {
841
6
            retention_seconds =
842
6
                    std::min(config::dropped_index_retention_seconds, retention_seconds);
843
6
        }
844
6
        int64_t final_expiration = expiration + retention_seconds;
845
6
        if (earlest_ts > final_expiration) {
846
2
            earlest_ts = final_expiration;
847
2
            g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, earlest_ts);
848
2
        }
849
6
        return final_expiration;
850
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_3clERKNS0_14RecycleIndexPBE
Line
Count
Source
834
2
    auto calc_expiration = [&earlest_ts, this](const RecycleIndexPB& index) {
835
2
        if (config::force_immediate_recycle) {
836
2
            return 0L;
837
2
        }
838
0
        int64_t expiration = index.expiration() > 0 ? index.expiration() : index.creation_time();
839
0
        int64_t retention_seconds = config::retention_seconds;
840
0
        if (index.state() == RecycleIndexPB::DROPPED) {
841
0
            retention_seconds =
842
0
                    std::min(config::dropped_index_retention_seconds, retention_seconds);
843
0
        }
844
0
        int64_t final_expiration = expiration + retention_seconds;
845
0
        if (earlest_ts > final_expiration) {
846
0
            earlest_ts = final_expiration;
847
0
            g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, earlest_ts);
848
0
        }
849
0
        return final_expiration;
850
2
    };
851
852
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
853
14
    std::vector<std::string_view> index_keys;
854
14
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
855
8
        ++num_scanned;
856
8
        RecycleIndexPB index_pb;
857
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
858
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
859
0
            return -1;
860
0
        }
861
8
        int64_t current_time = ::time(nullptr);
862
8
        if (current_time < calc_expiration(index_pb)) { // not expired
863
0
            return 0;
864
0
        }
865
8
        ++num_expired;
866
        // decode index_id
867
8
        auto k1 = k;
868
8
        k1.remove_prefix(1);
869
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
870
8
        decode_key(&k1, &out);
871
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
872
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
873
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
874
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
875
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
876
        // Change state to RECYCLING
877
8
        std::unique_ptr<Transaction> txn;
878
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
879
8
        if (err != TxnErrorCode::TXN_OK) {
880
0
            LOG_WARNING("failed to create txn").tag("err", err);
881
0
            return -1;
882
0
        }
883
8
        std::string val;
884
8
        err = txn->get(k, &val);
885
8
        if (err ==
886
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
887
0
            LOG_INFO("index {} has been recycled or committed", index_id);
888
0
            return 0;
889
0
        }
890
8
        if (err != TxnErrorCode::TXN_OK) {
891
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
892
0
            return -1;
893
0
        }
894
8
        index_pb.Clear();
895
8
        if (!index_pb.ParseFromString(val)) {
896
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
897
0
            return -1;
898
0
        }
899
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
900
7
            index_pb.set_state(RecycleIndexPB::RECYCLING);
901
7
            txn->put(k, index_pb.SerializeAsString());
902
7
            err = txn->commit();
903
7
            if (err != TxnErrorCode::TXN_OK) {
904
0
                LOG_WARNING("failed to commit txn").tag("err", err);
905
0
                return -1;
906
0
            }
907
7
        }
908
8
        if (recycle_tablets(index_pb.table_id(), index_id) != 0) {
909
1
            LOG_WARNING("failed to recycle tablets under index")
910
1
                    .tag("table_id", index_pb.table_id())
911
1
                    .tag("instance_id", instance_id_)
912
1
                    .tag("index_id", index_id);
913
1
            return -1;
914
1
        }
915
7
        ++num_recycled;
916
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
917
7
        index_keys.push_back(k);
918
7
        return 0;
919
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
854
6
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
855
6
        ++num_scanned;
856
6
        RecycleIndexPB index_pb;
857
6
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
858
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
859
0
            return -1;
860
0
        }
861
6
        int64_t current_time = ::time(nullptr);
862
6
        if (current_time < calc_expiration(index_pb)) { // not expired
863
0
            return 0;
864
0
        }
865
6
        ++num_expired;
866
        // decode index_id
867
6
        auto k1 = k;
868
6
        k1.remove_prefix(1);
869
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
870
6
        decode_key(&k1, &out);
871
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
872
6
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
873
6
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
874
6
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
875
6
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
876
        // Change state to RECYCLING
877
6
        std::unique_ptr<Transaction> txn;
878
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
879
6
        if (err != TxnErrorCode::TXN_OK) {
880
0
            LOG_WARNING("failed to create txn").tag("err", err);
881
0
            return -1;
882
0
        }
883
6
        std::string val;
884
6
        err = txn->get(k, &val);
885
6
        if (err ==
886
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
887
0
            LOG_INFO("index {} has been recycled or committed", index_id);
888
0
            return 0;
889
0
        }
890
6
        if (err != TxnErrorCode::TXN_OK) {
891
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
892
0
            return -1;
893
0
        }
894
6
        index_pb.Clear();
895
6
        if (!index_pb.ParseFromString(val)) {
896
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
897
0
            return -1;
898
0
        }
899
6
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
900
6
            index_pb.set_state(RecycleIndexPB::RECYCLING);
901
6
            txn->put(k, index_pb.SerializeAsString());
902
6
            err = txn->commit();
903
6
            if (err != TxnErrorCode::TXN_OK) {
904
0
                LOG_WARNING("failed to commit txn").tag("err", err);
905
0
                return -1;
906
0
            }
907
6
        }
908
6
        if (recycle_tablets(index_pb.table_id(), index_id) != 0) {
909
0
            LOG_WARNING("failed to recycle tablets under index")
910
0
                    .tag("table_id", index_pb.table_id())
911
0
                    .tag("instance_id", instance_id_)
912
0
                    .tag("index_id", index_id);
913
0
            return -1;
914
0
        }
915
6
        ++num_recycled;
916
6
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
917
6
        index_keys.push_back(k);
918
6
        return 0;
919
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
854
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
855
2
        ++num_scanned;
856
2
        RecycleIndexPB index_pb;
857
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
858
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
859
0
            return -1;
860
0
        }
861
2
        int64_t current_time = ::time(nullptr);
862
2
        if (current_time < calc_expiration(index_pb)) { // not expired
863
0
            return 0;
864
0
        }
865
2
        ++num_expired;
866
        // decode index_id
867
2
        auto k1 = k;
868
2
        k1.remove_prefix(1);
869
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
870
2
        decode_key(&k1, &out);
871
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
872
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
873
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
874
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
875
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
876
        // Change state to RECYCLING
877
2
        std::unique_ptr<Transaction> txn;
878
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
879
2
        if (err != TxnErrorCode::TXN_OK) {
880
0
            LOG_WARNING("failed to create txn").tag("err", err);
881
0
            return -1;
882
0
        }
883
2
        std::string val;
884
2
        err = txn->get(k, &val);
885
2
        if (err ==
886
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
887
0
            LOG_INFO("index {} has been recycled or committed", index_id);
888
0
            return 0;
889
0
        }
890
2
        if (err != TxnErrorCode::TXN_OK) {
891
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
892
0
            return -1;
893
0
        }
894
2
        index_pb.Clear();
895
2
        if (!index_pb.ParseFromString(val)) {
896
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
897
0
            return -1;
898
0
        }
899
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
900
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
901
1
            txn->put(k, index_pb.SerializeAsString());
902
1
            err = txn->commit();
903
1
            if (err != TxnErrorCode::TXN_OK) {
904
0
                LOG_WARNING("failed to commit txn").tag("err", err);
905
0
                return -1;
906
0
            }
907
1
        }
908
2
        if (recycle_tablets(index_pb.table_id(), index_id) != 0) {
909
1
            LOG_WARNING("failed to recycle tablets under index")
910
1
                    .tag("table_id", index_pb.table_id())
911
1
                    .tag("instance_id", instance_id_)
912
1
                    .tag("index_id", index_id);
913
1
            return -1;
914
1
        }
915
1
        ++num_recycled;
916
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
917
1
        index_keys.push_back(k);
918
1
        return 0;
919
2
    };
920
921
14
    auto loop_done = [&index_keys, this]() -> int {
922
4
        if (index_keys.empty()) return 0;
923
3
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
924
3
                                                              [&](int*) { index_keys.clear(); });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
924
2
                                                              [&](int*) { index_keys.clear(); });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
924
1
                                                              [&](int*) { index_keys.clear(); });
925
3
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
926
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
927
0
            return -1;
928
0
        }
929
3
        return 0;
930
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEv
Line
Count
Source
921
2
    auto loop_done = [&index_keys, this]() -> int {
922
2
        if (index_keys.empty()) return 0;
923
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
924
2
                                                              [&](int*) { index_keys.clear(); });
925
2
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
926
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
927
0
            return -1;
928
0
        }
929
2
        return 0;
930
2
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEv
Line
Count
Source
921
2
    auto loop_done = [&index_keys, this]() -> int {
922
2
        if (index_keys.empty()) return 0;
923
1
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
924
1
                                                              [&](int*) { index_keys.clear(); });
925
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
926
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
927
0
            return -1;
928
0
        }
929
1
        return 0;
930
1
    };
931
932
14
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
933
14
}
934
935
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
936
271
                             int64_t tablet_id) {
937
271
    std::unique_ptr<Transaction> txn;
938
271
    TxnErrorCode err = txn_kv->create_txn(&txn);
939
271
    if (err != TxnErrorCode::TXN_OK) {
940
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
941
0
                     << " tablet_id=" << tablet_id << " err=" << err;
942
0
        return false;
943
0
    }
944
945
271
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
946
271
    std::string tablet_idx_val;
947
271
    err = txn->get(tablet_idx_key, &tablet_idx_val);
948
271
    if (TxnErrorCode::TXN_OK != err) {
949
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
950
0
                     << " tablet_id=" << tablet_id << " err=" << err
951
0
                     << " key=" << hex(tablet_idx_key);
952
0
        return false;
953
0
    }
954
955
271
    TabletIndexPB tablet_idx_pb;
956
271
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
957
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
958
0
                     << " tablet_id=" << tablet_id;
959
0
        return false;
960
0
    }
961
962
271
    if (!tablet_idx_pb.has_db_id()) {
963
        // In the previous version, the db_id was not set in the index_pb.
964
        // If updating to the version which enable txn lazy commit, the db_id will be set.
965
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
966
0
                  << " instance_id=" << instance_id
967
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
968
0
        return true;
969
0
    }
970
971
271
    std::string ver_val;
972
271
    std::string ver_key =
973
271
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
974
271
                                   tablet_idx_pb.partition_id()});
975
271
    err = txn->get(ver_key, &ver_val);
976
977
271
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
978
0
        LOG(INFO) << ""
979
0
                     "partition version not found, instance_id="
980
0
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
981
0
                  << " table_id=" << tablet_idx_pb.table_id()
982
0
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
983
0
                  << " key=" << hex(ver_key);
984
0
        return true;
985
0
    }
986
987
271
    if (TxnErrorCode::TXN_OK != err) {
988
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
989
0
                     << " db_id=" << tablet_idx_pb.db_id()
990
0
                     << " table_id=" << tablet_idx_pb.table_id()
991
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
992
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
993
0
        return false;
994
0
    }
995
996
271
    VersionPB version_pb;
997
271
    if (!version_pb.ParseFromString(ver_val)) {
998
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
999
0
                     << " db_id=" << tablet_idx_pb.db_id()
1000
0
                     << " table_id=" << tablet_idx_pb.table_id()
1001
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1002
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
1003
0
        return false;
1004
0
    }
1005
1006
271
    if (version_pb.pending_txn_ids_size() > 0) {
1007
20
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
1008
20
        DCHECK(version_pb.pending_txn_ids_size() == 1);
1009
20
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
1010
20
                     << " db_id=" << tablet_idx_pb.db_id()
1011
20
                     << " table_id=" << tablet_idx_pb.table_id()
1012
20
                     << " partition_id=" << tablet_idx_pb.partition_id()
1013
20
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
1014
20
                     << " key=" << hex(ver_key);
1015
20
        return false;
1016
20
    }
1017
251
    return true;
1018
271
}
1019
1020
14
int InstanceRecycler::recycle_partitions() {
1021
14
    const std::string task_name = "recycle_partitions";
1022
14
    int64_t num_scanned = 0;
1023
14
    int64_t num_expired = 0;
1024
14
    int64_t num_recycled = 0;
1025
1026
14
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
1027
14
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
1028
14
    std::string part_key0;
1029
14
    std::string part_key1;
1030
14
    recycle_partition_key(part_key_info0, &part_key0);
1031
14
    recycle_partition_key(part_key_info1, &part_key1);
1032
1033
14
    LOG_INFO("begin to recycle partitions").tag("instance_id", instance_id_);
1034
1035
14
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1036
14
    register_recycle_task(task_name, start_time);
1037
1038
14
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1039
14
        unregister_recycle_task(task_name);
1040
14
        int64_t cost =
1041
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1042
14
        LOG_INFO("recycle partitions finished, cost={}s", cost)
1043
14
                .tag("instance_id", instance_id_)
1044
14
                .tag("num_scanned", num_scanned)
1045
14
                .tag("num_expired", num_expired)
1046
14
                .tag("num_recycled", num_recycled);
1047
14
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEPi
Line
Count
Source
1038
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1039
12
        unregister_recycle_task(task_name);
1040
12
        int64_t cost =
1041
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1042
12
        LOG_INFO("recycle partitions finished, cost={}s", cost)
1043
12
                .tag("instance_id", instance_id_)
1044
12
                .tag("num_scanned", num_scanned)
1045
12
                .tag("num_expired", num_expired)
1046
12
                .tag("num_recycled", num_recycled);
1047
12
    });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEPi
Line
Count
Source
1038
2
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1039
2
        unregister_recycle_task(task_name);
1040
2
        int64_t cost =
1041
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1042
2
        LOG_INFO("recycle partitions finished, cost={}s", cost)
1043
2
                .tag("instance_id", instance_id_)
1044
2
                .tag("num_scanned", num_scanned)
1045
2
                .tag("num_expired", num_expired)
1046
2
                .tag("num_recycled", num_recycled);
1047
2
    });
1048
1049
14
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1050
1051
14
    auto calc_expiration = [&earlest_ts, this](const RecyclePartitionPB& partition) {
1052
8
        if (config::force_immediate_recycle) {
1053
2
            return 0L;
1054
2
        }
1055
6
        int64_t expiration =
1056
6
                partition.expiration() > 0 ? partition.expiration() : partition.creation_time();
1057
6
        int64_t retention_seconds = config::retention_seconds;
1058
6
        if (partition.state() == RecyclePartitionPB::DROPPED) {
1059
6
            retention_seconds =
1060
6
                    std::min(config::dropped_partition_retention_seconds, retention_seconds);
1061
6
        }
1062
6
        int64_t final_expiration = expiration + retention_seconds;
1063
6
        if (earlest_ts > final_expiration) {
1064
2
            earlest_ts = final_expiration;
1065
2
            g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, earlest_ts);
1066
2
        }
1067
6
        return final_expiration;
1068
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_3clERKNS0_18RecyclePartitionPBE
Line
Count
Source
1051
6
    auto calc_expiration = [&earlest_ts, this](const RecyclePartitionPB& partition) {
1052
6
        if (config::force_immediate_recycle) {
1053
0
            return 0L;
1054
0
        }
1055
6
        int64_t expiration =
1056
6
                partition.expiration() > 0 ? partition.expiration() : partition.creation_time();
1057
6
        int64_t retention_seconds = config::retention_seconds;
1058
6
        if (partition.state() == RecyclePartitionPB::DROPPED) {
1059
6
            retention_seconds =
1060
6
                    std::min(config::dropped_partition_retention_seconds, retention_seconds);
1061
6
        }
1062
6
        int64_t final_expiration = expiration + retention_seconds;
1063
6
        if (earlest_ts > final_expiration) {
1064
2
            earlest_ts = final_expiration;
1065
2
            g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, earlest_ts);
1066
2
        }
1067
6
        return final_expiration;
1068
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_3clERKNS0_18RecyclePartitionPBE
Line
Count
Source
1051
2
    auto calc_expiration = [&earlest_ts, this](const RecyclePartitionPB& partition) {
1052
2
        if (config::force_immediate_recycle) {
1053
2
            return 0L;
1054
2
        }
1055
0
        int64_t expiration =
1056
0
                partition.expiration() > 0 ? partition.expiration() : partition.creation_time();
1057
0
        int64_t retention_seconds = config::retention_seconds;
1058
0
        if (partition.state() == RecyclePartitionPB::DROPPED) {
1059
0
            retention_seconds =
1060
0
                    std::min(config::dropped_partition_retention_seconds, retention_seconds);
1061
0
        }
1062
0
        int64_t final_expiration = expiration + retention_seconds;
1063
0
        if (earlest_ts > final_expiration) {
1064
0
            earlest_ts = final_expiration;
1065
0
            g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, earlest_ts);
1066
0
        }
1067
0
        return final_expiration;
1068
2
    };
1069
1070
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
1071
14
    std::vector<std::string_view> partition_keys;
1072
14
    std::vector<std::string> partition_version_keys;
1073
14
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1074
8
        ++num_scanned;
1075
8
        RecyclePartitionPB part_pb;
1076
8
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1077
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1078
0
            return -1;
1079
0
        }
1080
8
        int64_t current_time = ::time(nullptr);
1081
8
        if (current_time < calc_expiration(part_pb)) { // not expired
1082
0
            return 0;
1083
0
        }
1084
8
        ++num_expired;
1085
        // decode partition_id
1086
8
        auto k1 = k;
1087
8
        k1.remove_prefix(1);
1088
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1089
8
        decode_key(&k1, &out);
1090
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1091
8
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1092
8
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1093
8
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1094
8
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1095
        // Change state to RECYCLING
1096
8
        std::unique_ptr<Transaction> txn;
1097
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1098
8
        if (err != TxnErrorCode::TXN_OK) {
1099
0
            LOG_WARNING("failed to create txn").tag("err", err);
1100
0
            return -1;
1101
0
        }
1102
8
        std::string val;
1103
8
        err = txn->get(k, &val);
1104
8
        if (err ==
1105
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1106
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1107
0
            return 0;
1108
0
        }
1109
8
        if (err != TxnErrorCode::TXN_OK) {
1110
0
            LOG_WARNING("failed to get kv");
1111
0
            return -1;
1112
0
        }
1113
8
        part_pb.Clear();
1114
8
        if (!part_pb.ParseFromString(val)) {
1115
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1116
0
            return -1;
1117
0
        }
1118
        // Partitions with PREPARED state MUST have no data
1119
8
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
1120
8
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1121
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1122
7
            txn->put(k, part_pb.SerializeAsString());
1123
7
            err = txn->commit();
1124
7
            if (err != TxnErrorCode::TXN_OK) {
1125
0
                LOG_WARNING("failed to commit txn: {}", err);
1126
0
                return -1;
1127
0
            }
1128
7
        }
1129
1130
8
        int ret = 0;
1131
32
        for (int64_t index_id : part_pb.index_id()) {
1132
32
            if (recycle_tablets(part_pb.table_id(), index_id, partition_id, is_empty_tablet) != 0) {
1133
1
                LOG_WARNING("failed to recycle tablets under partition")
1134
1
                        .tag("table_id", part_pb.table_id())
1135
1
                        .tag("instance_id", instance_id_)
1136
1
                        .tag("index_id", index_id)
1137
1
                        .tag("partition_id", partition_id);
1138
1
                ret = -1;
1139
1
            }
1140
32
        }
1141
8
        if (ret == 0) {
1142
7
            ++num_recycled;
1143
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1144
7
            partition_keys.push_back(k);
1145
7
            if (part_pb.db_id() > 0) {
1146
7
                partition_version_keys.push_back(partition_version_key(
1147
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1148
7
            }
1149
7
        }
1150
8
        return ret;
1151
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1073
6
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1074
6
        ++num_scanned;
1075
6
        RecyclePartitionPB part_pb;
1076
6
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1077
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1078
0
            return -1;
1079
0
        }
1080
6
        int64_t current_time = ::time(nullptr);
1081
6
        if (current_time < calc_expiration(part_pb)) { // not expired
1082
0
            return 0;
1083
0
        }
1084
6
        ++num_expired;
1085
        // decode partition_id
1086
6
        auto k1 = k;
1087
6
        k1.remove_prefix(1);
1088
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1089
6
        decode_key(&k1, &out);
1090
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1091
6
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1092
6
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1093
6
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1094
6
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1095
        // Change state to RECYCLING
1096
6
        std::unique_ptr<Transaction> txn;
1097
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1098
6
        if (err != TxnErrorCode::TXN_OK) {
1099
0
            LOG_WARNING("failed to create txn").tag("err", err);
1100
0
            return -1;
1101
0
        }
1102
6
        std::string val;
1103
6
        err = txn->get(k, &val);
1104
6
        if (err ==
1105
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1106
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1107
0
            return 0;
1108
0
        }
1109
6
        if (err != TxnErrorCode::TXN_OK) {
1110
0
            LOG_WARNING("failed to get kv");
1111
0
            return -1;
1112
0
        }
1113
6
        part_pb.Clear();
1114
6
        if (!part_pb.ParseFromString(val)) {
1115
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1116
0
            return -1;
1117
0
        }
1118
        // Partitions with PREPARED state MUST have no data
1119
6
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
1120
6
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1121
6
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1122
6
            txn->put(k, part_pb.SerializeAsString());
1123
6
            err = txn->commit();
1124
6
            if (err != TxnErrorCode::TXN_OK) {
1125
0
                LOG_WARNING("failed to commit txn: {}", err);
1126
0
                return -1;
1127
0
            }
1128
6
        }
1129
1130
6
        int ret = 0;
1131
30
        for (int64_t index_id : part_pb.index_id()) {
1132
30
            if (recycle_tablets(part_pb.table_id(), index_id, partition_id, is_empty_tablet) != 0) {
1133
0
                LOG_WARNING("failed to recycle tablets under partition")
1134
0
                        .tag("table_id", part_pb.table_id())
1135
0
                        .tag("instance_id", instance_id_)
1136
0
                        .tag("index_id", index_id)
1137
0
                        .tag("partition_id", partition_id);
1138
0
                ret = -1;
1139
0
            }
1140
30
        }
1141
6
        if (ret == 0) {
1142
6
            ++num_recycled;
1143
6
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1144
6
            partition_keys.push_back(k);
1145
6
            if (part_pb.db_id() > 0) {
1146
6
                partition_version_keys.push_back(partition_version_key(
1147
6
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1148
6
            }
1149
6
        }
1150
6
        return ret;
1151
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1073
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1074
2
        ++num_scanned;
1075
2
        RecyclePartitionPB part_pb;
1076
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1077
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1078
0
            return -1;
1079
0
        }
1080
2
        int64_t current_time = ::time(nullptr);
1081
2
        if (current_time < calc_expiration(part_pb)) { // not expired
1082
0
            return 0;
1083
0
        }
1084
2
        ++num_expired;
1085
        // decode partition_id
1086
2
        auto k1 = k;
1087
2
        k1.remove_prefix(1);
1088
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1089
2
        decode_key(&k1, &out);
1090
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1091
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1092
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1093
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1094
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1095
        // Change state to RECYCLING
1096
2
        std::unique_ptr<Transaction> txn;
1097
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1098
2
        if (err != TxnErrorCode::TXN_OK) {
1099
0
            LOG_WARNING("failed to create txn").tag("err", err);
1100
0
            return -1;
1101
0
        }
1102
2
        std::string val;
1103
2
        err = txn->get(k, &val);
1104
2
        if (err ==
1105
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1106
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1107
0
            return 0;
1108
0
        }
1109
2
        if (err != TxnErrorCode::TXN_OK) {
1110
0
            LOG_WARNING("failed to get kv");
1111
0
            return -1;
1112
0
        }
1113
2
        part_pb.Clear();
1114
2
        if (!part_pb.ParseFromString(val)) {
1115
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1116
0
            return -1;
1117
0
        }
1118
        // Partitions with PREPARED state MUST have no data
1119
2
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
1120
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1121
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1122
1
            txn->put(k, part_pb.SerializeAsString());
1123
1
            err = txn->commit();
1124
1
            if (err != TxnErrorCode::TXN_OK) {
1125
0
                LOG_WARNING("failed to commit txn: {}", err);
1126
0
                return -1;
1127
0
            }
1128
1
        }
1129
1130
2
        int ret = 0;
1131
2
        for (int64_t index_id : part_pb.index_id()) {
1132
2
            if (recycle_tablets(part_pb.table_id(), index_id, partition_id, is_empty_tablet) != 0) {
1133
1
                LOG_WARNING("failed to recycle tablets under partition")
1134
1
                        .tag("table_id", part_pb.table_id())
1135
1
                        .tag("instance_id", instance_id_)
1136
1
                        .tag("index_id", index_id)
1137
1
                        .tag("partition_id", partition_id);
1138
1
                ret = -1;
1139
1
            }
1140
2
        }
1141
2
        if (ret == 0) {
1142
1
            ++num_recycled;
1143
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1144
1
            partition_keys.push_back(k);
1145
1
            if (part_pb.db_id() > 0) {
1146
1
                partition_version_keys.push_back(partition_version_key(
1147
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1148
1
            }
1149
1
        }
1150
2
        return ret;
1151
2
    };
1152
1153
14
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1154
4
        if (partition_keys.empty()) return 0;
1155
3
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1156
3
            partition_keys.clear();
1157
3
            partition_version_keys.clear();
1158
3
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
1155
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1156
2
            partition_keys.clear();
1157
2
            partition_version_keys.clear();
1158
2
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
1155
1
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1156
1
            partition_keys.clear();
1157
1
            partition_version_keys.clear();
1158
1
        });
1159
3
        std::unique_ptr<Transaction> txn;
1160
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1161
3
        if (err != TxnErrorCode::TXN_OK) {
1162
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1163
0
            return -1;
1164
0
        }
1165
7
        for (auto& k : partition_keys) {
1166
7
            txn->remove(k);
1167
7
        }
1168
7
        for (auto& k : partition_version_keys) {
1169
7
            txn->remove(k);
1170
7
        }
1171
3
        err = txn->commit();
1172
3
        if (err != TxnErrorCode::TXN_OK) {
1173
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1174
0
                         << " err=" << err;
1175
0
            return -1;
1176
0
        }
1177
3
        return 0;
1178
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEv
Line
Count
Source
1153
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1154
2
        if (partition_keys.empty()) return 0;
1155
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1156
2
            partition_keys.clear();
1157
2
            partition_version_keys.clear();
1158
2
        });
1159
2
        std::unique_ptr<Transaction> txn;
1160
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1161
2
        if (err != TxnErrorCode::TXN_OK) {
1162
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1163
0
            return -1;
1164
0
        }
1165
6
        for (auto& k : partition_keys) {
1166
6
            txn->remove(k);
1167
6
        }
1168
6
        for (auto& k : partition_version_keys) {
1169
6
            txn->remove(k);
1170
6
        }
1171
2
        err = txn->commit();
1172
2
        if (err != TxnErrorCode::TXN_OK) {
1173
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1174
0
                         << " err=" << err;
1175
0
            return -1;
1176
0
        }
1177
2
        return 0;
1178
2
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEv
Line
Count
Source
1153
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1154
2
        if (partition_keys.empty()) return 0;
1155
1
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1156
1
            partition_keys.clear();
1157
1
            partition_version_keys.clear();
1158
1
        });
1159
1
        std::unique_ptr<Transaction> txn;
1160
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1161
1
        if (err != TxnErrorCode::TXN_OK) {
1162
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1163
0
            return -1;
1164
0
        }
1165
1
        for (auto& k : partition_keys) {
1166
1
            txn->remove(k);
1167
1
        }
1168
1
        for (auto& k : partition_version_keys) {
1169
1
            txn->remove(k);
1170
1
        }
1171
1
        err = txn->commit();
1172
1
        if (err != TxnErrorCode::TXN_OK) {
1173
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1174
0
                         << " err=" << err;
1175
0
            return -1;
1176
0
        }
1177
1
        return 0;
1178
1
    };
1179
1180
14
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
1181
14
}
1182
1183
12
int InstanceRecycler::recycle_versions() {
1184
12
    int64_t num_scanned = 0;
1185
12
    int64_t num_recycled = 0;
1186
1187
12
    LOG_INFO("begin to recycle table and partition versions").tag("instance_id", instance_id_);
1188
1189
12
    auto start_time = steady_clock::now();
1190
1191
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1192
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1193
12
        LOG_INFO("recycle table and partition versions finished, cost={}s", cost)
1194
12
                .tag("instance_id", instance_id_)
1195
12
                .tag("num_scanned", num_scanned)
1196
12
                .tag("num_recycled", num_recycled);
1197
12
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEPi
Line
Count
Source
1191
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1192
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1193
12
        LOG_INFO("recycle table and partition versions finished, cost={}s", cost)
1194
12
                .tag("instance_id", instance_id_)
1195
12
                .tag("num_scanned", num_scanned)
1196
12
                .tag("num_recycled", num_recycled);
1197
12
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEPi
1198
1199
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
1200
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
1201
12
    int64_t last_scanned_table_id = 0;
1202
12
    bool is_recycled = false; // Is last scanned kv recycled
1203
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled, this](
1204
12
                                std::string_view k, std::string_view) {
1205
2
        ++num_scanned;
1206
2
        auto k1 = k;
1207
2
        k1.remove_prefix(1);
1208
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1209
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1210
2
        decode_key(&k1, &out);
1211
2
        DCHECK_EQ(out.size(), 6) << k;
1212
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1213
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1214
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1215
0
            return 0;
1216
0
        }
1217
2
        last_scanned_table_id = table_id;
1218
2
        is_recycled = false;
1219
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1220
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1221
2
        std::unique_ptr<Transaction> txn;
1222
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1223
2
        if (err != TxnErrorCode::TXN_OK) {
1224
0
            return -1;
1225
0
        }
1226
2
        std::unique_ptr<RangeGetIterator> iter;
1227
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1228
2
        if (err != TxnErrorCode::TXN_OK) {
1229
0
            return -1;
1230
0
        }
1231
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1232
1
            return 0;
1233
1
        }
1234
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1235
        // 1. Remove all partition version kvs of this table
1236
1
        auto partition_version_key_begin =
1237
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1238
1
        auto partition_version_key_end =
1239
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1240
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1241
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1242
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1243
1
                     << " table_id=" << table_id;
1244
        // 2. Remove the table version kv of this table
1245
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1246
1
        txn->remove(tbl_version_key);
1247
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1248
        // 3. Remove mow delete bitmap update lock and tablet compaction lock
1249
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1250
1
        txn->remove(lock_key);
1251
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1252
1
        std::string tablet_compaction_key_begin =
1253
1
                mow_tablet_compaction_key({instance_id_, table_id, 0});
1254
1
        std::string tablet_compaction_key_end =
1255
1
                mow_tablet_compaction_key({instance_id_, table_id, INT64_MAX});
1256
1
        txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end);
1257
1
        LOG(WARNING) << "remove mow tablet compaction kv, begin="
1258
1
                     << hex(tablet_compaction_key_begin)
1259
1
                     << " end=" << hex(tablet_compaction_key_end) << " db_id=" << db_id
1260
1
                     << " table_id=" << table_id;
1261
1
        err = txn->commit();
1262
1
        if (err != TxnErrorCode::TXN_OK) {
1263
0
            return -1;
1264
0
        }
1265
1
        ++num_recycled;
1266
1
        is_recycled = true;
1267
1
        return 0;
1268
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1204
2
                                std::string_view k, std::string_view) {
1205
2
        ++num_scanned;
1206
2
        auto k1 = k;
1207
2
        k1.remove_prefix(1);
1208
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1209
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1210
2
        decode_key(&k1, &out);
1211
2
        DCHECK_EQ(out.size(), 6) << k;
1212
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1213
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1214
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1215
0
            return 0;
1216
0
        }
1217
2
        last_scanned_table_id = table_id;
1218
2
        is_recycled = false;
1219
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1220
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1221
2
        std::unique_ptr<Transaction> txn;
1222
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1223
2
        if (err != TxnErrorCode::TXN_OK) {
1224
0
            return -1;
1225
0
        }
1226
2
        std::unique_ptr<RangeGetIterator> iter;
1227
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1228
2
        if (err != TxnErrorCode::TXN_OK) {
1229
0
            return -1;
1230
0
        }
1231
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1232
1
            return 0;
1233
1
        }
1234
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1235
        // 1. Remove all partition version kvs of this table
1236
1
        auto partition_version_key_begin =
1237
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1238
1
        auto partition_version_key_end =
1239
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1240
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1241
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1242
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1243
1
                     << " table_id=" << table_id;
1244
        // 2. Remove the table version kv of this table
1245
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1246
1
        txn->remove(tbl_version_key);
1247
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1248
        // 3. Remove mow delete bitmap update lock and tablet compaction lock
1249
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1250
1
        txn->remove(lock_key);
1251
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1252
1
        std::string tablet_compaction_key_begin =
1253
1
                mow_tablet_compaction_key({instance_id_, table_id, 0});
1254
1
        std::string tablet_compaction_key_end =
1255
1
                mow_tablet_compaction_key({instance_id_, table_id, INT64_MAX});
1256
1
        txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end);
1257
1
        LOG(WARNING) << "remove mow tablet compaction kv, begin="
1258
1
                     << hex(tablet_compaction_key_begin)
1259
1
                     << " end=" << hex(tablet_compaction_key_end) << " db_id=" << db_id
1260
1
                     << " table_id=" << table_id;
1261
1
        err = txn->commit();
1262
1
        if (err != TxnErrorCode::TXN_OK) {
1263
0
            return -1;
1264
0
        }
1265
1
        ++num_recycled;
1266
1
        is_recycled = true;
1267
1
        return 0;
1268
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
1269
1270
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
1271
12
}
1272
1273
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id, int64_t partition_id,
1274
41
                                      bool is_empty_tablet) {
1275
41
    int64_t num_scanned = 0;
1276
41
    std::atomic_long num_recycled = 0;
1277
1278
41
    std::string tablet_key_begin, tablet_key_end;
1279
41
    std::string stats_key_begin, stats_key_end;
1280
41
    std::string job_key_begin, job_key_end;
1281
1282
41
    std::string tablet_belongs;
1283
41
    if (partition_id > 0) {
1284
        // recycle tablets in a partition belonging to the index
1285
32
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
1286
32
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
1287
32
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
1288
32
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
1289
32
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
1290
32
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
1291
32
        tablet_belongs = "partition";
1292
32
    } else {
1293
        // recycle tablets in the index
1294
9
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
1295
9
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
1296
9
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
1297
9
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
1298
9
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
1299
9
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
1300
9
        tablet_belongs = "index";
1301
9
    }
1302
1303
41
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
1304
41
            .tag("table_id", table_id)
1305
41
            .tag("index_id", index_id)
1306
41
            .tag("partition_id", partition_id);
1307
1308
41
    auto start_time = steady_clock::now();
1309
1310
41
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1311
41
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1312
41
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1313
41
                .tag("instance_id", instance_id_)
1314
41
                .tag("table_id", table_id)
1315
41
                .tag("index_id", index_id)
1316
41
                .tag("partition_id", partition_id)
1317
41
                .tag("num_scanned", num_scanned)
1318
41
                .tag("num_recycled", num_recycled);
1319
41
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_1clEPi
Line
Count
Source
1310
37
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1311
37
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1312
37
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1313
37
                .tag("instance_id", instance_id_)
1314
37
                .tag("table_id", table_id)
1315
37
                .tag("index_id", index_id)
1316
37
                .tag("partition_id", partition_id)
1317
37
                .tag("num_scanned", num_scanned)
1318
37
                .tag("num_recycled", num_recycled);
1319
37
    });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_1clEPi
Line
Count
Source
1310
4
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1311
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1312
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1313
4
                .tag("instance_id", instance_id_)
1314
4
                .tag("table_id", table_id)
1315
4
                .tag("index_id", index_id)
1316
4
                .tag("partition_id", partition_id)
1317
4
                .tag("num_scanned", num_scanned)
1318
4
                .tag("num_recycled", num_recycled);
1319
4
    });
1320
1321
    // The first string_view represents the tablet key which has been recycled
1322
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
1323
41
    using TabletKeyPair = std::pair<std::string_view, bool>;
1324
41
    SyncExecutor<TabletKeyPair> sync_executor(
1325
41
            _thread_pool_group.recycle_tablet_pool,
1326
41
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
1327
41
                        index_id, partition_id),
1328
251
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1328
231
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1328
20
            [](const TabletKeyPair& k) { return k.first.empty(); });
1329
1330
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
1331
41
    std::vector<std::string> tablet_idx_keys;
1332
41
    std::vector<std::string> init_rs_keys;
1333
271
    auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int {
1334
271
        bool use_range_remove = true;
1335
271
        ++num_scanned;
1336
271
        doris::TabletMetaCloudPB tablet_meta_pb;
1337
271
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1338
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1339
0
            use_range_remove = false;
1340
0
            return -1;
1341
0
        }
1342
271
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1343
1344
271
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1345
20
            LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id();
1346
20
            return -1;
1347
20
        }
1348
1349
251
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1350
251
        if (!is_empty_tablet) {
1351
251
            sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1352
251
                               k]() mutable -> TabletKeyPair {
1353
251
                if (recycle_tablet(tid) != 0) {
1354
0
                    LOG_WARNING("failed to recycle tablet")
1355
0
                            .tag("instance_id", instance_id_)
1356
0
                            .tag("tablet_id", tid);
1357
0
                    range_move = false;
1358
0
                    return {std::string_view(), range_move};
1359
0
                }
1360
251
                ++num_recycled;
1361
251
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1362
251
                return {k, range_move};
1363
251
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE_clEv
Line
Count
Source
1352
231
                               k]() mutable -> TabletKeyPair {
1353
231
                if (recycle_tablet(tid) != 0) {
1354
0
                    LOG_WARNING("failed to recycle tablet")
1355
0
                            .tag("instance_id", instance_id_)
1356
0
                            .tag("tablet_id", tid);
1357
0
                    range_move = false;
1358
0
                    return {std::string_view(), range_move};
1359
0
                }
1360
231
                ++num_recycled;
1361
231
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1362
231
                return {k, range_move};
1363
231
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE_clEv
Line
Count
Source
1352
20
                               k]() mutable -> TabletKeyPair {
1353
20
                if (recycle_tablet(tid) != 0) {
1354
0
                    LOG_WARNING("failed to recycle tablet")
1355
0
                            .tag("instance_id", instance_id_)
1356
0
                            .tag("tablet_id", tid);
1357
0
                    range_move = false;
1358
0
                    return {std::string_view(), range_move};
1359
0
                }
1360
20
                ++num_recycled;
1361
20
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1362
20
                return {k, range_move};
1363
20
            });
1364
251
        } else {
1365
            // Empty tablet only has a [0-1] init rowset
1366
0
            init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1}));
1367
0
            DCHECK([&]() {
1368
0
                std::unique_ptr<Transaction> txn;
1369
0
                if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) {
1370
0
                    LOG_ERROR("failed to create txn").tag("err", err);
1371
0
                    return false;
1372
0
                }
1373
0
                auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2});
1374
0
                auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
1375
0
                std::unique_ptr<RangeGetIterator> iter;
1376
0
                if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1);
1377
0
                    err != TxnErrorCode::TXN_OK) {
1378
0
                    LOG_ERROR("failed to get kv").tag("err", err);
1379
0
                    return false;
1380
0
                }
1381
0
                if (iter->has_next()) {
1382
0
                    LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id);
1383
0
                    return false;
1384
0
                }
1385
0
                return true;
1386
0
            }());
1387
0
            sync_executor.add([k]() mutable -> TabletKeyPair {
1388
0
                LOG_INFO("k is {}, is empty {}", k, k.empty());
1389
0
                return {k, true};
1390
0
            });
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE1_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE1_clEv
1391
0
            ++num_recycled;
1392
0
        }
1393
251
        return 0;
1394
271
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1333
231
    auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int {
1334
231
        bool use_range_remove = true;
1335
231
        ++num_scanned;
1336
231
        doris::TabletMetaCloudPB tablet_meta_pb;
1337
231
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1338
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1339
0
            use_range_remove = false;
1340
0
            return -1;
1341
0
        }
1342
231
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1343
1344
231
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1345
0
            LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id();
1346
0
            return -1;
1347
0
        }
1348
1349
231
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1350
231
        if (!is_empty_tablet) {
1351
231
            sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1352
231
                               k]() mutable -> TabletKeyPair {
1353
231
                if (recycle_tablet(tid) != 0) {
1354
231
                    LOG_WARNING("failed to recycle tablet")
1355
231
                            .tag("instance_id", instance_id_)
1356
231
                            .tag("tablet_id", tid);
1357
231
                    range_move = false;
1358
231
                    return {std::string_view(), range_move};
1359
231
                }
1360
231
                ++num_recycled;
1361
231
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1362
231
                return {k, range_move};
1363
231
            });
1364
231
        } else {
1365
            // Empty tablet only has a [0-1] init rowset
1366
0
            init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1}));
1367
0
            DCHECK([&]() {
1368
0
                std::unique_ptr<Transaction> txn;
1369
0
                if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) {
1370
0
                    LOG_ERROR("failed to create txn").tag("err", err);
1371
0
                    return false;
1372
0
                }
1373
0
                auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2});
1374
0
                auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
1375
0
                std::unique_ptr<RangeGetIterator> iter;
1376
0
                if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1);
1377
0
                    err != TxnErrorCode::TXN_OK) {
1378
0
                    LOG_ERROR("failed to get kv").tag("err", err);
1379
0
                    return false;
1380
0
                }
1381
0
                if (iter->has_next()) {
1382
0
                    LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id);
1383
0
                    return false;
1384
0
                }
1385
0
                return true;
1386
0
            }());
1387
0
            sync_executor.add([k]() mutable -> TabletKeyPair {
1388
0
                LOG_INFO("k is {}, is empty {}", k, k.empty());
1389
0
                return {k, true};
1390
0
            });
1391
0
            ++num_recycled;
1392
0
        }
1393
231
        return 0;
1394
231
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1333
40
    auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int {
1334
40
        bool use_range_remove = true;
1335
40
        ++num_scanned;
1336
40
        doris::TabletMetaCloudPB tablet_meta_pb;
1337
40
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1338
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1339
0
            use_range_remove = false;
1340
0
            return -1;
1341
0
        }
1342
40
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1343
1344
40
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1345
20
            LOG(WARNING) << "lazy txn not finished tablet_meta_pb=" << tablet_meta_pb.tablet_id();
1346
20
            return -1;
1347
20
        }
1348
1349
20
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1350
20
        if (!is_empty_tablet) {
1351
20
            sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1352
20
                               k]() mutable -> TabletKeyPair {
1353
20
                if (recycle_tablet(tid) != 0) {
1354
20
                    LOG_WARNING("failed to recycle tablet")
1355
20
                            .tag("instance_id", instance_id_)
1356
20
                            .tag("tablet_id", tid);
1357
20
                    range_move = false;
1358
20
                    return {std::string_view(), range_move};
1359
20
                }
1360
20
                ++num_recycled;
1361
20
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1362
20
                return {k, range_move};
1363
20
            });
1364
20
        } else {
1365
            // Empty tablet only has a [0-1] init rowset
1366
0
            init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1}));
1367
0
            DCHECK([&]() {
1368
0
                std::unique_ptr<Transaction> txn;
1369
0
                if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) {
1370
0
                    LOG_ERROR("failed to create txn").tag("err", err);
1371
0
                    return false;
1372
0
                }
1373
0
                auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2});
1374
0
                auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
1375
0
                std::unique_ptr<RangeGetIterator> iter;
1376
0
                if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1);
1377
0
                    err != TxnErrorCode::TXN_OK) {
1378
0
                    LOG_ERROR("failed to get kv").tag("err", err);
1379
0
                    return false;
1380
0
                }
1381
0
                if (iter->has_next()) {
1382
0
                    LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id);
1383
0
                    return false;
1384
0
                }
1385
0
                return true;
1386
0
            }());
1387
0
            sync_executor.add([k]() mutable -> TabletKeyPair {
1388
0
                LOG_INFO("k is {}, is empty {}", k, k.empty());
1389
0
                return {k, true};
1390
0
            });
1391
0
            ++num_recycled;
1392
0
        }
1393
20
        return 0;
1394
40
    };
1395
1396
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
1397
41
    auto loop_done = [&, this]() -> int {
1398
41
        bool finished = true;
1399
41
        auto tablet_keys = sync_executor.when_all(&finished);
1400
41
        if (!finished) {
1401
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1402
0
            return -1;
1403
0
        }
1404
41
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1405
        // sort the vector using key's order
1406
39
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1407
980
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESG_EEDaS5_S8_
Line
Count
Source
1407
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESG_EEDaS5_S8_
Line
Count
Source
1407
36
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1408
39
        bool use_range_remove = true;
1409
251
        for (auto& [_, remove] : tablet_keys) {
1410
251
            if (!remove) {
1411
0
                use_range_remove = remove;
1412
0
                break;
1413
0
            }
1414
251
        }
1415
39
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1416
39
            tablet_idx_keys.clear();
1417
39
            init_rs_keys.clear();
1418
39
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlPiE_clES3_
Line
Count
Source
1415
37
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1416
37
            tablet_idx_keys.clear();
1417
37
            init_rs_keys.clear();
1418
37
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlPiE_clES3_
Line
Count
Source
1415
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1416
2
            tablet_idx_keys.clear();
1417
2
            init_rs_keys.clear();
1418
2
        });
1419
39
        std::unique_ptr<Transaction> txn;
1420
39
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1421
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1422
0
            return -1;
1423
0
        }
1424
39
        std::string tablet_key_end;
1425
39
        if (!tablet_keys.empty()) {
1426
39
            if (use_range_remove) {
1427
39
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1428
39
                txn->remove(tablet_keys.front().first, tablet_key_end);
1429
39
            } else {
1430
0
                for (auto& [k, _] : tablet_keys) {
1431
0
                    txn->remove(k);
1432
0
                }
1433
0
            }
1434
39
        }
1435
251
        for (auto& k : tablet_idx_keys) {
1436
251
            txn->remove(k);
1437
251
        }
1438
39
        for (auto& k : init_rs_keys) {
1439
0
            txn->remove(k);
1440
0
        }
1441
39
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1442
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1443
0
                         << ", err=" << err;
1444
0
            return -1;
1445
0
        }
1446
39
        return 0;
1447
39
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEv
Line
Count
Source
1397
37
    auto loop_done = [&, this]() -> int {
1398
37
        bool finished = true;
1399
37
        auto tablet_keys = sync_executor.when_all(&finished);
1400
37
        if (!finished) {
1401
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1402
0
            return -1;
1403
0
        }
1404
37
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1405
        // sort the vector using key's order
1406
37
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1407
37
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1408
37
        bool use_range_remove = true;
1409
231
        for (auto& [_, remove] : tablet_keys) {
1410
231
            if (!remove) {
1411
0
                use_range_remove = remove;
1412
0
                break;
1413
0
            }
1414
231
        }
1415
37
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1416
37
            tablet_idx_keys.clear();
1417
37
            init_rs_keys.clear();
1418
37
        });
1419
37
        std::unique_ptr<Transaction> txn;
1420
37
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1421
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1422
0
            return -1;
1423
0
        }
1424
37
        std::string tablet_key_end;
1425
37
        if (!tablet_keys.empty()) {
1426
37
            if (use_range_remove) {
1427
37
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1428
37
                txn->remove(tablet_keys.front().first, tablet_key_end);
1429
37
            } else {
1430
0
                for (auto& [k, _] : tablet_keys) {
1431
0
                    txn->remove(k);
1432
0
                }
1433
0
            }
1434
37
        }
1435
231
        for (auto& k : tablet_idx_keys) {
1436
231
            txn->remove(k);
1437
231
        }
1438
37
        for (auto& k : init_rs_keys) {
1439
0
            txn->remove(k);
1440
0
        }
1441
37
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1442
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1443
0
                         << ", err=" << err;
1444
0
            return -1;
1445
0
        }
1446
37
        return 0;
1447
37
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEv
Line
Count
Source
1397
4
    auto loop_done = [&, this]() -> int {
1398
4
        bool finished = true;
1399
4
        auto tablet_keys = sync_executor.when_all(&finished);
1400
4
        if (!finished) {
1401
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1402
0
            return -1;
1403
0
        }
1404
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1405
        // sort the vector using key's order
1406
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1407
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1408
2
        bool use_range_remove = true;
1409
20
        for (auto& [_, remove] : tablet_keys) {
1410
20
            if (!remove) {
1411
0
                use_range_remove = remove;
1412
0
                break;
1413
0
            }
1414
20
        }
1415
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1416
2
            tablet_idx_keys.clear();
1417
2
            init_rs_keys.clear();
1418
2
        });
1419
2
        std::unique_ptr<Transaction> txn;
1420
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1421
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1422
0
            return -1;
1423
0
        }
1424
2
        std::string tablet_key_end;
1425
2
        if (!tablet_keys.empty()) {
1426
2
            if (use_range_remove) {
1427
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1428
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
1429
2
            } else {
1430
0
                for (auto& [k, _] : tablet_keys) {
1431
0
                    txn->remove(k);
1432
0
                }
1433
0
            }
1434
2
        }
1435
20
        for (auto& k : tablet_idx_keys) {
1436
20
            txn->remove(k);
1437
20
        }
1438
2
        for (auto& k : init_rs_keys) {
1439
0
            txn->remove(k);
1440
0
        }
1441
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1442
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1443
0
                         << ", err=" << err;
1444
0
            return -1;
1445
0
        }
1446
2
        return 0;
1447
2
    };
1448
1449
41
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
1450
41
                               std::move(loop_done));
1451
41
    if (ret != 0) {
1452
2
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
1453
2
        return ret;
1454
2
    }
1455
1456
    // directly remove tablet stats and tablet jobs of these dropped index or partition
1457
39
    std::unique_ptr<Transaction> txn;
1458
39
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1459
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
1460
0
        return -1;
1461
0
    }
1462
39
    txn->remove(stats_key_begin, stats_key_end);
1463
39
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
1464
39
                 << " end=" << hex(stats_key_end);
1465
39
    txn->remove(job_key_begin, job_key_end);
1466
39
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
1467
39
    std::string schema_key_begin, schema_key_end;
1468
39
    std::string schema_dict_key;
1469
39
    if (partition_id <= 0) {
1470
        // Delete schema kv of this index
1471
8
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
1472
8
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
1473
8
        txn->remove(schema_key_begin, schema_key_end);
1474
8
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
1475
8
                     << " end=" << hex(schema_key_end);
1476
8
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
1477
8
        txn->remove(schema_dict_key);
1478
8
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
1479
8
    }
1480
1481
39
    TxnErrorCode err = txn->commit();
1482
39
    if (err != TxnErrorCode::TXN_OK) {
1483
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
1484
0
                     << " err=" << err;
1485
0
        return -1;
1486
0
    }
1487
1488
39
    return ret;
1489
39
}
1490
1491
4.00k
int InstanceRecycler::delete_rowset_data(const doris::RowsetMetaCloudPB& rs_meta_pb) {
1492
4.00k
    int64_t num_segments = rs_meta_pb.num_segments();
1493
4.00k
    if (num_segments <= 0) return 0;
1494
4.00k
    if (!rs_meta_pb.has_tablet_schema()) {
1495
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
1496
0
                                  rs_meta_pb.rowset_id_v2());
1497
0
    }
1498
4.00k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
1499
4.00k
    if (it == accessor_map_.end()) {
1500
0
        LOG_WARNING("instance has no such resource id")
1501
0
                .tag("instance_id", instance_id_)
1502
0
                .tag("resource_id", rs_meta_pb.resource_id());
1503
0
        return -1;
1504
0
    }
1505
4.00k
    auto& accessor = it->second;
1506
4.00k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
1507
4.00k
    int64_t tablet_id = rs_meta_pb.tablet_id();
1508
    // process inverted indexes
1509
4.00k
    std::vector<std::pair<int64_t, std::string>> index_ids;
1510
4.00k
    index_ids.reserve(rs_meta_pb.tablet_schema().index_size());
1511
8.00k
    for (auto& i : rs_meta_pb.tablet_schema().index()) {
1512
8.00k
        if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
1513
8.00k
            index_ids.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
1514
8.00k
        }
1515
8.00k
    }
1516
4.00k
    std::vector<std::string> file_paths;
1517
4.00k
    auto tablet_schema = rs_meta_pb.tablet_schema();
1518
4.00k
    auto index_storage_format = InvertedIndexStorageFormatPB::V1;
1519
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
1520
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
1521
20.0k
        if (tablet_schema.has_inverted_index_storage_format()) {
1522
10.0k
            index_storage_format = tablet_schema.inverted_index_storage_format();
1523
10.0k
        }
1524
20.0k
        if (index_storage_format == InvertedIndexStorageFormatPB::V1) {
1525
40.0k
            for (const auto& index_id : index_ids) {
1526
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
1527
40.0k
                                                            index_id.second));
1528
40.0k
            }
1529
20.0k
        } else if (!index_ids.empty()) {
1530
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
1531
0
        }
1532
20.0k
    }
1533
    // TODO(AlexYue): seems could do do batch
1534
4.00k
    return accessor->delete_files(file_paths);
1535
4.00k
}
1536
1537
int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaCloudPB>& rowsets,
1538
32
                                         RowsetRecyclingState type) {
1539
32
    int ret = 0;
1540
    // resource_id -> file_paths
1541
32
    std::map<std::string, std::vector<std::string>> resource_file_paths;
1542
    // (resource_id, tablet_id, rowset_id)
1543
32
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
1544
1545
6.14k
    for (const auto& rs : rowsets) {
1546
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
1547
        // due to aborted schema change.
1548
6.14k
        if (type == RowsetRecyclingState::FORMAL_ROWSET) {
1549
3.12k
            std::lock_guard lock(recycled_tablets_mtx_);
1550
3.12k
            if (recycled_tablets_.count(rs.tablet_id())) {
1551
0
                continue; // Rowset data has already been deleted
1552
0
            }
1553
3.12k
        }
1554
1555
6.14k
        auto it = accessor_map_.find(rs.resource_id());
1556
        // possible if the accessor is not initilized correctly
1557
6.14k
        if (it == accessor_map_.end()) [[unlikely]] {
1558
1
            LOG_WARNING("instance has no such resource id")
1559
1
                    .tag("instance_id", instance_id_)
1560
1
                    .tag("resource_id", rs.resource_id());
1561
1
            ret = -1;
1562
1
            continue;
1563
1
        }
1564
1565
6.14k
        auto& file_paths = resource_file_paths[rs.resource_id()];
1566
6.14k
        const auto& rowset_id = rs.rowset_id_v2();
1567
6.14k
        int64_t tablet_id = rs.tablet_id();
1568
6.14k
        int64_t num_segments = rs.num_segments();
1569
6.14k
        if (num_segments <= 0) continue;
1570
1571
        // Process inverted indexes
1572
6.14k
        std::vector<std::pair<int64_t, std::string>> index_ids;
1573
        // default format as v1.
1574
6.14k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
1575
6.14k
        int inverted_index_get_ret = 0;
1576
6.14k
        if (rs.has_tablet_schema()) {
1577
5.54k
            for (const auto& index : rs.tablet_schema().index()) {
1578
5.54k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
1579
5.54k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
1580
5.54k
                }
1581
5.54k
            }
1582
2.59k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
1583
2.56k
                index_format = rs.tablet_schema().inverted_index_storage_format();
1584
2.56k
            }
1585
3.55k
        } else {
1586
3.55k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
1587
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
1588
0
                                "instance_id="
1589
0
                             << instance_id_ << " tablet_id=" << tablet_id
1590
0
                             << " rowset_id=" << rowset_id;
1591
0
                ret = -1;
1592
0
                continue;
1593
0
            }
1594
3.55k
            InvertedIndexInfo index_info;
1595
3.55k
            inverted_index_get_ret =
1596
3.55k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
1597
3.55k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
1598
3.55k
                                     &inverted_index_get_ret);
1599
3.55k
            if (inverted_index_get_ret == 0) {
1600
3.05k
                index_format = index_info.first;
1601
3.05k
                index_ids = index_info.second;
1602
3.05k
            } else if (inverted_index_get_ret == 1) {
1603
                // 1. Schema kv not found means tablet has been recycled
1604
                // Maybe some tablet recycle failed by some bugs
1605
                // We need to delete again to double check
1606
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
1607
                // because we are uncertain about the inverted index information.
1608
                // If there are inverted indexes, some data might not be deleted,
1609
                // but this is acceptable as we have made our best effort to delete the data.
1610
503
                LOG_INFO(
1611
503
                        "delete rowset data schema kv not found, need to delete again to double "
1612
503
                        "check")
1613
503
                        .tag("instance_id", instance_id_)
1614
503
                        .tag("tablet_id", tablet_id)
1615
503
                        .tag("rowset", rs.ShortDebugString());
1616
                // Currently index_ids is guaranteed to be empty,
1617
                // but we clear it again here as a safeguard against future code changes
1618
                // that might cause index_ids to no longer be empty
1619
503
                index_format = InvertedIndexStorageFormatPB::V2;
1620
503
                index_ids.clear();
1621
503
            } else {
1622
0
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
1623
0
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
1624
0
                ret = -1;
1625
0
                continue;
1626
0
            }
1627
3.55k
        }
1628
6.14k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
1629
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
1630
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
1631
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
1632
5
            continue;
1633
5
        }
1634
36.8k
        for (int64_t i = 0; i < num_segments; ++i) {
1635
30.6k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
1636
30.6k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
1637
59.2k
                for (const auto& index_id : index_ids) {
1638
59.2k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
1639
59.2k
                                                                index_id.first, index_id.second));
1640
59.2k
                }
1641
28.1k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
1642
                // try to recycle inverted index v2 when get_ret == 1
1643
                // we treat schema not found as if it has a v2 format inverted index
1644
                // to reduce chance of data leakage
1645
2.50k
                if (inverted_index_get_ret == 1) {
1646
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
1647
2.50k
                            .tag("instance_id", instance_id_)
1648
2.50k
                            .tag("inverted index v2 path",
1649
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
1650
2.50k
                }
1651
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
1652
2.50k
            }
1653
30.6k
        }
1654
6.13k
    }
1655
1656
32
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
1657
32
                                                 "delete_rowset_data",
1658
34
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_0clERKi
Line
Count
Source
1658
34
                                                 [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_0clERKi
1659
32
    for (auto& [resource_id, file_paths] : resource_file_paths) {
1660
29
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
1661
29
            DCHECK(accessor_map_.count(*rid))
1662
0
                    << "uninitilized accessor, instance_id=" << instance_id_
1663
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
1664
29
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
1665
29
                                     &accessor_map_);
1666
29
            if (!accessor_map_.contains(*rid)) {
1667
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
1668
0
                        .tag("resource_id", resource_id)
1669
0
                        .tag("instance_id", instance_id_);
1670
0
                return -1;
1671
0
            }
1672
29
            auto& accessor = accessor_map_[*rid];
1673
29
            return accessor->delete_files(*paths);
1674
29
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_1clEv
Line
Count
Source
1660
29
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
1661
29
            DCHECK(accessor_map_.count(*rid))
1662
0
                    << "uninitilized accessor, instance_id=" << instance_id_
1663
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
1664
29
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
1665
29
                                     &accessor_map_);
1666
29
            if (!accessor_map_.contains(*rid)) {
1667
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
1668
0
                        .tag("resource_id", resource_id)
1669
0
                        .tag("instance_id", instance_id_);
1670
0
                return -1;
1671
0
            }
1672
29
            auto& accessor = accessor_map_[*rid];
1673
29
            return accessor->delete_files(*paths);
1674
29
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_1clEv
1675
29
    }
1676
32
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
1677
5
        LOG_INFO(
1678
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
1679
5
                "resource_id={}, tablet_id={}, instance_id={}",
1680
5
                rowset_id, resource_id, tablet_id, instance_id_);
1681
5
        concurrent_delete_executor.add(
1682
5
                [&]() -> int { return delete_rowset_data(resource_id, tablet_id, rowset_id); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_2clEv
Line
Count
Source
1682
5
                [&]() -> int { return delete_rowset_data(resource_id, tablet_id, rowset_id); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_2clEv
1683
5
    }
1684
32
    bool finished = true;
1685
32
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
1686
34
    for (int r : rets) {
1687
34
        if (r != 0) {
1688
0
            ret = -1;
1689
0
            break;
1690
0
        }
1691
34
    }
1692
32
    ret = finished ? ret : -1;
1693
32
    return ret;
1694
32
}
1695
1696
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
1697
2.90k
                                         const std::string& rowset_id) {
1698
2.90k
    auto it = accessor_map_.find(resource_id);
1699
2.90k
    if (it == accessor_map_.end()) {
1700
0
        LOG_WARNING("instance has no such resource id")
1701
0
                .tag("instance_id", instance_id_)
1702
0
                .tag("resource_id", resource_id)
1703
0
                .tag("tablet_id", tablet_id)
1704
0
                .tag("rowset_id", rowset_id);
1705
0
        return -1;
1706
0
    }
1707
2.90k
    auto& accessor = it->second;
1708
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
1709
2.90k
}
1710
1711
254
int InstanceRecycler::recycle_tablet(int64_t tablet_id) {
1712
254
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
1713
254
            .tag("instance_id", instance_id_)
1714
254
            .tag("tablet_id", tablet_id);
1715
1716
254
    int ret = 0;
1717
254
    auto start_time = steady_clock::now();
1718
1719
254
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
1720
1721
    // collect resource ids
1722
234
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
1723
234
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
1724
234
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
1725
234
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
1726
1727
234
    std::set<std::string> resource_ids;
1728
234
    int64_t recycle_rowsets_number = 0;
1729
234
    int64_t recycle_segments_number = 0;
1730
234
    int64_t recycle_rowsets_data_size = 0;
1731
234
    int64_t recycle_rowsets_index_size = 0;
1732
234
    int64_t max_rowset_version = 0;
1733
234
    int64_t min_rowset_creation_time = INT64_MAX;
1734
234
    int64_t max_rowset_creation_time = 0;
1735
234
    int64_t min_rowset_expiration_time = INT64_MAX;
1736
234
    int64_t max_rowset_expiration_time = 0;
1737
1738
234
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1739
234
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1740
234
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
1741
234
                .tag("instance_id", instance_id_)
1742
234
                .tag("tablet_id", tablet_id)
1743
234
                .tag("recycle rowsets number", recycle_rowsets_number)
1744
234
                .tag("recycle segments number", recycle_segments_number)
1745
234
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
1746
234
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
1747
234
                .tag("max rowset version", max_rowset_version)
1748
234
                .tag("min rowset creation time", min_rowset_creation_time)
1749
234
                .tag("max rowset creation time", max_rowset_creation_time)
1750
234
                .tag("min rowset expiration time", min_rowset_expiration_time)
1751
234
                .tag("max rowset expiration time", max_rowset_expiration_time)
1752
234
                .tag("ret", ret);
1753
234
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_1clEPi
Line
Count
Source
1738
234
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1739
234
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1740
234
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
1741
234
                .tag("instance_id", instance_id_)
1742
234
                .tag("tablet_id", tablet_id)
1743
234
                .tag("recycle rowsets number", recycle_rowsets_number)
1744
234
                .tag("recycle segments number", recycle_segments_number)
1745
234
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
1746
234
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
1747
234
                .tag("max rowset version", max_rowset_version)
1748
234
                .tag("min rowset creation time", min_rowset_creation_time)
1749
234
                .tag("max rowset creation time", max_rowset_creation_time)
1750
234
                .tag("min rowset expiration time", min_rowset_expiration_time)
1751
234
                .tag("max rowset expiration time", max_rowset_expiration_time)
1752
234
                .tag("ret", ret);
1753
234
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_1clEPi
1754
1755
234
    std::unique_ptr<Transaction> txn;
1756
234
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1757
0
        LOG_WARNING("failed to recycle tablet ")
1758
0
                .tag("tablet id", tablet_id)
1759
0
                .tag("instance_id", instance_id_)
1760
0
                .tag("reason", "failed to create txn");
1761
0
        ret = -1;
1762
0
    }
1763
234
    GetRowsetResponse resp;
1764
234
    std::string msg;
1765
234
    MetaServiceCode code = MetaServiceCode::OK;
1766
    // get rowsets in tablet
1767
234
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
1768
234
                        tablet_id, code, msg, &resp);
1769
234
    if (code != MetaServiceCode::OK) {
1770
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
1771
0
                .tag("tablet id", tablet_id)
1772
0
                .tag("msg", msg)
1773
0
                .tag("code", code)
1774
0
                .tag("instance id", instance_id_);
1775
0
        ret = -1;
1776
0
    }
1777
234
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
1778
1779
2.50k
    for (const auto& rs_meta : resp.rowset_meta()) {
1780
        /*
1781
        * For compatibility, we skip the loop for [0-1] here. 
1782
        * The purpose of this loop is to delete object files,
1783
        * and since [0-1] only has meta and doesn't have object files, 
1784
        * skipping it doesn't affect system correctness. 
1785
        *
1786
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below 
1787
        * would return error -1 directly, causing the recycle operation to fail.
1788
        *
1789
        * [0-1] doesn't have resource id is a bug.
1790
        * In the future, we will fix this problem, after that,
1791
        * we can remove this if statement.
1792
        *
1793
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
1794
        */
1795
1796
2.50k
        if (rs_meta.end_version() == 1) {
1797
            // Assert that [0-1] has no resource_id to make sure
1798
            // this if statement will not be forgetted to remove
1799
            // when the resource id bug is fixed
1800
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
1801
0
            recycle_rowsets_number += 1;
1802
0
            continue;
1803
0
        }
1804
2.50k
        if (!rs_meta.has_resource_id()) {
1805
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
1806
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
1807
1
                    .tag("instance_id", instance_id_)
1808
1
                    .tag("tablet_id", tablet_id);
1809
1
            return -1;
1810
1
        }
1811
2.50k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
1812
2.50k
        auto it = accessor_map_.find(rs_meta.resource_id());
1813
        // possible if the accessor is not initilized correctly
1814
2.50k
        if (it == accessor_map_.end()) [[unlikely]] {
1815
1
            LOG_WARNING(
1816
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
1817
1
                    "recycle process")
1818
1
                    .tag("tablet id", tablet_id)
1819
1
                    .tag("instance_id", instance_id_)
1820
1
                    .tag("resource_id", rs_meta.resource_id())
1821
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
1822
1
            return -1;
1823
1
        }
1824
2.50k
        recycle_rowsets_number += 1;
1825
2.50k
        recycle_segments_number += rs_meta.num_segments();
1826
2.50k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
1827
2.50k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
1828
2.50k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
1829
2.50k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
1830
2.50k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
1831
2.50k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
1832
2.50k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
1833
2.50k
        resource_ids.emplace(rs_meta.resource_id());
1834
2.50k
    }
1835
1836
232
    LOG_INFO("recycle tablet start to delete object")
1837
232
            .tag("instance id", instance_id_)
1838
232
            .tag("tablet id", tablet_id)
1839
232
            .tag("recycle tablet resource ids are",
1840
232
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
1841
232
                                 [](const std::string& a, const std::string& b) {
1842
203
                                     return a.empty() ? b : a + "," + b;
1843
203
                                 }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_0clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESA_
Line
Count
Source
1841
203
                                 [](const std::string& a, const std::string& b) {
1842
203
                                     return a.empty() ? b : a + "," + b;
1843
203
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_0clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESA_
1844
1845
232
    SyncExecutor<int> concurrent_delete_executor(
1846
232
            _thread_pool_group.s3_producer_pool,
1847
232
            fmt::format("delete tablet {} s3 rowset", tablet_id),
1848
232
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_2clERKi
Line
Count
Source
1848
203
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_2clERKi
1849
1850
    // delete all rowset data in this tablet
1851
    // ATTN: there may be data leak if not all accessor initilized successfully
1852
    //       partial data deleted if the tablet is stored cross-storage vault
1853
    //       vault id is not attached to TabletMeta...
1854
232
    for (const auto& resource_id : resource_ids) {
1855
203
        concurrent_delete_executor.add([&, accessor_ptr = accessor_map_[resource_id]]() {
1856
203
            if (accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)) != 0) {
1857
1
                LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
1858
1
                             << " path=" << accessor_ptr->uri();
1859
1
                return -1;
1860
1
            }
1861
202
            return 0;
1862
203
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_3clEv
Line
Count
Source
1855
203
        concurrent_delete_executor.add([&, accessor_ptr = accessor_map_[resource_id]]() {
1856
203
            if (accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)) != 0) {
1857
1
                LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
1858
1
                             << " path=" << accessor_ptr->uri();
1859
1
                return -1;
1860
1
            }
1861
202
            return 0;
1862
203
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_3clEv
1863
203
    }
1864
1865
232
    bool finished = true;
1866
232
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
1867
232
    for (int r : rets) {
1868
203
        if (r != 0) {
1869
1
            ret = -1;
1870
1
        }
1871
203
    }
1872
1873
232
    ret = finished ? ret : -1;
1874
1875
232
    if (ret != 0) { // failed recycle tablet data
1876
1
        LOG_WARNING("ret!=0")
1877
1
                .tag("finished", finished)
1878
1
                .tag("ret", ret)
1879
1
                .tag("instance_id", instance_id_)
1880
1
                .tag("tablet_id", tablet_id);
1881
1
        return ret;
1882
1
    }
1883
1884
231
    txn.reset();
1885
231
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1886
0
        LOG_WARNING("failed to recycle tablet ")
1887
0
                .tag("tablet id", tablet_id)
1888
0
                .tag("instance_id", instance_id_)
1889
0
                .tag("reason", "failed to create txn");
1890
0
        ret = -1;
1891
0
    }
1892
    // delete all rowset kv in this tablet
1893
231
    txn->remove(rs_key0, rs_key1);
1894
231
    txn->remove(recyc_rs_key0, recyc_rs_key1);
1895
1896
    // remove delete bitmap for MoW table
1897
231
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
1898
231
    txn->remove(pending_key);
1899
231
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
1900
231
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
1901
231
    txn->remove(delete_bitmap_start, delete_bitmap_end);
1902
1903
231
    TxnErrorCode err = txn->commit();
1904
231
    if (err != TxnErrorCode::TXN_OK) {
1905
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
1906
0
        ret = -1;
1907
0
    }
1908
1909
231
    if (ret == 0) {
1910
        // All object files under tablet have been deleted
1911
231
        std::lock_guard lock(recycled_tablets_mtx_);
1912
231
        recycled_tablets_.insert(tablet_id);
1913
231
    }
1914
1915
231
    return ret;
1916
232
}
1917
1918
13
int InstanceRecycler::recycle_rowsets() {
1919
13
    const std::string task_name = "recycle_rowsets";
1920
13
    int64_t num_scanned = 0;
1921
13
    int64_t num_expired = 0;
1922
13
    int64_t num_prepare = 0;
1923
13
    int64_t num_compacted = 0;
1924
13
    int64_t num_empty_rowset = 0;
1925
13
    size_t total_rowset_key_size = 0;
1926
13
    size_t total_rowset_value_size = 0;
1927
13
    size_t expired_rowset_size = 0;
1928
13
    std::atomic_long num_recycled = 0;
1929
1930
13
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
1931
13
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
1932
13
    std::string recyc_rs_key0;
1933
13
    std::string recyc_rs_key1;
1934
13
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
1935
13
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
1936
1937
13
    LOG_INFO("begin to recycle rowsets").tag("instance_id", instance_id_);
1938
1939
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1940
13
    register_recycle_task(task_name, start_time);
1941
1942
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1943
13
        unregister_recycle_task(task_name);
1944
13
        int64_t cost =
1945
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1946
13
        LOG_INFO("recycle rowsets finished, cost={}s", cost)
1947
13
                .tag("instance_id", instance_id_)
1948
13
                .tag("num_scanned", num_scanned)
1949
13
                .tag("num_expired", num_expired)
1950
13
                .tag("num_recycled", num_recycled)
1951
13
                .tag("num_recycled.prepare", num_prepare)
1952
13
                .tag("num_recycled.compacted", num_compacted)
1953
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
1954
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
1955
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
1956
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
1957
13
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEPi
Line
Count
Source
1942
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1943
13
        unregister_recycle_task(task_name);
1944
13
        int64_t cost =
1945
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1946
13
        LOG_INFO("recycle rowsets finished, cost={}s", cost)
1947
13
                .tag("instance_id", instance_id_)
1948
13
                .tag("num_scanned", num_scanned)
1949
13
                .tag("num_expired", num_expired)
1950
13
                .tag("num_recycled", num_recycled)
1951
13
                .tag("num_recycled.prepare", num_prepare)
1952
13
                .tag("num_recycled.compacted", num_compacted)
1953
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
1954
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
1955
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
1956
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
1957
13
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEPi
1958
1959
13
    std::vector<std::string> rowset_keys;
1960
13
    std::vector<doris::RowsetMetaCloudPB> rowsets;
1961
1962
    // Store keys of rowset recycled by background workers
1963
13
    std::mutex async_recycled_rowset_keys_mutex;
1964
13
    std::vector<std::string> async_recycled_rowset_keys;
1965
13
    auto worker_pool = std::make_unique<SimpleThreadPool>(
1966
13
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
1967
13
    worker_pool->start();
1968
13
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
1969
900
                                            int64_t tablet_id, const std::string& rowset_id) {
1970
        // Try to delete rowset data in background thread
1971
900
        int ret = worker_pool->submit_with_timeout(
1972
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
1973
780
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1974
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
1975
0
                        return;
1976
0
                    }
1977
780
                    std::vector<std::string> keys;
1978
780
                    {
1979
780
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
1980
780
                        async_recycled_rowset_keys.push_back(std::move(key));
1981
780
                        if (async_recycled_rowset_keys.size() > 100) {
1982
7
                            keys.swap(async_recycled_rowset_keys);
1983
7
                        }
1984
780
                    }
1985
780
                    if (keys.empty()) return;
1986
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
1987
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
1988
0
                                     << instance_id_;
1989
7
                    } else {
1990
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
1991
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
1992
7
                                           num_recycled, start_time);
1993
7
                    }
1994
7
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
1972
780
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
1973
780
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1974
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
1975
0
                        return;
1976
0
                    }
1977
780
                    std::vector<std::string> keys;
1978
780
                    {
1979
780
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
1980
780
                        async_recycled_rowset_keys.push_back(std::move(key));
1981
780
                        if (async_recycled_rowset_keys.size() > 100) {
1982
7
                            keys.swap(async_recycled_rowset_keys);
1983
7
                        }
1984
780
                    }
1985
780
                    if (keys.empty()) return;
1986
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
1987
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
1988
0
                                     << instance_id_;
1989
7
                    } else {
1990
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
1991
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
1992
7
                                           num_recycled, start_time);
1993
7
                    }
1994
7
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
1995
900
                0);
1996
900
        if (ret == 0) return 0;
1997
        // Submit task failed, delete rowset data in current thread
1998
120
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1999
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2000
0
            return -1;
2001
0
        }
2002
120
        rowset_keys.push_back(std::move(key));
2003
120
        return 0;
2004
120
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
1969
900
                                            int64_t tablet_id, const std::string& rowset_id) {
1970
        // Try to delete rowset data in background thread
1971
900
        int ret = worker_pool->submit_with_timeout(
1972
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
1973
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1974
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
1975
900
                        return;
1976
900
                    }
1977
900
                    std::vector<std::string> keys;
1978
900
                    {
1979
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
1980
900
                        async_recycled_rowset_keys.push_back(std::move(key));
1981
900
                        if (async_recycled_rowset_keys.size() > 100) {
1982
900
                            keys.swap(async_recycled_rowset_keys);
1983
900
                        }
1984
900
                    }
1985
900
                    if (keys.empty()) return;
1986
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
1987
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
1988
900
                                     << instance_id_;
1989
900
                    } else {
1990
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
1991
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
1992
900
                                           num_recycled, start_time);
1993
900
                    }
1994
900
                },
1995
900
                0);
1996
900
        if (ret == 0) return 0;
1997
        // Submit task failed, delete rowset data in current thread
1998
120
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1999
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2000
0
            return -1;
2001
0
        }
2002
120
        rowset_keys.push_back(std::move(key));
2003
120
        return 0;
2004
120
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
2005
2006
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2007
2008
4.00k
    auto calc_expiration = [&earlest_ts, this](const RecycleRowsetPB& rs) {
2009
4.00k
        if (config::force_immediate_recycle) {
2010
0
            return 0L;
2011
0
        }
2012
        // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
2013
4.00k
        int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
2014
4.00k
        int64_t retention_seconds = config::retention_seconds;
2015
4.00k
        if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
2016
3.10k
            retention_seconds =
2017
3.10k
                    std::min(config::compacted_rowset_retention_seconds, retention_seconds);
2018
3.10k
        }
2019
4.00k
        int64_t final_expiration = expiration + retention_seconds;
2020
4.00k
        if (earlest_ts > final_expiration) {
2021
2
            earlest_ts = final_expiration;
2022
2
            g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, earlest_ts);
2023
2
        }
2024
4.00k
        return final_expiration;
2025
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clERKNS0_15RecycleRowsetPBE
Line
Count
Source
2008
4.00k
    auto calc_expiration = [&earlest_ts, this](const RecycleRowsetPB& rs) {
2009
4.00k
        if (config::force_immediate_recycle) {
2010
0
            return 0L;
2011
0
        }
2012
        // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
2013
4.00k
        int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
2014
4.00k
        int64_t retention_seconds = config::retention_seconds;
2015
4.00k
        if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
2016
3.10k
            retention_seconds =
2017
3.10k
                    std::min(config::compacted_rowset_retention_seconds, retention_seconds);
2018
3.10k
        }
2019
4.00k
        int64_t final_expiration = expiration + retention_seconds;
2020
4.00k
        if (earlest_ts > final_expiration) {
2021
2
            earlest_ts = final_expiration;
2022
2
            g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, earlest_ts);
2023
2
        }
2024
4.00k
        return final_expiration;
2025
4.00k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clERKNS0_15RecycleRowsetPBE
2026
2027
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
2028
4.00k
        ++num_scanned;
2029
4.00k
        total_rowset_key_size += k.size();
2030
4.00k
        total_rowset_value_size += v.size();
2031
4.00k
        RecycleRowsetPB rowset;
2032
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2033
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2034
0
            return -1;
2035
0
        }
2036
2037
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2038
0
                   << " num_expired=" << num_expired << " expiration=" << calc_expiration(rowset)
2039
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
2040
4.00k
        int64_t current_time = ::time(nullptr);
2041
4.00k
        if (current_time < calc_expiration(rowset)) { // not expired
2042
0
            return 0;
2043
0
        }
2044
4.00k
        ++num_expired;
2045
4.00k
        expired_rowset_size += v.size();
2046
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
2047
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
2048
                // in old version, keep this key-value pair and it needs to be checked manually
2049
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2050
0
                return -1;
2051
0
            }
2052
250
            if (rowset.resource_id().empty()) [[unlikely]] {
2053
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2054
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2055
0
                          << hex(k) << " value=" << proto_to_json(rowset);
2056
0
                rowset_keys.emplace_back(k);
2057
0
                return -1;
2058
0
            }
2059
            // decode rowset_id
2060
250
            auto k1 = k;
2061
250
            k1.remove_prefix(1);
2062
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2063
250
            decode_key(&k1, &out);
2064
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2065
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2066
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2067
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
2068
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
2069
250
                                             rowset.tablet_id(), rowset_id) != 0) {
2070
0
                return -1;
2071
0
            }
2072
250
            return 0;
2073
250
        }
2074
        // TODO(plat1ko): check rowset not referenced
2075
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
2076
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
2077
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
2078
0
                LOG_INFO("recycle rowset that has empty resource id");
2079
0
            } else {
2080
                // other situations, keep this key-value pair and it needs to be checked manually
2081
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2082
0
                return -1;
2083
0
            }
2084
0
        }
2085
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2086
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
2087
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
2088
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
2089
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
2090
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
2091
3.75k
                  << " rowset_meta_size=" << v.size()
2092
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
2093
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
2094
            // unable to calculate file path, can only be deleted by rowset id prefix
2095
650
            num_prepare += 1;
2096
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
2097
650
                                             rowset_meta->tablet_id(),
2098
650
                                             rowset_meta->rowset_id_v2()) != 0) {
2099
0
                return -1;
2100
0
            }
2101
3.10k
        } else {
2102
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
2103
3.10k
            rowset_keys.emplace_back(k);
2104
3.10k
            if (rowset_meta->num_segments() > 0) { // Skip empty rowset
2105
3.10k
                rowsets.push_back(std::move(*rowset_meta));
2106
3.10k
            } else {
2107
0
                ++num_empty_rowset;
2108
0
            }
2109
3.10k
        }
2110
3.75k
        return 0;
2111
3.75k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2027
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
2028
4.00k
        ++num_scanned;
2029
4.00k
        total_rowset_key_size += k.size();
2030
4.00k
        total_rowset_value_size += v.size();
2031
4.00k
        RecycleRowsetPB rowset;
2032
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2033
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2034
0
            return -1;
2035
0
        }
2036
2037
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2038
0
                   << " num_expired=" << num_expired << " expiration=" << calc_expiration(rowset)
2039
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
2040
4.00k
        int64_t current_time = ::time(nullptr);
2041
4.00k
        if (current_time < calc_expiration(rowset)) { // not expired
2042
0
            return 0;
2043
0
        }
2044
4.00k
        ++num_expired;
2045
4.00k
        expired_rowset_size += v.size();
2046
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
2047
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
2048
                // in old version, keep this key-value pair and it needs to be checked manually
2049
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2050
0
                return -1;
2051
0
            }
2052
250
            if (rowset.resource_id().empty()) [[unlikely]] {
2053
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2054
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2055
0
                          << hex(k) << " value=" << proto_to_json(rowset);
2056
0
                rowset_keys.emplace_back(k);
2057
0
                return -1;
2058
0
            }
2059
            // decode rowset_id
2060
250
            auto k1 = k;
2061
250
            k1.remove_prefix(1);
2062
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2063
250
            decode_key(&k1, &out);
2064
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2065
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2066
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2067
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
2068
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
2069
250
                                             rowset.tablet_id(), rowset_id) != 0) {
2070
0
                return -1;
2071
0
            }
2072
250
            return 0;
2073
250
        }
2074
        // TODO(plat1ko): check rowset not referenced
2075
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
2076
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
2077
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
2078
0
                LOG_INFO("recycle rowset that has empty resource id");
2079
0
            } else {
2080
                // other situations, keep this key-value pair and it needs to be checked manually
2081
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2082
0
                return -1;
2083
0
            }
2084
0
        }
2085
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2086
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
2087
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
2088
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
2089
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
2090
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
2091
3.75k
                  << " rowset_meta_size=" << v.size()
2092
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
2093
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
2094
            // unable to calculate file path, can only be deleted by rowset id prefix
2095
650
            num_prepare += 1;
2096
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
2097
650
                                             rowset_meta->tablet_id(),
2098
650
                                             rowset_meta->rowset_id_v2()) != 0) {
2099
0
                return -1;
2100
0
            }
2101
3.10k
        } else {
2102
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
2103
3.10k
            rowset_keys.emplace_back(k);
2104
3.10k
            if (rowset_meta->num_segments() > 0) { // Skip empty rowset
2105
3.10k
                rowsets.push_back(std::move(*rowset_meta));
2106
3.10k
            } else {
2107
0
                ++num_empty_rowset;
2108
0
            }
2109
3.10k
        }
2110
3.75k
        return 0;
2111
3.75k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
2112
2113
21
    auto loop_done = [&]() -> int {
2114
21
        std::vector<std::string> rowset_keys_to_delete;
2115
21
        std::vector<doris::RowsetMetaCloudPB> rowsets_to_delete;
2116
21
        rowset_keys_to_delete.swap(rowset_keys);
2117
21
        rowsets_to_delete.swap(rowsets);
2118
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
2119
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2120
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) {
2121
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2122
0
                return;
2123
0
            }
2124
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2125
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2126
0
                return;
2127
0
            }
2128
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2129
21
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
2119
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2120
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) {
2121
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2122
0
                return;
2123
0
            }
2124
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2125
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2126
0
                return;
2127
0
            }
2128
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2129
21
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
2130
21
        return 0;
2131
21
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
2113
21
    auto loop_done = [&]() -> int {
2114
21
        std::vector<std::string> rowset_keys_to_delete;
2115
21
        std::vector<doris::RowsetMetaCloudPB> rowsets_to_delete;
2116
21
        rowset_keys_to_delete.swap(rowset_keys);
2117
21
        rowsets_to_delete.swap(rowsets);
2118
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
2119
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2120
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) {
2121
21
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2122
21
                return;
2123
21
            }
2124
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2125
21
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2126
21
                return;
2127
21
            }
2128
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2129
21
        });
2130
21
        return 0;
2131
21
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
2132
2133
13
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
2134
13
                               std::move(loop_done));
2135
13
    worker_pool->stop();
2136
2137
13
    if (!async_recycled_rowset_keys.empty()) {
2138
2
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
2139
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2140
0
            return -1;
2141
2
        } else {
2142
2
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
2143
2
        }
2144
2
    }
2145
13
    return ret;
2146
13
}
2147
2148
bool is_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id,
2149
3.05k
                     int64_t txn_id) {
2150
3.05k
    std::unique_ptr<Transaction> txn;
2151
3.05k
    TxnErrorCode err = txn_kv->create_txn(&txn);
2152
3.05k
    if (err != TxnErrorCode::TXN_OK) {
2153
0
        LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id;
2154
0
        return false;
2155
0
    }
2156
2157
3.05k
    std::string index_val;
2158
3.05k
    const std::string index_key = txn_index_key({instance_id, txn_id});
2159
3.05k
    err = txn->get(index_key, &index_val);
2160
3.05k
    if (err != TxnErrorCode::TXN_OK) {
2161
3.03k
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2162
3.03k
            TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_recycled");
2163
            // txn has been recycled;
2164
3.03k
            LOG(INFO) << "txn index key has been recycled, txn_id=" << txn_id
2165
3.03k
                      << " instance_id=" << instance_id;
2166
3.03k
            return true;
2167
3.03k
        }
2168
0
        LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id
2169
0
                     << " instance_id=" << instance_id << " key=" << hex(index_key)
2170
0
                     << " err=" << err;
2171
0
        return false;
2172
3.03k
    }
2173
2174
20
    TxnIndexPB index_pb;
2175
20
    if (!index_pb.ParseFromString(index_val)) {
2176
0
        LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id
2177
0
                     << " instance_id=" << instance_id;
2178
0
        return false;
2179
0
    }
2180
2181
20
    DCHECK(index_pb.has_tablet_index() == true);
2182
20
    if (!index_pb.tablet_index().has_db_id()) {
2183
        // In the previous version, the db_id was not set in the index_pb.
2184
        // If updating to the version which enable txn lazy commit, the db_id will be set.
2185
0
        LOG(INFO) << "txn index has no db_id, txn_id=" << txn_id << " instance_id=" << instance_id
2186
0
                  << " index=" << index_pb.ShortDebugString();
2187
0
        return true;
2188
0
    }
2189
2190
20
    int64_t db_id = index_pb.tablet_index().db_id();
2191
20
    DCHECK_GT(db_id, 0) << "db_id=" << db_id << " txn_id=" << txn_id
2192
0
                        << " instance_id=" << instance_id;
2193
2194
20
    std::string info_val;
2195
20
    const std::string info_key = txn_info_key({instance_id, db_id, txn_id});
2196
20
    err = txn->get(info_key, &info_val);
2197
20
    if (err != TxnErrorCode::TXN_OK) {
2198
0
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2199
            // txn info has been recycled;
2200
0
            LOG(INFO) << "txn info key has been recycled, db_id=" << db_id << " txn_id=" << txn_id
2201
0
                      << " instance_id=" << instance_id;
2202
0
            return true;
2203
0
        }
2204
2205
0
        DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND);
2206
0
        LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id
2207
0
                     << " instance_id=" << instance_id << " key=" << hex(info_key)
2208
0
                     << " err=" << err;
2209
0
        return false;
2210
0
    }
2211
2212
20
    TxnInfoPB txn_info;
2213
20
    if (!txn_info.ParseFromString(info_val)) {
2214
0
        LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id
2215
0
                     << " instance_id=" << instance_id;
2216
0
        return false;
2217
0
    }
2218
2219
20
    DCHECK(txn_info.txn_id() == txn_id) << "txn_id=" << txn_id << " instance_id=" << instance_id
2220
0
                                        << " txn_info=" << txn_info.ShortDebugString();
2221
2222
20
    if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status() ||
2223
20
        TxnStatusPB::TXN_STATUS_VISIBLE == txn_info.status()) {
2224
10
        TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_aborted", &txn_info);
2225
10
        return true;
2226
10
    }
2227
2228
10
    TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_not_finished", &txn_info);
2229
10
    return false;
2230
20
}
2231
2232
17
int InstanceRecycler::recycle_tmp_rowsets() {
2233
17
    const std::string task_name = "recycle_tmp_rowsets";
2234
17
    int64_t num_scanned = 0;
2235
17
    int64_t num_expired = 0;
2236
17
    int64_t num_recycled = 0;
2237
17
    size_t expired_rowset_size = 0;
2238
17
    size_t total_rowset_key_size = 0;
2239
17
    size_t total_rowset_value_size = 0;
2240
2241
17
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
2242
17
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
2243
17
    std::string tmp_rs_key0;
2244
17
    std::string tmp_rs_key1;
2245
17
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
2246
17
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
2247
2248
17
    LOG_INFO("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
2249
2250
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2251
17
    register_recycle_task(task_name, start_time);
2252
2253
17
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2254
17
        unregister_recycle_task(task_name);
2255
17
        int64_t cost =
2256
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2257
17
        LOG_INFO("recycle tmp rowsets finished, cost={}s", cost)
2258
17
                .tag("instance_id", instance_id_)
2259
17
                .tag("num_scanned", num_scanned)
2260
17
                .tag("num_expired", num_expired)
2261
17
                .tag("num_recycled", num_recycled)
2262
17
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2263
17
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2264
17
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
2265
17
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEPi
Line
Count
Source
2253
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2254
13
        unregister_recycle_task(task_name);
2255
13
        int64_t cost =
2256
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2257
13
        LOG_INFO("recycle tmp rowsets finished, cost={}s", cost)
2258
13
                .tag("instance_id", instance_id_)
2259
13
                .tag("num_scanned", num_scanned)
2260
13
                .tag("num_expired", num_expired)
2261
13
                .tag("num_recycled", num_recycled)
2262
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2263
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2264
13
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
2265
13
    });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEPi
Line
Count
Source
2253
4
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2254
4
        unregister_recycle_task(task_name);
2255
4
        int64_t cost =
2256
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2257
4
        LOG_INFO("recycle tmp rowsets finished, cost={}s", cost)
2258
4
                .tag("instance_id", instance_id_)
2259
4
                .tag("num_scanned", num_scanned)
2260
4
                .tag("num_expired", num_expired)
2261
4
                .tag("num_recycled", num_recycled)
2262
4
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2263
4
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2264
4
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
2265
4
    });
2266
2267
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
2268
17
    std::vector<std::string_view> tmp_rowset_keys;
2269
17
    std::vector<doris::RowsetMetaCloudPB> tmp_rowsets;
2270
2271
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2272
3.05k
    auto calc_expiration = [&earlest_ts, this](const doris::RowsetMetaCloudPB& rowset) {
2273
        // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
2274
        //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
2275
        //  duration or timeout always < `retention_time` in practice.
2276
3.05k
        int64_t expiration =
2277
3.05k
                rowset.txn_expiration() > 0 ? rowset.txn_expiration() : rowset.creation_time();
2278
3.05k
        expiration = config::force_immediate_recycle ? 0 : expiration;
2279
3.05k
        int64_t final_expiration = expiration + config::retention_seconds;
2280
3.05k
        if (earlest_ts > final_expiration) {
2281
6
            earlest_ts = final_expiration;
2282
6
            g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, earlest_ts);
2283
6
        }
2284
3.05k
        return final_expiration;
2285
3.05k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
2272
3.02k
    auto calc_expiration = [&earlest_ts, this](const doris::RowsetMetaCloudPB& rowset) {
2273
        // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
2274
        //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
2275
        //  duration or timeout always < `retention_time` in practice.
2276
3.02k
        int64_t expiration =
2277
3.02k
                rowset.txn_expiration() > 0 ? rowset.txn_expiration() : rowset.creation_time();
2278
3.02k
        expiration = config::force_immediate_recycle ? 0 : expiration;
2279
3.02k
        int64_t final_expiration = expiration + config::retention_seconds;
2280
3.02k
        if (earlest_ts > final_expiration) {
2281
3
            earlest_ts = final_expiration;
2282
3
            g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, earlest_ts);
2283
3
        }
2284
3.02k
        return final_expiration;
2285
3.02k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
2272
30
    auto calc_expiration = [&earlest_ts, this](const doris::RowsetMetaCloudPB& rowset) {
2273
        // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
2274
        //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
2275
        //  duration or timeout always < `retention_time` in practice.
2276
30
        int64_t expiration =
2277
30
                rowset.txn_expiration() > 0 ? rowset.txn_expiration() : rowset.creation_time();
2278
30
        expiration = config::force_immediate_recycle ? 0 : expiration;
2279
30
        int64_t final_expiration = expiration + config::retention_seconds;
2280
30
        if (earlest_ts > final_expiration) {
2281
3
            earlest_ts = final_expiration;
2282
3
            g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, earlest_ts);
2283
3
        }
2284
30
        return final_expiration;
2285
30
    };
2286
2287
17
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
2288
17
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
2289
17
                             &calc_expiration,
2290
3.05k
                             this](std::string_view k, std::string_view v) -> int {
2291
3.05k
        ++num_scanned;
2292
3.05k
        total_rowset_key_size += k.size();
2293
3.05k
        total_rowset_value_size += v.size();
2294
3.05k
        doris::RowsetMetaCloudPB rowset;
2295
3.05k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2296
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
2297
0
            return -1;
2298
0
        }
2299
3.05k
        int64_t expiration = calc_expiration(rowset);
2300
3.05k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2301
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2302
0
                   << " txn_expiration=" << rowset.txn_expiration()
2303
0
                   << " rowset_creation_time=" << rowset.creation_time();
2304
3.05k
        int64_t current_time = ::time(nullptr);
2305
3.05k
        if (current_time < expiration) { // not expired
2306
0
            return 0;
2307
0
        }
2308
2309
3.05k
        DCHECK_GT(rowset.txn_id(), 0)
2310
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
2311
3.05k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
2312
10
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
2313
10
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
2314
10
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
2315
10
                      << rowset.start_version() << '-' << rowset.end_version()
2316
10
                      << "] txn_id=" << rowset.txn_id()
2317
10
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
2318
10
                      << " txn_expiration=" << rowset.txn_expiration();
2319
10
            return 0;
2320
10
        }
2321
2322
3.04k
        ++num_expired;
2323
3.04k
        expired_rowset_size += v.size();
2324
3.04k
        if (!rowset.has_resource_id()) {
2325
20
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
2326
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
2327
0
                return -1;
2328
0
            }
2329
            // might be a delete pred rowset
2330
20
            tmp_rowset_keys.push_back(k);
2331
20
            return 0;
2332
20
        }
2333
        // TODO(plat1ko): check rowset not referenced
2334
3.02k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2335
3.02k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
2336
3.02k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
2337
3.02k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
2338
3.02k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
2339
3.02k
                  << " num_expired=" << num_expired;
2340
2341
3.02k
        tmp_rowset_keys.push_back(k);
2342
3.02k
        if (rowset.num_segments() > 0) { // Skip empty rowset
2343
3.02k
            tmp_rowsets.push_back(std::move(rowset));
2344
3.02k
        }
2345
3.02k
        return 0;
2346
3.04k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2290
3.02k
                             this](std::string_view k, std::string_view v) -> int {
2291
3.02k
        ++num_scanned;
2292
3.02k
        total_rowset_key_size += k.size();
2293
3.02k
        total_rowset_value_size += v.size();
2294
3.02k
        doris::RowsetMetaCloudPB rowset;
2295
3.02k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2296
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
2297
0
            return -1;
2298
0
        }
2299
3.02k
        int64_t expiration = calc_expiration(rowset);
2300
3.02k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2301
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2302
0
                   << " txn_expiration=" << rowset.txn_expiration()
2303
0
                   << " rowset_creation_time=" << rowset.creation_time();
2304
3.02k
        int64_t current_time = ::time(nullptr);
2305
3.02k
        if (current_time < expiration) { // not expired
2306
0
            return 0;
2307
0
        }
2308
2309
3.02k
        DCHECK_GT(rowset.txn_id(), 0)
2310
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
2311
3.02k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
2312
0
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
2313
0
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
2314
0
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
2315
0
                      << rowset.start_version() << '-' << rowset.end_version()
2316
0
                      << "] txn_id=" << rowset.txn_id()
2317
0
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
2318
0
                      << " txn_expiration=" << rowset.txn_expiration();
2319
0
            return 0;
2320
0
        }
2321
2322
3.02k
        ++num_expired;
2323
3.02k
        expired_rowset_size += v.size();
2324
3.02k
        if (!rowset.has_resource_id()) {
2325
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
2326
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
2327
0
                return -1;
2328
0
            }
2329
            // might be a delete pred rowset
2330
0
            tmp_rowset_keys.push_back(k);
2331
0
            return 0;
2332
0
        }
2333
        // TODO(plat1ko): check rowset not referenced
2334
3.02k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2335
3.02k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
2336
3.02k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
2337
3.02k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
2338
3.02k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
2339
3.02k
                  << " num_expired=" << num_expired;
2340
2341
3.02k
        tmp_rowset_keys.push_back(k);
2342
3.02k
        if (rowset.num_segments() > 0) { // Skip empty rowset
2343
3.02k
            tmp_rowsets.push_back(std::move(rowset));
2344
3.02k
        }
2345
3.02k
        return 0;
2346
3.02k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2290
30
                             this](std::string_view k, std::string_view v) -> int {
2291
30
        ++num_scanned;
2292
30
        total_rowset_key_size += k.size();
2293
30
        total_rowset_value_size += v.size();
2294
30
        doris::RowsetMetaCloudPB rowset;
2295
30
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2296
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
2297
0
            return -1;
2298
0
        }
2299
30
        int64_t expiration = calc_expiration(rowset);
2300
30
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2301
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2302
0
                   << " txn_expiration=" << rowset.txn_expiration()
2303
0
                   << " rowset_creation_time=" << rowset.creation_time();
2304
30
        int64_t current_time = ::time(nullptr);
2305
30
        if (current_time < expiration) { // not expired
2306
0
            return 0;
2307
0
        }
2308
2309
30
        DCHECK_GT(rowset.txn_id(), 0)
2310
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
2311
30
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
2312
10
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
2313
10
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
2314
10
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
2315
10
                      << rowset.start_version() << '-' << rowset.end_version()
2316
10
                      << "] txn_id=" << rowset.txn_id()
2317
10
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
2318
10
                      << " txn_expiration=" << rowset.txn_expiration();
2319
10
            return 0;
2320
10
        }
2321
2322
20
        ++num_expired;
2323
20
        expired_rowset_size += v.size();
2324
20
        if (!rowset.has_resource_id()) {
2325
20
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
2326
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
2327
0
                return -1;
2328
0
            }
2329
            // might be a delete pred rowset
2330
20
            tmp_rowset_keys.push_back(k);
2331
20
            return 0;
2332
20
        }
2333
        // TODO(plat1ko): check rowset not referenced
2334
0
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2335
0
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
2336
0
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
2337
0
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
2338
0
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
2339
0
                  << " num_expired=" << num_expired;
2340
2341
0
        tmp_rowset_keys.push_back(k);
2342
0
        if (rowset.num_segments() > 0) { // Skip empty rowset
2343
0
            tmp_rowsets.push_back(std::move(rowset));
2344
0
        }
2345
0
        return 0;
2346
20
    };
2347
2348
17
    auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, this]() -> int {
2349
6
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2350
6
            tmp_rowset_keys.clear();
2351
6
            tmp_rowsets.clear();
2352
6
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
2349
3
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2350
3
            tmp_rowset_keys.clear();
2351
3
            tmp_rowsets.clear();
2352
3
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
2349
3
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2350
3
            tmp_rowset_keys.clear();
2351
3
            tmp_rowsets.clear();
2352
3
        });
2353
6
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET) != 0) {
2354
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
2355
0
            return -1;
2356
0
        }
2357
6
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
2358
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
2359
0
            return -1;
2360
0
        }
2361
6
        num_recycled += tmp_rowset_keys.size();
2362
6
        return 0;
2363
6
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
Line
Count
Source
2348
3
    auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, this]() -> int {
2349
3
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2350
3
            tmp_rowset_keys.clear();
2351
3
            tmp_rowsets.clear();
2352
3
        });
2353
3
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET) != 0) {
2354
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
2355
0
            return -1;
2356
0
        }
2357
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
2358
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
2359
0
            return -1;
2360
0
        }
2361
3
        num_recycled += tmp_rowset_keys.size();
2362
3
        return 0;
2363
3
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
Line
Count
Source
2348
3
    auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, this]() -> int {
2349
3
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2350
3
            tmp_rowset_keys.clear();
2351
3
            tmp_rowsets.clear();
2352
3
        });
2353
3
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET) != 0) {
2354
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
2355
0
            return -1;
2356
0
        }
2357
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
2358
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
2359
0
            return -1;
2360
0
        }
2361
3
        num_recycled += tmp_rowset_keys.size();
2362
3
        return 0;
2363
3
    };
2364
2365
17
    return scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
2366
17
                            std::move(loop_done));
2367
17
}
2368
2369
int InstanceRecycler::scan_and_recycle(
2370
        std::string begin, std::string_view end,
2371
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
2372
174
        std::function<int()> loop_done) {
2373
174
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
2374
174
    int ret = 0;
2375
174
    int64_t cnt = 0;
2376
174
    int get_range_retried = 0;
2377
174
    std::string err;
2378
174
    std::unique_ptr<int, std::function<void(int*)>> defer_log(
2379
174
            (int*)0x01, [begin, end, &err, &ret, &cnt, &get_range_retried](int*) {
2380
174
                LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
2381
174
                          << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
2382
174
                          << " ret=" << ret << " err=" << err;
2383
174
            });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEPi
Line
Count
Source
2379
155
            (int*)0x01, [begin, end, &err, &ret, &cnt, &get_range_retried](int*) {
2380
155
                LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
2381
155
                          << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
2382
155
                          << " ret=" << ret << " err=" << err;
2383
155
            });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEPi
Line
Count
Source
2379
19
            (int*)0x01, [begin, end, &err, &ret, &cnt, &get_range_retried](int*) {
2380
19
                LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
2381
19
                          << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
2382
19
                          << " ret=" << ret << " err=" << err;
2383
19
            });
2384
2385
174
    std::unique_ptr<RangeGetIterator> it;
2386
194
    do {
2387
194
        if (get_range_retried > 1000) {
2388
0
            err = "txn_get exceeds max retry, may not scan all keys";
2389
0
            ret = -1;
2390
0
            return -1;
2391
0
        }
2392
194
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
2393
194
        if (get_ret != 0) { // txn kv may complain "Request for future version"
2394
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
2395
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
2396
0
                         << " get_range_retried=" << get_range_retried;
2397
0
            ++get_range_retried;
2398
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
2399
0
            continue; // try again
2400
0
        }
2401
194
        if (!it->has_next()) {
2402
91
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
2403
91
            break; // scan finished
2404
91
        }
2405
37.4k
        while (it->has_next()) {
2406
37.3k
            ++cnt;
2407
            // recycle corresponding resources
2408
37.3k
            auto [k, v] = it->next();
2409
37.3k
            if (!it->has_next()) {
2410
102
                begin = k;
2411
102
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
2412
102
            }
2413
            // if we want to continue scanning, the recycle_func should not return non-zero
2414
37.3k
            if (recycle_func(k, v) != 0) {
2415
22
                err = "recycle_func error";
2416
22
                ret = -1;
2417
22
            }
2418
37.3k
        }
2419
103
        begin.push_back('\x00'); // Update to next smallest key for iteration
2420
        // if we want to continue scanning, the recycle_func should not return non-zero
2421
103
        if (loop_done && loop_done() != 0) {
2422
2
            err = "loop_done error";
2423
2
            ret = -1;
2424
2
        }
2425
103
    } while (it->more() && !stopped());
2426
174
    return ret;
2427
174
}
2428
2429
20
int InstanceRecycler::abort_timeout_txn() {
2430
20
    const std::string task_name = "abort_timeout_txn";
2431
20
    int64_t num_scanned = 0;
2432
20
    int64_t num_timeout = 0;
2433
20
    int64_t num_abort = 0;
2434
20
    int64_t num_advance = 0;
2435
2436
20
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
2437
20
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
2438
20
    std::string begin_txn_running_key;
2439
20
    std::string end_txn_running_key;
2440
20
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
2441
20
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
2442
2443
20
    LOG_INFO("begin to abort timeout txn").tag("instance_id", instance_id_);
2444
2445
20
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2446
20
    register_recycle_task(task_name, start_time);
2447
2448
20
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2449
20
        unregister_recycle_task(task_name);
2450
20
        int64_t cost =
2451
20
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2452
20
        LOG_INFO("end to abort timeout txn, cost={}s", cost)
2453
20
                .tag("instance_id", instance_id_)
2454
20
                .tag("num_scanned", num_scanned)
2455
20
                .tag("num_timeout", num_timeout)
2456
20
                .tag("num_abort", num_abort)
2457
20
                .tag("num_advance", num_advance);
2458
20
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEPi
Line
Count
Source
2448
16
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2449
16
        unregister_recycle_task(task_name);
2450
16
        int64_t cost =
2451
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2452
16
        LOG_INFO("end to abort timeout txn, cost={}s", cost)
2453
16
                .tag("instance_id", instance_id_)
2454
16
                .tag("num_scanned", num_scanned)
2455
16
                .tag("num_timeout", num_timeout)
2456
16
                .tag("num_abort", num_abort)
2457
16
                .tag("num_advance", num_advance);
2458
16
    });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEPi
Line
Count
Source
2448
4
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2449
4
        unregister_recycle_task(task_name);
2450
4
        int64_t cost =
2451
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2452
4
        LOG_INFO("end to abort timeout txn, cost={}s", cost)
2453
4
                .tag("instance_id", instance_id_)
2454
4
                .tag("num_scanned", num_scanned)
2455
4
                .tag("num_timeout", num_timeout)
2456
4
                .tag("num_abort", num_abort)
2457
4
                .tag("num_advance", num_advance);
2458
4
    });
2459
2460
20
    int64_t current_time =
2461
20
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2462
2463
20
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
2464
20
                                  &current_time,
2465
20
                                  this](std::string_view k, std::string_view v) -> int {
2466
10
        ++num_scanned;
2467
2468
10
        std::unique_ptr<Transaction> txn;
2469
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2470
10
        if (err != TxnErrorCode::TXN_OK) {
2471
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2472
0
            return -1;
2473
0
        }
2474
10
        std::string_view k1 = k;
2475
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
2476
10
        k1.remove_prefix(1); // Remove key space
2477
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2478
10
        if (decode_key(&k1, &out) != 0) {
2479
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
2480
0
            return -1;
2481
0
        }
2482
10
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2483
10
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2484
10
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2485
        // Update txn_info
2486
10
        std::string txn_inf_key, txn_inf_val;
2487
10
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
2488
10
        err = txn->get(txn_inf_key, &txn_inf_val);
2489
10
        if (err != TxnErrorCode::TXN_OK) {
2490
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
2491
0
            return -1;
2492
0
        }
2493
10
        TxnInfoPB txn_info;
2494
10
        if (!txn_info.ParseFromString(txn_inf_val)) {
2495
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
2496
0
            return -1;
2497
0
        }
2498
2499
10
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
2500
4
            txn.reset();
2501
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
2502
4
            std::shared_ptr<TxnLazyCommitTask> task =
2503
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
2504
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
2505
4
            if (ret.first != MetaServiceCode::OK) {
2506
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
2507
0
                             << "msg=" << ret.second;
2508
0
                return -1;
2509
0
            }
2510
4
            ++num_advance;
2511
4
            return 0;
2512
6
        } else {
2513
6
            TxnRunningPB txn_running_pb;
2514
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
2515
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2516
0
                return -1;
2517
0
            }
2518
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
2519
4
                return 0;
2520
4
            }
2521
2
            ++num_timeout;
2522
2523
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
2524
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
2525
2
            txn_info.set_finish_time(current_time);
2526
2
            txn_info.set_reason("timeout");
2527
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
2528
2
            txn_inf_val.clear();
2529
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
2530
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
2531
0
                return -1;
2532
0
            }
2533
2
            txn->put(txn_inf_key, txn_inf_val);
2534
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
2535
            // Put recycle txn key
2536
2
            std::string recyc_txn_key, recyc_txn_val;
2537
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
2538
2
            RecycleTxnPB recycle_txn_pb;
2539
2
            recycle_txn_pb.set_creation_time(current_time);
2540
2
            recycle_txn_pb.set_label(txn_info.label());
2541
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
2542
0
                LOG_WARNING("failed to serialize txn recycle info")
2543
0
                        .tag("key", hex(k))
2544
0
                        .tag("db_id", db_id)
2545
0
                        .tag("txn_id", txn_id);
2546
0
                return -1;
2547
0
            }
2548
2
            txn->put(recyc_txn_key, recyc_txn_val);
2549
            // Remove txn running key
2550
2
            txn->remove(k);
2551
2
            err = txn->commit();
2552
2
            if (err != TxnErrorCode::TXN_OK) {
2553
0
                LOG_WARNING("failed to commit txn err={}", err)
2554
0
                        .tag("key", hex(k))
2555
0
                        .tag("db_id", db_id)
2556
0
                        .tag("txn_id", txn_id);
2557
0
                return -1;
2558
0
            }
2559
2
            ++num_abort;
2560
2
        }
2561
2562
2
        return 0;
2563
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2465
6
                                  this](std::string_view k, std::string_view v) -> int {
2466
6
        ++num_scanned;
2467
2468
6
        std::unique_ptr<Transaction> txn;
2469
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2470
6
        if (err != TxnErrorCode::TXN_OK) {
2471
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2472
0
            return -1;
2473
0
        }
2474
6
        std::string_view k1 = k;
2475
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
2476
6
        k1.remove_prefix(1); // Remove key space
2477
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2478
6
        if (decode_key(&k1, &out) != 0) {
2479
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
2480
0
            return -1;
2481
0
        }
2482
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2483
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2484
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2485
        // Update txn_info
2486
6
        std::string txn_inf_key, txn_inf_val;
2487
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
2488
6
        err = txn->get(txn_inf_key, &txn_inf_val);
2489
6
        if (err != TxnErrorCode::TXN_OK) {
2490
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
2491
0
            return -1;
2492
0
        }
2493
6
        TxnInfoPB txn_info;
2494
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
2495
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
2496
0
            return -1;
2497
0
        }
2498
2499
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
2500
0
            txn.reset();
2501
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
2502
0
            std::shared_ptr<TxnLazyCommitTask> task =
2503
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
2504
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
2505
0
            if (ret.first != MetaServiceCode::OK) {
2506
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
2507
0
                             << "msg=" << ret.second;
2508
0
                return -1;
2509
0
            }
2510
0
            ++num_advance;
2511
0
            return 0;
2512
6
        } else {
2513
6
            TxnRunningPB txn_running_pb;
2514
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
2515
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2516
0
                return -1;
2517
0
            }
2518
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
2519
4
                return 0;
2520
4
            }
2521
2
            ++num_timeout;
2522
2523
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
2524
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
2525
2
            txn_info.set_finish_time(current_time);
2526
2
            txn_info.set_reason("timeout");
2527
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
2528
2
            txn_inf_val.clear();
2529
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
2530
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
2531
0
                return -1;
2532
0
            }
2533
2
            txn->put(txn_inf_key, txn_inf_val);
2534
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
2535
            // Put recycle txn key
2536
2
            std::string recyc_txn_key, recyc_txn_val;
2537
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
2538
2
            RecycleTxnPB recycle_txn_pb;
2539
2
            recycle_txn_pb.set_creation_time(current_time);
2540
2
            recycle_txn_pb.set_label(txn_info.label());
2541
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
2542
0
                LOG_WARNING("failed to serialize txn recycle info")
2543
0
                        .tag("key", hex(k))
2544
0
                        .tag("db_id", db_id)
2545
0
                        .tag("txn_id", txn_id);
2546
0
                return -1;
2547
0
            }
2548
2
            txn->put(recyc_txn_key, recyc_txn_val);
2549
            // Remove txn running key
2550
2
            txn->remove(k);
2551
2
            err = txn->commit();
2552
2
            if (err != TxnErrorCode::TXN_OK) {
2553
0
                LOG_WARNING("failed to commit txn err={}", err)
2554
0
                        .tag("key", hex(k))
2555
0
                        .tag("db_id", db_id)
2556
0
                        .tag("txn_id", txn_id);
2557
0
                return -1;
2558
0
            }
2559
2
            ++num_abort;
2560
2
        }
2561
2562
2
        return 0;
2563
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2465
4
                                  this](std::string_view k, std::string_view v) -> int {
2466
4
        ++num_scanned;
2467
2468
4
        std::unique_ptr<Transaction> txn;
2469
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2470
4
        if (err != TxnErrorCode::TXN_OK) {
2471
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2472
0
            return -1;
2473
0
        }
2474
4
        std::string_view k1 = k;
2475
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
2476
4
        k1.remove_prefix(1); // Remove key space
2477
4
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2478
4
        if (decode_key(&k1, &out) != 0) {
2479
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
2480
0
            return -1;
2481
0
        }
2482
4
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2483
4
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2484
4
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2485
        // Update txn_info
2486
4
        std::string txn_inf_key, txn_inf_val;
2487
4
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
2488
4
        err = txn->get(txn_inf_key, &txn_inf_val);
2489
4
        if (err != TxnErrorCode::TXN_OK) {
2490
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
2491
0
            return -1;
2492
0
        }
2493
4
        TxnInfoPB txn_info;
2494
4
        if (!txn_info.ParseFromString(txn_inf_val)) {
2495
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
2496
0
            return -1;
2497
0
        }
2498
2499
4
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
2500
4
            txn.reset();
2501
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
2502
4
            std::shared_ptr<TxnLazyCommitTask> task =
2503
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
2504
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
2505
4
            if (ret.first != MetaServiceCode::OK) {
2506
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
2507
0
                             << "msg=" << ret.second;
2508
0
                return -1;
2509
0
            }
2510
4
            ++num_advance;
2511
4
            return 0;
2512
4
        } else {
2513
0
            TxnRunningPB txn_running_pb;
2514
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
2515
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2516
0
                return -1;
2517
0
            }
2518
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
2519
0
                return 0;
2520
0
            }
2521
0
            ++num_timeout;
2522
2523
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
2524
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
2525
0
            txn_info.set_finish_time(current_time);
2526
0
            txn_info.set_reason("timeout");
2527
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
2528
0
            txn_inf_val.clear();
2529
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
2530
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
2531
0
                return -1;
2532
0
            }
2533
0
            txn->put(txn_inf_key, txn_inf_val);
2534
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
2535
            // Put recycle txn key
2536
0
            std::string recyc_txn_key, recyc_txn_val;
2537
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
2538
0
            RecycleTxnPB recycle_txn_pb;
2539
0
            recycle_txn_pb.set_creation_time(current_time);
2540
0
            recycle_txn_pb.set_label(txn_info.label());
2541
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
2542
0
                LOG_WARNING("failed to serialize txn recycle info")
2543
0
                        .tag("key", hex(k))
2544
0
                        .tag("db_id", db_id)
2545
0
                        .tag("txn_id", txn_id);
2546
0
                return -1;
2547
0
            }
2548
0
            txn->put(recyc_txn_key, recyc_txn_val);
2549
            // Remove txn running key
2550
0
            txn->remove(k);
2551
0
            err = txn->commit();
2552
0
            if (err != TxnErrorCode::TXN_OK) {
2553
0
                LOG_WARNING("failed to commit txn err={}", err)
2554
0
                        .tag("key", hex(k))
2555
0
                        .tag("db_id", db_id)
2556
0
                        .tag("txn_id", txn_id);
2557
0
                return -1;
2558
0
            }
2559
0
            ++num_abort;
2560
0
        }
2561
2562
0
        return 0;
2563
4
    };
2564
2565
20
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
2566
20
                            std::move(handle_txn_running_kv));
2567
20
}
2568
2569
19
int InstanceRecycler::recycle_expired_txn_label() {
2570
19
    const std::string task_name = "recycle_expired_txn_label";
2571
19
    int64_t num_scanned = 0;
2572
19
    int64_t num_expired = 0;
2573
19
    int64_t num_recycled = 0;
2574
19
    int ret = 0;
2575
2576
19
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
2577
19
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
2578
19
    std::string begin_recycle_txn_key;
2579
19
    std::string end_recycle_txn_key;
2580
19
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
2581
19
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
2582
19
    std::vector<std::string> recycle_txn_info_keys;
2583
2584
19
    LOG_INFO("begin to recycle expired txn").tag("instance_id", instance_id_);
2585
2586
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2587
19
    register_recycle_task(task_name, start_time);
2588
19
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2589
19
        unregister_recycle_task(task_name);
2590
19
        int64_t cost =
2591
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2592
19
        LOG_INFO("end to recycle expired txn, cost={}s", cost)
2593
19
                .tag("instance_id", instance_id_)
2594
19
                .tag("num_scanned", num_scanned)
2595
19
                .tag("num_expired", num_expired)
2596
19
                .tag("num_recycled", num_recycled);
2597
19
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEPi
Line
Count
Source
2588
16
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2589
16
        unregister_recycle_task(task_name);
2590
16
        int64_t cost =
2591
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2592
16
        LOG_INFO("end to recycle expired txn, cost={}s", cost)
2593
16
                .tag("instance_id", instance_id_)
2594
16
                .tag("num_scanned", num_scanned)
2595
16
                .tag("num_expired", num_expired)
2596
16
                .tag("num_recycled", num_recycled);
2597
16
    });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEPi
Line
Count
Source
2588
3
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2589
3
        unregister_recycle_task(task_name);
2590
3
        int64_t cost =
2591
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2592
3
        LOG_INFO("end to recycle expired txn, cost={}s", cost)
2593
3
                .tag("instance_id", instance_id_)
2594
3
                .tag("num_scanned", num_scanned)
2595
3
                .tag("num_expired", num_expired)
2596
3
                .tag("num_recycled", num_recycled);
2597
3
    });
2598
2599
19
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2600
30.0k
    auto calc_expiration = [&earlest_ts, this](const RecycleTxnPB& recycle_txn_pb) {
2601
30.0k
        int64_t final_expiration =
2602
30.0k
                recycle_txn_pb.creation_time() + config::label_keep_max_second * 1000L;
2603
30.0k
        if (earlest_ts > final_expiration / 1000) {
2604
6
            earlest_ts = final_expiration / 1000;
2605
6
            g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, earlest_ts);
2606
6
        }
2607
30.0k
        return final_expiration;
2608
30.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNS0_12RecycleTxnPBE
Line
Count
Source
2600
30.0k
    auto calc_expiration = [&earlest_ts, this](const RecycleTxnPB& recycle_txn_pb) {
2601
30.0k
        int64_t final_expiration =
2602
30.0k
                recycle_txn_pb.creation_time() + config::label_keep_max_second * 1000L;
2603
30.0k
        if (earlest_ts > final_expiration / 1000) {
2604
6
            earlest_ts = final_expiration / 1000;
2605
6
            g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, earlest_ts);
2606
6
        }
2607
30.0k
        return final_expiration;
2608
30.0k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNS0_12RecycleTxnPBE
2609
2610
19
    SyncExecutor<int> concurrent_delete_executor(
2611
19
            _thread_pool_group.s3_producer_pool,
2612
19
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
2613
23.0k
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clERKi
Line
Count
Source
2613
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clERKi
Line
Count
Source
2613
3
            [](const int& ret) { return ret != 0; });
2614
2615
19
    int64_t current_time_ms =
2616
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2617
2618
30.0k
    auto handle_recycle_txn_kv = [&](std::string_view k, std::string_view v) -> int {
2619
30.0k
        ++num_scanned;
2620
30.0k
        RecycleTxnPB recycle_txn_pb;
2621
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
2622
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2623
0
            return -1;
2624
0
        }
2625
30.0k
        if ((config::force_immediate_recycle) ||
2626
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
2627
30.0k
            (calc_expiration(recycle_txn_pb) <= current_time_ms)) {
2628
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
2629
23.0k
            num_expired++;
2630
23.0k
            recycle_txn_info_keys.emplace_back(k);
2631
23.0k
        }
2632
30.0k
        return 0;
2633
30.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2618
30.0k
    auto handle_recycle_txn_kv = [&](std::string_view k, std::string_view v) -> int {
2619
30.0k
        ++num_scanned;
2620
30.0k
        RecycleTxnPB recycle_txn_pb;
2621
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
2622
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2623
0
            return -1;
2624
0
        }
2625
30.0k
        if ((config::force_immediate_recycle) ||
2626
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
2627
30.0k
            (calc_expiration(recycle_txn_pb) <= current_time_ms)) {
2628
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
2629
23.0k
            num_expired++;
2630
23.0k
            recycle_txn_info_keys.emplace_back(k);
2631
23.0k
        }
2632
30.0k
        return 0;
2633
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2618
3
    auto handle_recycle_txn_kv = [&](std::string_view k, std::string_view v) -> int {
2619
3
        ++num_scanned;
2620
3
        RecycleTxnPB recycle_txn_pb;
2621
3
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
2622
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2623
0
            return -1;
2624
0
        }
2625
3
        if ((config::force_immediate_recycle) ||
2626
3
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
2627
3
            (calc_expiration(recycle_txn_pb) <= current_time_ms)) {
2628
3
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
2629
3
            num_expired++;
2630
3
            recycle_txn_info_keys.emplace_back(k);
2631
3
        }
2632
3
        return 0;
2633
3
    };
2634
2635
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
2636
23.0k
        std::string_view k1 = k;
2637
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
2638
23.0k
        k1.remove_prefix(1); // Remove key space
2639
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2640
23.0k
        int ret = decode_key(&k1, &out);
2641
23.0k
        if (ret != 0) {
2642
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
2643
0
            return -1;
2644
0
        }
2645
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2646
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2647
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2648
23.0k
        std::unique_ptr<Transaction> txn;
2649
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2650
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2651
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2652
0
            return -1;
2653
0
        }
2654
        // Remove txn index kv
2655
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
2656
23.0k
        txn->remove(index_key);
2657
        // Remove txn info kv
2658
23.0k
        std::string info_key, info_val;
2659
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
2660
23.0k
        err = txn->get(info_key, &info_val);
2661
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2662
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
2663
0
            return -1;
2664
0
        }
2665
23.0k
        TxnInfoPB txn_info;
2666
23.0k
        if (!txn_info.ParseFromString(info_val)) {
2667
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
2668
0
            return -1;
2669
0
        }
2670
23.0k
        txn->remove(info_key);
2671
        // Remove sub txn index kvs
2672
23.0k
        std::vector<std::string> sub_txn_index_keys;
2673
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
2674
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
2675
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
2676
22.9k
        }
2677
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
2678
22.9k
            txn->remove(sub_txn_index_key);
2679
22.9k
        }
2680
        // Update txn label
2681
23.0k
        std::string label_key, label_val;
2682
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
2683
23.0k
        err = txn->get(label_key, &label_val);
2684
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2685
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
2686
0
                         << " err=" << err;
2687
0
            return -1;
2688
0
        }
2689
23.0k
        TxnLabelPB txn_label;
2690
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
2691
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
2692
0
            return -1;
2693
0
        }
2694
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
2695
23.0k
        if (it != txn_label.txn_ids().end()) {
2696
23.0k
            txn_label.mutable_txn_ids()->erase(it);
2697
23.0k
        }
2698
23.0k
        if (txn_label.txn_ids().empty()) {
2699
23.0k
            txn->remove(label_key);
2700
23.0k
        } else {
2701
0
            if (!txn_label.SerializeToString(&label_val)) {
2702
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
2703
0
                return -1;
2704
0
            }
2705
0
            txn->atomic_set_ver_value(label_key, label_val);
2706
0
        }
2707
        // Remove recycle txn kv
2708
23.0k
        txn->remove(k);
2709
23.0k
        err = txn->commit();
2710
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2711
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
2712
0
            return -1;
2713
0
        }
2714
23.0k
        ++num_recycled;
2715
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
2716
23.0k
        return 0;
2717
23.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_5clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
2635
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
2636
23.0k
        std::string_view k1 = k;
2637
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
2638
23.0k
        k1.remove_prefix(1); // Remove key space
2639
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2640
23.0k
        int ret = decode_key(&k1, &out);
2641
23.0k
        if (ret != 0) {
2642
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
2643
0
            return -1;
2644
0
        }
2645
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2646
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2647
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2648
23.0k
        std::unique_ptr<Transaction> txn;
2649
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2650
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2651
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2652
0
            return -1;
2653
0
        }
2654
        // Remove txn index kv
2655
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
2656
23.0k
        txn->remove(index_key);
2657
        // Remove txn info kv
2658
23.0k
        std::string info_key, info_val;
2659
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
2660
23.0k
        err = txn->get(info_key, &info_val);
2661
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2662
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
2663
0
            return -1;
2664
0
        }
2665
23.0k
        TxnInfoPB txn_info;
2666
23.0k
        if (!txn_info.ParseFromString(info_val)) {
2667
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
2668
0
            return -1;
2669
0
        }
2670
23.0k
        txn->remove(info_key);
2671
        // Remove sub txn index kvs
2672
23.0k
        std::vector<std::string> sub_txn_index_keys;
2673
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
2674
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
2675
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
2676
22.9k
        }
2677
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
2678
22.9k
            txn->remove(sub_txn_index_key);
2679
22.9k
        }
2680
        // Update txn label
2681
23.0k
        std::string label_key, label_val;
2682
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
2683
23.0k
        err = txn->get(label_key, &label_val);
2684
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2685
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
2686
0
                         << " err=" << err;
2687
0
            return -1;
2688
0
        }
2689
23.0k
        TxnLabelPB txn_label;
2690
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
2691
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
2692
0
            return -1;
2693
0
        }
2694
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
2695
23.0k
        if (it != txn_label.txn_ids().end()) {
2696
23.0k
            txn_label.mutable_txn_ids()->erase(it);
2697
23.0k
        }
2698
23.0k
        if (txn_label.txn_ids().empty()) {
2699
23.0k
            txn->remove(label_key);
2700
23.0k
        } else {
2701
0
            if (!txn_label.SerializeToString(&label_val)) {
2702
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
2703
0
                return -1;
2704
0
            }
2705
0
            txn->atomic_set_ver_value(label_key, label_val);
2706
0
        }
2707
        // Remove recycle txn kv
2708
23.0k
        txn->remove(k);
2709
23.0k
        err = txn->commit();
2710
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2711
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
2712
0
            return -1;
2713
0
        }
2714
23.0k
        ++num_recycled;
2715
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
2716
23.0k
        return 0;
2717
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_5clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
2635
3
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
2636
3
        std::string_view k1 = k;
2637
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
2638
3
        k1.remove_prefix(1); // Remove key space
2639
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2640
3
        int ret = decode_key(&k1, &out);
2641
3
        if (ret != 0) {
2642
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
2643
0
            return -1;
2644
0
        }
2645
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2646
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2647
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2648
3
        std::unique_ptr<Transaction> txn;
2649
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2650
3
        if (err != TxnErrorCode::TXN_OK) {
2651
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2652
0
            return -1;
2653
0
        }
2654
        // Remove txn index kv
2655
3
        auto index_key = txn_index_key({instance_id_, txn_id});
2656
3
        txn->remove(index_key);
2657
        // Remove txn info kv
2658
3
        std::string info_key, info_val;
2659
3
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
2660
3
        err = txn->get(info_key, &info_val);
2661
3
        if (err != TxnErrorCode::TXN_OK) {
2662
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
2663
0
            return -1;
2664
0
        }
2665
3
        TxnInfoPB txn_info;
2666
3
        if (!txn_info.ParseFromString(info_val)) {
2667
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
2668
0
            return -1;
2669
0
        }
2670
3
        txn->remove(info_key);
2671
        // Remove sub txn index kvs
2672
3
        std::vector<std::string> sub_txn_index_keys;
2673
3
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
2674
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
2675
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
2676
0
        }
2677
3
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
2678
0
            txn->remove(sub_txn_index_key);
2679
0
        }
2680
        // Update txn label
2681
3
        std::string label_key, label_val;
2682
3
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
2683
3
        err = txn->get(label_key, &label_val);
2684
3
        if (err != TxnErrorCode::TXN_OK) {
2685
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
2686
0
                         << " err=" << err;
2687
0
            return -1;
2688
0
        }
2689
3
        TxnLabelPB txn_label;
2690
3
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
2691
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
2692
0
            return -1;
2693
0
        }
2694
3
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
2695
3
        if (it != txn_label.txn_ids().end()) {
2696
3
            txn_label.mutable_txn_ids()->erase(it);
2697
3
        }
2698
3
        if (txn_label.txn_ids().empty()) {
2699
3
            txn->remove(label_key);
2700
3
        } else {
2701
0
            if (!txn_label.SerializeToString(&label_val)) {
2702
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
2703
0
                return -1;
2704
0
            }
2705
0
            txn->atomic_set_ver_value(label_key, label_val);
2706
0
        }
2707
        // Remove recycle txn kv
2708
3
        txn->remove(k);
2709
3
        err = txn->commit();
2710
3
        if (err != TxnErrorCode::TXN_OK) {
2711
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
2712
0
            return -1;
2713
0
        }
2714
3
        ++num_recycled;
2715
3
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
2716
3
        return 0;
2717
3
    };
2718
2719
19
    auto loop_done = [&]() -> int {
2720
10
        std::unique_ptr<int, std::function<void(int*)>> defer(
2721
10
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlPiE_clES3_
Line
Count
Source
2721
7
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlPiE_clES3_
Line
Count
Source
2721
3
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
2722
10
        TEST_SYNC_POINT_CALLBACK(
2723
10
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
2724
10
                &recycle_txn_info_keys);
2725
23.0k
        for (const auto& k : recycle_txn_info_keys) {
2726
23.0k
            concurrent_delete_executor.add([&]() {
2727
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
2728
0
                    LOG_WARNING("failed to delete recycle txn kv")
2729
0
                            .tag("instance id", instance_id_)
2730
0
                            .tag("key", hex(k));
2731
0
                    return -1;
2732
0
                }
2733
23.0k
                return 0;
2734
23.0k
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlvE_clEv
Line
Count
Source
2726
23.0k
            concurrent_delete_executor.add([&]() {
2727
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
2728
0
                    LOG_WARNING("failed to delete recycle txn kv")
2729
0
                            .tag("instance id", instance_id_)
2730
0
                            .tag("key", hex(k));
2731
0
                    return -1;
2732
0
                }
2733
23.0k
                return 0;
2734
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlvE_clEv
Line
Count
Source
2726
3
            concurrent_delete_executor.add([&]() {
2727
3
                if (delete_recycle_txn_kv(k) != 0) {
2728
0
                    LOG_WARNING("failed to delete recycle txn kv")
2729
0
                            .tag("instance id", instance_id_)
2730
0
                            .tag("key", hex(k));
2731
0
                    return -1;
2732
0
                }
2733
3
                return 0;
2734
3
            });
2735
23.0k
        }
2736
10
        bool finished = true;
2737
10
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2738
23.0k
        for (int r : rets) {
2739
23.0k
            if (r != 0) {
2740
0
                ret = -1;
2741
0
            }
2742
23.0k
        }
2743
2744
10
        ret = finished ? ret : -1;
2745
2746
10
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
2747
2748
10
        if (ret != 0) {
2749
2
            LOG_WARNING("recycle txn kv ret!=0")
2750
2
                    .tag("finished", finished)
2751
2
                    .tag("ret", ret)
2752
2
                    .tag("instance_id", instance_id_);
2753
2
            return ret;
2754
2
        }
2755
8
        return ret;
2756
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEv
Line
Count
Source
2719
7
    auto loop_done = [&]() -> int {
2720
7
        std::unique_ptr<int, std::function<void(int*)>> defer(
2721
7
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
2722
7
        TEST_SYNC_POINT_CALLBACK(
2723
7
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
2724
7
                &recycle_txn_info_keys);
2725
23.0k
        for (const auto& k : recycle_txn_info_keys) {
2726
23.0k
            concurrent_delete_executor.add([&]() {
2727
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
2728
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
2729
23.0k
                            .tag("instance id", instance_id_)
2730
23.0k
                            .tag("key", hex(k));
2731
23.0k
                    return -1;
2732
23.0k
                }
2733
23.0k
                return 0;
2734
23.0k
            });
2735
23.0k
        }
2736
7
        bool finished = true;
2737
7
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2738
23.0k
        for (int r : rets) {
2739
23.0k
            if (r != 0) {
2740
0
                ret = -1;
2741
0
            }
2742
23.0k
        }
2743
2744
7
        ret = finished ? ret : -1;
2745
2746
7
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
2747
2748
7
        if (ret != 0) {
2749
2
            LOG_WARNING("recycle txn kv ret!=0")
2750
2
                    .tag("finished", finished)
2751
2
                    .tag("ret", ret)
2752
2
                    .tag("instance_id", instance_id_);
2753
2
            return ret;
2754
2
        }
2755
5
        return ret;
2756
7
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEv
Line
Count
Source
2719
3
    auto loop_done = [&]() -> int {
2720
3
        std::unique_ptr<int, std::function<void(int*)>> defer(
2721
3
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
2722
3
        TEST_SYNC_POINT_CALLBACK(
2723
3
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
2724
3
                &recycle_txn_info_keys);
2725
3
        for (const auto& k : recycle_txn_info_keys) {
2726
3
            concurrent_delete_executor.add([&]() {
2727
3
                if (delete_recycle_txn_kv(k) != 0) {
2728
3
                    LOG_WARNING("failed to delete recycle txn kv")
2729
3
                            .tag("instance id", instance_id_)
2730
3
                            .tag("key", hex(k));
2731
3
                    return -1;
2732
3
                }
2733
3
                return 0;
2734
3
            });
2735
3
        }
2736
3
        bool finished = true;
2737
3
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2738
3
        for (int r : rets) {
2739
3
            if (r != 0) {
2740
0
                ret = -1;
2741
0
            }
2742
3
        }
2743
2744
3
        ret = finished ? ret : -1;
2745
2746
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
2747
2748
3
        if (ret != 0) {
2749
0
            LOG_WARNING("recycle txn kv ret!=0")
2750
0
                    .tag("finished", finished)
2751
0
                    .tag("ret", ret)
2752
0
                    .tag("instance_id", instance_id_);
2753
0
            return ret;
2754
0
        }
2755
3
        return ret;
2756
3
    };
2757
2758
19
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
2759
19
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
2760
19
}
2761
2762
// Identifiers of one copy job. BatchObjStoreAccessor::add() unpacks the fields in declaration
// order to build its log trace and the copy file keys, so the member order must not change.
struct CopyJobIdTuple {
    std::string instance_id; // instance the copy job belongs to
    std::string stage_id;    // stage the copy job reads from
    long table_id;           // target table of the copy job
    std::string copy_id;     // printed as `query_id` in the log trace
    std::string stage_path;  // printed as `path` in the log trace
};
2769
struct BatchObjStoreAccessor {
2770
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
2771
                          TxnKv* txn_kv)
2772
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
2773
3
    ~BatchObjStoreAccessor() {
2774
3
        if (!paths_.empty()) {
2775
3
            consume();
2776
3
        }
2777
3
    }
2778
2779
    /**
2780
    * To implicitely do batch work and submit the batch delete task to s3
2781
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
2782
    *
2783
    * @param copy_job The protubuf struct consists of the copy job files.
2784
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
2785
    *            it would last until we finish the delete task, here we need pass one string value
2786
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
2787
    */
2788
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
2789
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
2790
5
        auto& file_keys = copy_file_keys_[key];
2791
5
        file_keys.log_trace =
2792
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
2793
5
                            instance_id, stage_id, table_id, copy_id, path);
2794
5
        std::string_view log_trace = file_keys.log_trace;
2795
2.03k
        for (const auto& file : copy_job.object_files()) {
2796
2.03k
            auto relative_path = file.relative_path();
2797
2.03k
            paths_.push_back(relative_path);
2798
2.03k
            file_keys.keys.push_back(copy_file_key(
2799
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
2800
2.03k
            LOG_INFO(log_trace)
2801
2.03k
                    .tag("relative_path", relative_path)
2802
2.03k
                    .tag("batch_count", batch_count_);
2803
2.03k
        }
2804
5
        LOG_INFO(log_trace)
2805
5
                .tag("objects_num", copy_job.object_files().size())
2806
5
                .tag("batch_count", batch_count_);
2807
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
2808
        // recommend using delete objects when objects num is less than 10)
2809
5
        if (paths_.size() < 1000) {
2810
3
            return;
2811
3
        }
2812
2
        consume();
2813
2
    }
2814
2815
private:
2816
5
    void consume() {
2817
5
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [this](int*) {
2818
5
            paths_.clear();
2819
5
            copy_file_keys_.clear();
2820
5
            batch_count_++;
2821
5
        });
2822
5
        LOG_INFO("begin to delete {} internal stage objects in batch {}", paths_.size(),
2823
5
                 batch_count_);
2824
5
        StopWatch sw;
2825
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
2826
5
        if (0 != accessor_->delete_files(paths_)) {
2827
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
2828
2
                        paths_.size(), batch_count_, sw.elapsed_us());
2829
2
            return;
2830
2
        }
2831
3
        LOG_INFO("succeed to delete {} internal stage objects in batch {} and it takes {} us",
2832
3
                 paths_.size(), batch_count_, sw.elapsed_us());
2833
        // delete fdb's keys
2834
3
        for (auto& file_keys : copy_file_keys_) {
2835
3
            auto& [log_trace, keys] = file_keys.second;
2836
3
            std::unique_ptr<Transaction> txn;
2837
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
2838
0
                LOG(WARNING) << "failed to create txn";
2839
0
                continue;
2840
0
            }
2841
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
2842
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
2843
            // limited, should not cause the txn commit failed.
2844
1.02k
            for (const auto& key : keys) {
2845
1.02k
                txn->remove(key);
2846
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
2847
1.02k
            }
2848
3
            txn->remove(file_keys.first);
2849
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
2850
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
2851
0
                continue;
2852
0
            }
2853
3
        }
2854
3
    }
2855
    std::shared_ptr<StorageVaultAccessor> accessor_;
2856
    // the path of the s3 files to be deleted
2857
    std::vector<std::string> paths_;
2858
    struct CopyFiles {
2859
        std::string log_trace;
2860
        std::vector<std::string> keys;
2861
    };
2862
    // pair<std::string, std::vector<std::string>>
2863
    // first: instance_id_ stage_id table_id query_id
2864
    // second: keys to be deleted
2865
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
2866
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
2867
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
2868
    // which can together uniquely identifies different tasks for tracing log
2869
    uint64_t& batch_count_;
2870
    TxnKv* txn_kv_;
2871
};
2872
2873
13
int InstanceRecycler::recycle_copy_jobs() {
2874
13
    int64_t num_scanned = 0;
2875
13
    int64_t num_finished = 0;
2876
13
    int64_t num_expired = 0;
2877
13
    int64_t num_recycled = 0;
2878
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
2879
13
    uint64_t batch_count = 0;
2880
13
    const std::string task_name = "recycle_copy_jobs";
2881
2882
13
    LOG_INFO("begin to recycle copy jobs").tag("instance_id", instance_id_);
2883
2884
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2885
13
    register_recycle_task(task_name, start_time);
2886
2887
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2888
13
        unregister_recycle_task(task_name);
2889
13
        int64_t cost =
2890
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2891
13
        LOG_INFO("recycle copy jobs finished, cost={}s", cost)
2892
13
                .tag("instance_id", instance_id_)
2893
13
                .tag("num_scanned", num_scanned)
2894
13
                .tag("num_finished", num_finished)
2895
13
                .tag("num_expired", num_expired)
2896
13
                .tag("num_recycled", num_recycled);
2897
13
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEPi
Line
Count
Source
2887
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2888
13
        unregister_recycle_task(task_name);
2889
13
        int64_t cost =
2890
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2891
13
        LOG_INFO("recycle copy jobs finished, cost={}s", cost)
2892
13
                .tag("instance_id", instance_id_)
2893
13
                .tag("num_scanned", num_scanned)
2894
13
                .tag("num_finished", num_finished)
2895
13
                .tag("num_expired", num_expired)
2896
13
                .tag("num_recycled", num_recycled);
2897
13
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEPi
2898
2899
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
2900
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
2901
13
    std::string key0;
2902
13
    std::string key1;
2903
13
    copy_job_key(key_info0, &key0);
2904
13
    copy_job_key(key_info1, &key1);
2905
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
2906
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
2907
13
                         &batch_count, &stage_accessor_map, &task_name,
2908
16
                         this](std::string_view k, std::string_view v) -> int {
2909
16
        ++num_scanned;
2910
16
        CopyJobPB copy_job;
2911
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
2912
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
2913
0
            return -1;
2914
0
        }
2915
2916
        // decode copy job key
2917
16
        auto k1 = k;
2918
16
        k1.remove_prefix(1);
2919
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2920
16
        decode_key(&k1, &out);
2921
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
2922
        // -> CopyJobPB
2923
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
2924
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
2925
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
2926
2927
16
        bool check_storage = true;
2928
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
2929
12
            ++num_finished;
2930
2931
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
2932
7
                auto it = stage_accessor_map.find(stage_id);
2933
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
2934
7
                std::string_view path;
2935
7
                if (it != stage_accessor_map.end()) {
2936
2
                    accessor = it->second;
2937
5
                } else {
2938
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
2939
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
2940
5
                                                      &inner_accessor);
2941
5
                    if (ret < 0) { // error
2942
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
2943
0
                        return -1;
2944
5
                    } else if (ret == 0) {
2945
3
                        path = inner_accessor->uri();
2946
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
2947
3
                                inner_accessor, batch_count, txn_kv_.get());
2948
3
                        stage_accessor_map.emplace(stage_id, accessor);
2949
3
                    } else { // stage not found, skip check storage
2950
2
                        check_storage = false;
2951
2
                    }
2952
5
                }
2953
7
                if (check_storage) {
2954
                    // TODO delete objects with key and etag is not supported
2955
5
                    accessor->add(std::move(copy_job), std::string(k),
2956
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
2957
5
                    return 0;
2958
5
                }
2959
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
2960
5
                int64_t current_time =
2961
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2962
5
                if (copy_job.finish_time_ms() > 0) {
2963
2
                    if (!config::force_immediate_recycle &&
2964
2
                        current_time < copy_job.finish_time_ms() +
2965
2
                                               config::copy_job_max_retention_second * 1000) {
2966
1
                        return 0;
2967
1
                    }
2968
3
                } else {
2969
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
2970
3
                    if (!config::force_immediate_recycle &&
2971
3
                        current_time < copy_job.start_time_ms() +
2972
3
                                               config::copy_job_max_retention_second * 1000) {
2973
1
                        return 0;
2974
1
                    }
2975
3
                }
2976
5
            }
2977
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
2978
4
            int64_t current_time =
2979
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2980
            // if copy job is timeout: delete all copy file kvs and copy job kv
2981
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
2982
2
                return 0;
2983
2
            }
2984
2
            ++num_expired;
2985
2
        }
2986
2987
        // delete all copy files
2988
7
        std::vector<std::string> copy_file_keys;
2989
70
        for (auto& file : copy_job.object_files()) {
2990
70
            copy_file_keys.push_back(copy_file_key(
2991
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
2992
70
        }
2993
7
        std::unique_ptr<Transaction> txn;
2994
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2995
0
            LOG(WARNING) << "failed to create txn";
2996
0
            return -1;
2997
0
        }
2998
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
2999
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
3000
        // limited, should not cause the txn commit failed.
3001
70
        for (const auto& key : copy_file_keys) {
3002
70
            txn->remove(key);
3003
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
3004
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
3005
70
                      << ", query_id=" << copy_id;
3006
70
        }
3007
7
        txn->remove(k);
3008
7
        TxnErrorCode err = txn->commit();
3009
7
        if (err != TxnErrorCode::TXN_OK) {
3010
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
3011
0
            return -1;
3012
0
        }
3013
3014
7
        ++num_recycled;
3015
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
3016
7
        return 0;
3017
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2908
16
                         this](std::string_view k, std::string_view v) -> int {
2909
16
        ++num_scanned;
2910
16
        CopyJobPB copy_job;
2911
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
2912
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
2913
0
            return -1;
2914
0
        }
2915
2916
        // decode copy job key
2917
16
        auto k1 = k;
2918
16
        k1.remove_prefix(1);
2919
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2920
16
        decode_key(&k1, &out);
2921
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
2922
        // -> CopyJobPB
2923
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
2924
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
2925
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
2926
2927
16
        bool check_storage = true;
2928
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
2929
12
            ++num_finished;
2930
2931
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
2932
7
                auto it = stage_accessor_map.find(stage_id);
2933
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
2934
7
                std::string_view path;
2935
7
                if (it != stage_accessor_map.end()) {
2936
2
                    accessor = it->second;
2937
5
                } else {
2938
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
2939
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
2940
5
                                                      &inner_accessor);
2941
5
                    if (ret < 0) { // error
2942
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
2943
0
                        return -1;
2944
5
                    } else if (ret == 0) {
2945
3
                        path = inner_accessor->uri();
2946
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
2947
3
                                inner_accessor, batch_count, txn_kv_.get());
2948
3
                        stage_accessor_map.emplace(stage_id, accessor);
2949
3
                    } else { // stage not found, skip check storage
2950
2
                        check_storage = false;
2951
2
                    }
2952
5
                }
2953
7
                if (check_storage) {
2954
                    // TODO delete objects with key and etag is not supported
2955
5
                    accessor->add(std::move(copy_job), std::string(k),
2956
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
2957
5
                    return 0;
2958
5
                }
2959
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
2960
5
                int64_t current_time =
2961
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2962
5
                if (copy_job.finish_time_ms() > 0) {
2963
2
                    if (!config::force_immediate_recycle &&
2964
2
                        current_time < copy_job.finish_time_ms() +
2965
2
                                               config::copy_job_max_retention_second * 1000) {
2966
1
                        return 0;
2967
1
                    }
2968
3
                } else {
2969
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
2970
3
                    if (!config::force_immediate_recycle &&
2971
3
                        current_time < copy_job.start_time_ms() +
2972
3
                                               config::copy_job_max_retention_second * 1000) {
2973
1
                        return 0;
2974
1
                    }
2975
3
                }
2976
5
            }
2977
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
2978
4
            int64_t current_time =
2979
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2980
            // if copy job is timeout: delete all copy file kvs and copy job kv
2981
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
2982
2
                return 0;
2983
2
            }
2984
2
            ++num_expired;
2985
2
        }
2986
2987
        // delete all copy files
2988
7
        std::vector<std::string> copy_file_keys;
2989
70
        for (auto& file : copy_job.object_files()) {
2990
70
            copy_file_keys.push_back(copy_file_key(
2991
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
2992
70
        }
2993
7
        std::unique_ptr<Transaction> txn;
2994
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2995
0
            LOG(WARNING) << "failed to create txn";
2996
0
            return -1;
2997
0
        }
2998
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
2999
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
3000
        // limited, should not cause the txn commit failed.
3001
70
        for (const auto& key : copy_file_keys) {
3002
70
            txn->remove(key);
3003
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
3004
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
3005
70
                      << ", query_id=" << copy_id;
3006
70
        }
3007
7
        txn->remove(k);
3008
7
        TxnErrorCode err = txn->commit();
3009
7
        if (err != TxnErrorCode::TXN_OK) {
3010
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
3011
0
            return -1;
3012
0
        }
3013
3014
7
        ++num_recycled;
3015
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
3016
7
        return 0;
3017
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
3018
3019
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
3020
13
}
3021
3022
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
3023
                                             const StagePB::StageType& stage_type,
3024
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
3025
5
#ifdef UNIT_TEST
3026
    // In unit test, external use the same accessor as the internal stage
3027
5
    auto it = accessor_map_.find(stage_id);
3028
5
    if (it != accessor_map_.end()) {
3029
3
        *accessor = it->second;
3030
3
    } else {
3031
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
3032
2
        return 1;
3033
2
    }
3034
#else
3035
    // init s3 accessor and add to accessor map
3036
    auto stage_it =
3037
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
3038
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
3039
3040
    if (stage_it == instance_info_.stages().end()) {
3041
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
3042
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
3043
        return 1;
3044
    }
3045
3046
    const auto& object_store_info = stage_it->obj_info();
3047
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
3048
3049
    S3Conf s3_conf;
3050
    if (stage_type == StagePB::EXTERNAL) {
3051
        if (stage_access_type == StagePB::AKSK) {
3052
            auto conf = S3Conf::from_obj_store_info(object_store_info);
3053
            if (!conf) {
3054
                return -1;
3055
            }
3056
3057
            s3_conf = std::move(*conf);
3058
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
3059
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
3060
            if (!conf) {
3061
                return -1;
3062
            }
3063
3064
            s3_conf = std::move(*conf);
3065
            if (instance_info_.ram_user().has_encryption_info()) {
3066
                AkSkPair plain_ak_sk_pair;
3067
                int ret = decrypt_ak_sk_helper(
3068
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
3069
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
3070
                if (ret != 0) {
3071
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
3072
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
3073
                    return -1;
3074
                }
3075
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
3076
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
3077
            } else {
3078
                s3_conf.ak = instance_info_.ram_user().ak();
3079
                s3_conf.sk = instance_info_.ram_user().sk();
3080
            }
3081
        } else {
3082
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
3083
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
3084
            return -1;
3085
        }
3086
    } else if (stage_type == StagePB::INTERNAL) {
3087
        int idx = stoi(object_store_info.id());
3088
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3089
            LOG(WARNING) << "invalid idx: " << idx;
3090
            return -1;
3091
        }
3092
3093
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
3094
        auto conf = S3Conf::from_obj_store_info(old_obj);
3095
        if (!conf) {
3096
            return -1;
3097
        }
3098
3099
        s3_conf = std::move(*conf);
3100
        s3_conf.prefix = object_store_info.prefix();
3101
    } else {
3102
        LOG(WARNING) << "unknown stage type " << stage_type;
3103
        return -1;
3104
    }
3105
3106
    std::shared_ptr<S3Accessor> s3_accessor;
3107
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
3108
    if (ret != 0) {
3109
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
3110
        return -1;
3111
    }
3112
3113
    *accessor = std::move(s3_accessor);
3114
#endif
3115
3
    return 0;
3116
5
}
3117
3118
11
int InstanceRecycler::recycle_stage() {
3119
11
    int64_t num_scanned = 0;
3120
11
    int64_t num_recycled = 0;
3121
11
    const std::string task_name = "recycle_stage";
3122
3123
11
    LOG_INFO("begin to recycle stage").tag("instance_id", instance_id_);
3124
3125
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3126
11
    register_recycle_task(task_name, start_time);
3127
3128
11
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
3129
11
        unregister_recycle_task(task_name);
3130
11
        int64_t cost =
3131
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3132
11
        LOG_INFO("recycle stage, cost={}s", cost)
3133
11
                .tag("instance_id", instance_id_)
3134
11
                .tag("num_scanned", num_scanned)
3135
11
                .tag("num_recycled", num_recycled);
3136
11
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEPi
Line
Count
Source
3128
11
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
3129
11
        unregister_recycle_task(task_name);
3130
11
        int64_t cost =
3131
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3132
11
        LOG_INFO("recycle stage, cost={}s", cost)
3133
11
                .tag("instance_id", instance_id_)
3134
11
                .tag("num_scanned", num_scanned)
3135
11
                .tag("num_recycled", num_recycled);
3136
11
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEPi
3137
3138
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
3139
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
3140
11
    std::string key0 = recycle_stage_key(key_info0);
3141
11
    std::string key1 = recycle_stage_key(key_info1);
3142
3143
11
    std::vector<std::string_view> stage_keys;
3144
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, this](
3145
11
                                std::string_view k, std::string_view v) -> int {
3146
1
        ++num_scanned;
3147
1
        RecycleStagePB recycle_stage;
3148
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
3149
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
3150
0
            return -1;
3151
0
        }
3152
3153
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
3154
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3155
0
            LOG(WARNING) << "invalid idx: " << idx;
3156
0
            return -1;
3157
0
        }
3158
3159
1
        std::shared_ptr<StorageVaultAccessor> accessor;
3160
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
3161
1
                [&] {
3162
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
3163
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
3164
1
                    if (!s3_conf) {
3165
1
                        return -1;
3166
1
                    }
3167
3168
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
3169
1
                    std::shared_ptr<S3Accessor> s3_accessor;
3170
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
3171
1
                    if (ret != 0) {
3172
1
                        return -1;
3173
1
                    }
3174
3175
1
                    accessor = std::move(s3_accessor);
3176
1
                    return 0;
3177
1
                }(),
3178
1
                "recycle_stage:get_accessor", &accessor);
3179
3180
1
        if (ret != 0) {
3181
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
3182
0
            return ret;
3183
0
        }
3184
3185
1
        LOG_INFO("begin to delete objects of dropped internal stage")
3186
1
                .tag("instance_id", instance_id_)
3187
1
                .tag("stage_id", recycle_stage.stage().stage_id())
3188
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
3189
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
3190
1
                .tag("obj_info_id", idx)
3191
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
3192
1
        ret = accessor->delete_all();
3193
1
        if (ret != 0) {
3194
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
3195
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
3196
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
3197
0
                         << ", ret=" << ret;
3198
0
            return -1;
3199
0
        }
3200
1
        ++num_recycled;
3201
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
3202
1
        stage_keys.push_back(k);
3203
1
        return 0;
3204
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3145
1
                                std::string_view k, std::string_view v) -> int {
3146
1
        ++num_scanned;
3147
1
        RecycleStagePB recycle_stage;
3148
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
3149
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
3150
0
            return -1;
3151
0
        }
3152
3153
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
3154
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3155
0
            LOG(WARNING) << "invalid idx: " << idx;
3156
0
            return -1;
3157
0
        }
3158
3159
1
        std::shared_ptr<StorageVaultAccessor> accessor;
3160
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
3161
1
                [&] {
3162
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
3163
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
3164
1
                    if (!s3_conf) {
3165
1
                        return -1;
3166
1
                    }
3167
3168
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
3169
1
                    std::shared_ptr<S3Accessor> s3_accessor;
3170
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
3171
1
                    if (ret != 0) {
3172
1
                        return -1;
3173
1
                    }
3174
3175
1
                    accessor = std::move(s3_accessor);
3176
1
                    return 0;
3177
1
                }(),
3178
1
                "recycle_stage:get_accessor", &accessor);
3179
3180
1
        if (ret != 0) {
3181
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
3182
0
            return ret;
3183
0
        }
3184
3185
1
        LOG_INFO("begin to delete objects of dropped internal stage")
3186
1
                .tag("instance_id", instance_id_)
3187
1
                .tag("stage_id", recycle_stage.stage().stage_id())
3188
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
3189
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
3190
1
                .tag("obj_info_id", idx)
3191
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
3192
1
        ret = accessor->delete_all();
3193
1
        if (ret != 0) {
3194
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
3195
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
3196
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
3197
0
                         << ", ret=" << ret;
3198
0
            return -1;
3199
0
        }
3200
1
        ++num_recycled;
3201
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
3202
1
        stage_keys.push_back(k);
3203
1
        return 0;
3204
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
3205
3206
11
    auto loop_done = [&stage_keys, this]() -> int {
3207
1
        if (stage_keys.empty()) return 0;
3208
1
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
3209
1
                                                              [&](int*) { stage_keys.clear(); });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
3209
1
                                                              [&](int*) { stage_keys.clear(); });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEvENKUlPiE_clES3_
3210
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
3211
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
3212
0
            return -1;
3213
0
        }
3214
1
        return 0;
3215
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEv
Line
Count
Source
3206
1
    auto loop_done = [&stage_keys, this]() -> int {
3207
1
        if (stage_keys.empty()) return 0;
3208
1
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
3209
1
                                                              [&](int*) { stage_keys.clear(); });
3210
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
3211
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
3212
0
            return -1;
3213
0
        }
3214
1
        return 0;
3215
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEv
3216
3217
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
3218
11
}
3219
3220
10
int InstanceRecycler::recycle_expired_stage_objects() {
3221
10
    LOG_INFO("begin to recycle expired stage objects").tag("instance_id", instance_id_);
3222
3223
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3224
3225
10
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
3226
10
        int64_t cost =
3227
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3228
10
        LOG_INFO("recycle expired stage objects, cost={}s", cost).tag("instance_id", instance_id_);
3229
10
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEPi
Line
Count
Source
3225
10
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
3226
10
        int64_t cost =
3227
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3228
10
        LOG_INFO("recycle expired stage objects, cost={}s", cost).tag("instance_id", instance_id_);
3229
10
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEPi
3230
10
    int ret = 0;
3231
10
    for (const auto& stage : instance_info_.stages()) {
3232
0
        std::stringstream ss;
3233
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
3234
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
3235
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
3236
0
           << ", prefix=" << stage.obj_info().prefix();
3237
3238
0
        if (stopped()) break;
3239
0
        if (stage.type() == StagePB::EXTERNAL) {
3240
0
            continue;
3241
0
        }
3242
0
        int idx = stoi(stage.obj_info().id());
3243
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3244
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
3245
0
            continue;
3246
0
        }
3247
3248
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
3249
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
3250
0
        if (!s3_conf) {
3251
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
3252
0
            continue;
3253
0
        }
3254
3255
0
        s3_conf->prefix = stage.obj_info().prefix();
3256
0
        std::shared_ptr<S3Accessor> accessor;
3257
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
3258
0
        if (ret1 != 0) {
3259
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
3260
0
            ret = -1;
3261
0
            continue;
3262
0
        }
3263
3264
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
3265
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
3266
0
            ret = -1;
3267
0
            continue;
3268
0
        }
3269
3270
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
3271
0
        int64_t expiration_time =
3272
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
3273
0
                config::internal_stage_objects_expire_time_second;
3274
0
        if (config::force_immediate_recycle) {
3275
0
            expiration_time = INT64_MAX;
3276
0
        }
3277
0
        ret1 = accessor->delete_all(expiration_time);
3278
0
        if (ret1 != 0) {
3279
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
3280
0
                         << ss.str();
3281
0
            ret = -1;
3282
0
            continue;
3283
0
        }
3284
0
    }
3285
10
    return ret;
3286
10
}
3287
3288
121
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
3289
121
    std::lock_guard lock(recycle_tasks_mutex);
3290
121
    running_recycle_tasks[task_name] = start_time;
3291
121
}
3292
3293
121
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
3294
121
    std::lock_guard lock(recycle_tasks_mutex);
3295
121
    DCHECK(running_recycle_tasks[task_name] > 0);
3296
121
    running_recycle_tasks.erase(task_name);
3297
121
}
3298
3299
21
bool InstanceRecycler::check_recycle_tasks() {
3300
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
3301
21
    {
3302
21
        std::lock_guard lock(recycle_tasks_mutex);
3303
21
        tmp_running_recycle_tasks = running_recycle_tasks;
3304
21
    }
3305
3306
21
    bool found = false;
3307
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3308
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
3309
20
        int64_t cost = now - start_time;
3310
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
3311
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
3312
20
                    .tag("instance_id", instance_id_)
3313
20
                    .tag("task", task_name);
3314
20
            found = true;
3315
20
        }
3316
20
    }
3317
3318
21
    return found;
3319
21
}
3320
3321
} // namespace doris::cloud