Coverage Report

Created: 2025-05-21 15:28

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/server.h>
21
#include <butil/endpoint.h>
22
#include <gen_cpp/cloud.pb.h>
23
#include <gen_cpp/olap_file.pb.h>
24
25
#include <atomic>
26
#include <chrono>
27
#include <cstddef>
28
#include <cstdint>
29
#include <deque>
30
#include <numeric>
31
#include <string>
32
#include <string_view>
33
34
#include "common/stopwatch.h"
35
#include "meta-service/meta_service.h"
36
#include "meta-service/meta_service_helper.h"
37
#include "meta-service/meta_service_schema.h"
38
#include "meta-service/txn_kv.h"
39
#include "meta-service/txn_kv_error.h"
40
#include "recycler/checker.h"
41
#include "recycler/hdfs_accessor.h"
42
#include "recycler/s3_accessor.h"
43
#include "recycler/storage_vault_accessor.h"
44
#ifdef UNIT_TEST
45
#include "../test/mock_accessor.h"
46
#endif
47
#include "common/bvars.h"
48
#include "common/config.h"
49
#include "common/encryption_util.h"
50
#include "common/logging.h"
51
#include "common/simple_thread_pool.h"
52
#include "common/util.h"
53
#include "cpp/sync_point.h"
54
#include "meta-service/keys.h"
55
#include "recycler/recycler_service.h"
56
#include "recycler/sync_executor.h"
57
#include "recycler/util.h"
58
59
namespace doris::cloud {
60
61
using namespace std::chrono;
62
63
// return 0 for success get a key, 1 for key not found, negative for error
64
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
65
0
    std::unique_ptr<Transaction> txn;
66
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
67
0
    if (err != TxnErrorCode::TXN_OK) {
68
0
        return -1;
69
0
    }
70
0
    switch (txn->get(key, &val, true)) {
71
0
    case TxnErrorCode::TXN_OK:
72
0
        return 0;
73
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
74
0
        return 1;
75
0
    default:
76
0
        return -1;
77
0
    };
78
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
79
80
// 0 for success, negative for error
81
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
82
182
                   std::unique_ptr<RangeGetIterator>& it) {
83
182
    std::unique_ptr<Transaction> txn;
84
182
    TxnErrorCode err = txn_kv->create_txn(&txn);
85
182
    if (err != TxnErrorCode::TXN_OK) {
86
0
        return -1;
87
0
    }
88
182
    switch (txn->get(begin, end, &it, true)) {
89
182
    case TxnErrorCode::TXN_OK:
90
182
        return 0;
91
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
92
0
        return 1;
93
0
    default:
94
0
        return -1;
95
182
    };
96
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
82
180
                   std::unique_ptr<RangeGetIterator>& it) {
83
180
    std::unique_ptr<Transaction> txn;
84
180
    TxnErrorCode err = txn_kv->create_txn(&txn);
85
180
    if (err != TxnErrorCode::TXN_OK) {
86
0
        return -1;
87
0
    }
88
180
    switch (txn->get(begin, end, &it, true)) {
89
180
    case TxnErrorCode::TXN_OK:
90
180
        return 0;
91
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
92
0
        return 1;
93
0
    default:
94
0
        return -1;
95
180
    };
96
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
82
2
                   std::unique_ptr<RangeGetIterator>& it) {
83
2
    std::unique_ptr<Transaction> txn;
84
2
    TxnErrorCode err = txn_kv->create_txn(&txn);
85
2
    if (err != TxnErrorCode::TXN_OK) {
86
0
        return -1;
87
0
    }
88
2
    switch (txn->get(begin, end, &it, true)) {
89
2
    case TxnErrorCode::TXN_OK:
90
2
        return 0;
91
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
92
0
        return 1;
93
0
    default:
94
0
        return -1;
95
2
    };
96
0
}
97
98
// return 0 for success otherwise error
99
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
100
6
    std::unique_ptr<Transaction> txn;
101
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
102
6
    if (err != TxnErrorCode::TXN_OK) {
103
0
        return -1;
104
0
    }
105
3.02k
    for (auto k : keys) {
106
3.02k
        txn->remove(k);
107
3.02k
    }
108
6
    switch (txn->commit()) {
109
6
    case TxnErrorCode::TXN_OK:
110
6
        return 0;
111
0
    case TxnErrorCode::TXN_CONFLICT:
112
0
        return -1;
113
0
    default:
114
0
        return -1;
115
6
    }
116
6
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
99
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
100
6
    std::unique_ptr<Transaction> txn;
101
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
102
6
    if (err != TxnErrorCode::TXN_OK) {
103
0
        return -1;
104
0
    }
105
3.02k
    for (auto k : keys) {
106
3.02k
        txn->remove(k);
107
3.02k
    }
108
6
    switch (txn->commit()) {
109
6
    case TxnErrorCode::TXN_OK:
110
6
        return 0;
111
0
    case TxnErrorCode::TXN_CONFLICT:
112
0
        return -1;
113
0
    default:
114
0
        return -1;
115
6
    }
116
6
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
117
118
// return 0 for success otherwise error
119
30
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
120
30
    std::unique_ptr<Transaction> txn;
121
30
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
30
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
4.00k
    for (auto& k : keys) {
126
4.00k
        txn->remove(k);
127
4.00k
    }
128
30
    switch (txn->commit()) {
129
30
    case TxnErrorCode::TXN_OK:
130
30
        return 0;
131
0
    case TxnErrorCode::TXN_CONFLICT:
132
0
        return -1;
133
0
    default:
134
0
        return -1;
135
30
    }
136
30
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
119
30
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
120
30
    std::unique_ptr<Transaction> txn;
121
30
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
30
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
4.00k
    for (auto& k : keys) {
126
4.00k
        txn->remove(k);
127
4.00k
    }
128
30
    switch (txn->commit()) {
129
30
    case TxnErrorCode::TXN_OK:
130
30
        return 0;
131
0
    case TxnErrorCode::TXN_CONFLICT:
132
0
        return -1;
133
0
    default:
134
0
        return -1;
135
30
    }
136
30
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
137
138
// return 0 for success otherwise error
139
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
140
0
                                       std::string_view end) {
141
0
    std::unique_ptr<Transaction> txn;
142
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
143
0
    if (err != TxnErrorCode::TXN_OK) {
144
0
        return -1;
145
0
    }
146
0
    txn->remove(begin, end);
147
0
    switch (txn->commit()) {
148
0
    case TxnErrorCode::TXN_OK:
149
0
        return 0;
150
0
    case TxnErrorCode::TXN_CONFLICT:
151
0
        return -1;
152
0
    default:
153
0
        return -1;
154
0
    }
155
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
156
157
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
158
                                      int64_t num_scanned, int64_t num_recycled,
159
27
                                      int64_t start_time) {
160
27
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
161
0
        int64_t cost =
162
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
163
0
        if (cost > config::recycle_task_threshold_seconds) {
164
0
            LOG_INFO("recycle task cost too much time cost={}s", cost)
165
0
                    .tag("instance_id", instance_id)
166
0
                    .tag("task", task_name)
167
0
                    .tag("num_scanned", num_scanned)
168
0
                    .tag("num_recycled", num_recycled);
169
0
        }
170
0
    }
171
27
    return;
172
27
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
159
27
                                      int64_t start_time) {
160
27
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
161
0
        int64_t cost =
162
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
163
0
        if (cost > config::recycle_task_threshold_seconds) {
164
0
            LOG_INFO("recycle task cost too much time cost={}s", cost)
165
0
                    .tag("instance_id", instance_id)
166
0
                    .tag("task", task_name)
167
0
                    .tag("num_scanned", num_scanned)
168
0
                    .tag("num_recycled", num_recycled);
169
0
        }
170
0
    }
171
27
    return;
172
27
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
173
174
4
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
175
4
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);
176
177
4
    auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
178
4
                                                               "s3_producer_pool");
179
4
    s3_producer_pool->start();
180
4
    auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
181
4
                                                                  "recycle_tablet_pool");
182
4
    recycle_tablet_pool->start();
183
4
    auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>(
184
4
            config::recycle_pool_parallelism, "group_recycle_function_pool");
185
4
    group_recycle_function_pool->start();
186
4
    _thread_pool_group =
187
4
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
188
4
                                    std::move(group_recycle_function_pool));
189
190
4
    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_);
191
4
}
192
193
4
Recycler::~Recycler() {
194
4
    if (!stopped()) {
195
0
        stop();
196
0
    }
197
4
}
198
199
4
void Recycler::instance_scanner_callback() {
200
    // sleep 60 seconds before scheduling for the launch procedure to complete:
201
    // some bad hdfs connection may cause some log to stdout stderr
202
    // which may pollute .out file and affect the script to check success
203
4
    std::this_thread::sleep_for(
204
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
205
8
    while (!stopped()) {
206
4
        std::vector<InstanceInfoPB> instances;
207
4
        get_all_instances(txn_kv_.get(), instances);
208
        // TODO(plat1ko): delete job recycle kv of non-existent instances
209
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
210
4
            std::stringstream ss;
211
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
212
4
            return ss.str();
213
4
        }();
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
209
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
210
4
            std::stringstream ss;
211
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
212
4
            return ss.str();
213
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
214
4
        if (!instances.empty()) {
215
            // enqueue instances
216
3
            std::lock_guard lock(mtx_);
217
30
            for (auto& instance : instances) {
218
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
219
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
220
                // skip instance already in pending queue
221
30
                if (success) {
222
30
                    pending_instance_queue_.push_back(std::move(instance));
223
30
                }
224
30
            }
225
3
            pending_instance_cond_.notify_all();
226
3
        }
227
4
        {
228
4
            std::unique_lock lock(mtx_);
229
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
230
7
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
230
7
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
231
4
        }
232
4
    }
233
4
}
234
235
8
void Recycler::recycle_callback() {
236
38
    while (!stopped()) {
237
38
        InstanceInfoPB instance;
238
38
        {
239
38
            std::unique_lock lock(mtx_);
240
38
            pending_instance_cond_.wait(
241
51
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
241
51
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
242
38
            if (stopped()) {
243
8
                return;
244
8
            }
245
30
            instance = std::move(pending_instance_queue_.front());
246
30
            pending_instance_queue_.pop_front();
247
30
            pending_instance_set_.erase(instance.instance_id());
248
30
        }
249
0
        auto& instance_id = instance.instance_id();
250
30
        {
251
30
            std::lock_guard lock(mtx_);
252
            // skip instance in recycling
253
30
            if (recycling_instance_map_.count(instance_id)) continue;
254
30
        }
255
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
256
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
257
258
30
        if (int r = instance_recycler->init(); r != 0) {
259
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
260
0
                         << " ret=" << r;
261
0
            continue;
262
0
        }
263
30
        std::string recycle_job_key;
264
30
        job_recycle_key({instance_id}, &recycle_job_key);
265
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
266
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
267
30
        if (ret != 0) { // Prepare failed
268
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
269
20
                         << " ret=" << ret;
270
20
            continue;
271
20
        } else {
272
10
            std::lock_guard lock(mtx_);
273
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
274
10
        }
275
10
        if (stopped()) return;
276
10
        LOG_INFO("begin to recycle instance").tag("instance_id", instance_id);
277
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
278
10
        ret = instance_recycler->do_recycle();
279
        // If instance recycler has been aborted, don't finish this job
280
10
        if (!instance_recycler->stopped()) {
281
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
282
10
                                        ret == 0, ctime_ms);
283
10
        }
284
10
        {
285
10
            std::lock_guard lock(mtx_);
286
10
            recycling_instance_map_.erase(instance_id);
287
10
        }
288
10
        auto elpased_ms =
289
10
                duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count() -
290
10
                ctime_ms;
291
10
        LOG_INFO("finish recycle instance")
292
10
                .tag("instance_id", instance_id)
293
10
                .tag("cost_ms", elpased_ms);
294
10
    }
295
8
}
296
297
4
void Recycler::lease_recycle_jobs() {
298
54
    while (!stopped()) {
299
50
        std::vector<std::string> instances;
300
50
        instances.reserve(recycling_instance_map_.size());
301
50
        {
302
50
            std::lock_guard lock(mtx_);
303
50
            for (auto& [id, _] : recycling_instance_map_) {
304
30
                instances.push_back(id);
305
30
            }
306
50
        }
307
50
        for (auto& i : instances) {
308
30
            std::string recycle_job_key;
309
30
            job_recycle_key({i}, &recycle_job_key);
310
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
311
30
            if (ret == 1) {
312
0
                std::lock_guard lock(mtx_);
313
0
                if (auto it = recycling_instance_map_.find(i);
314
0
                    it != recycling_instance_map_.end()) {
315
0
                    it->second->stop();
316
0
                }
317
0
            }
318
30
        }
319
50
        {
320
50
            std::unique_lock lock(mtx_);
321
50
            notifier_.wait_for(lock,
322
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
323
100
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
323
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
324
50
        }
325
50
    }
326
4
}
327
328
4
void Recycler::check_recycle_tasks() {
329
7
    while (!stopped()) {
330
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
331
3
        {
332
3
            std::lock_guard lock(mtx_);
333
3
            recycling_instance_map = recycling_instance_map_;
334
3
        }
335
3
        for (auto& entry : recycling_instance_map) {
336
0
            entry.second->check_recycle_tasks();
337
0
        }
338
339
3
        std::unique_lock lock(mtx_);
340
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
341
6
                           [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
341
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
342
3
    }
343
4
}
344
345
4
int Recycler::start(brpc::Server* server) {
346
4
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
347
348
4
    if (config::enable_checker) {
349
0
        checker_ = std::make_unique<Checker>(txn_kv_);
350
0
        int ret = checker_->start();
351
0
        std::string msg;
352
0
        if (ret != 0) {
353
0
            msg = "failed to start checker";
354
0
            LOG(ERROR) << msg;
355
0
            std::cerr << msg << std::endl;
356
0
            return ret;
357
0
        }
358
0
        msg = "checker started";
359
0
        LOG(INFO) << msg;
360
0
        std::cout << msg << std::endl;
361
0
    }
362
363
4
    if (server) {
364
        // Add service
365
1
        auto recycler_service =
366
1
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
367
1
        server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE);
368
1
    }
369
370
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
Line
Count
Source
370
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
371
12
    for (int i = 0; i < config::recycle_concurrency; ++i) {
372
8
        workers_.emplace_back([this] { recycle_callback(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
Line
Count
Source
372
8
        workers_.emplace_back([this] { recycle_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
373
8
    }
374
375
4
    workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this);
376
4
    workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this);
377
4
    return 0;
378
4
}
379
380
4
void Recycler::stop() {
381
4
    stopped_ = true;
382
4
    notifier_.notify_all();
383
4
    pending_instance_cond_.notify_all();
384
4
    {
385
4
        std::lock_guard lock(mtx_);
386
4
        for (auto& [_, recycler] : recycling_instance_map_) {
387
0
            recycler->stop();
388
0
        }
389
4
    }
390
20
    for (auto& w : workers_) {
391
20
        if (w.joinable()) w.join();
392
20
    }
393
4
    if (checker_) {
394
0
        checker_->stop();
395
0
    }
396
4
}
397
398
class InstanceRecycler::InvertedIndexIdCache {
399
public:
400
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
401
68
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}
402
403
    // Return 0 if success, 1 if schema kv not found, negative for error
404
3.55k
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
405
3.55k
        {
406
3.55k
            std::lock_guard lock(mtx_);
407
3.55k
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
408
644
                return 0;
409
644
            }
410
2.90k
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
411
2.90k
                it != inverted_index_id_map_.end()) {
412
2.37k
                res = it->second;
413
2.37k
                return 0;
414
2.37k
            }
415
2.90k
        }
416
        // Get schema from kv
417
        // TODO(plat1ko): Single flight
418
532
        std::unique_ptr<Transaction> txn;
419
532
        TxnErrorCode err = txn_kv_->create_txn(&txn);
420
532
        if (err != TxnErrorCode::TXN_OK) {
421
0
            LOG(WARNING) << "failed to create txn, err=" << err;
422
0
            return -1;
423
0
        }
424
532
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
425
532
        ValueBuf val_buf;
426
532
        err = cloud::get(txn.get(), schema_key, &val_buf);
427
532
        if (err != TxnErrorCode::TXN_OK) {
428
500
            LOG(WARNING) << "failed to get schema, err=" << err;
429
500
            return static_cast<int>(err);
430
500
        }
431
32
        doris::TabletSchemaCloudPB schema;
432
32
        if (!parse_schema_value(val_buf, &schema)) {
433
0
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
434
0
            return -1;
435
0
        }
436
32
        if (schema.index_size() > 0) {
437
26
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
438
26
            if (schema.has_inverted_index_storage_format()) {
439
23
                index_format = schema.inverted_index_storage_format();
440
23
            }
441
26
            res.first = index_format;
442
26
            res.second.reserve(schema.index_size());
443
62
            for (auto& i : schema.index()) {
444
62
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
445
62
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
446
62
                }
447
62
            }
448
26
        }
449
32
        insert(index_id, schema_version, res);
450
32
        return 0;
451
32
    }
452
453
    // Empty `ids` means this schema has no inverted index
454
32
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
455
32
        if (index_info.second.empty()) {
456
6
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
457
6
            std::lock_guard lock(mtx_);
458
6
            schemas_without_inverted_index_.emplace(index_id, schema_version);
459
26
        } else {
460
26
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
461
26
            std::lock_guard lock(mtx_);
462
26
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
463
26
        }
464
32
    }
465
466
private:
467
    std::string instance_id_;
468
    std::shared_ptr<TxnKv> txn_kv_;
469
470
    std::mutex mtx_;
471
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
472
    struct HashOfKey {
473
6.49k
        size_t operator()(const Key& key) const {
474
6.49k
            size_t seed = 0;
475
6.49k
            seed = std::hash<int64_t> {}(key.first);
476
6.49k
            seed = std::hash<int32_t> {}(key.second);
477
6.49k
            return seed;
478
6.49k
        }
479
    };
480
    // <index_id, schema_version> -> inverted_index_ids
481
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
482
    // Store <index_id, schema_version> of schema which doesn't have inverted index
483
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
484
};
485
486
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
487
                                   RecyclerThreadPoolGroup thread_pool_group,
488
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
489
        : txn_kv_(std::move(txn_kv)),
490
          instance_id_(instance.instance_id()),
491
          instance_info_(instance),
492
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
493
          _thread_pool_group(std::move(thread_pool_group)),
494
68
          txn_lazy_committer_(std::move(txn_lazy_committer)) {};
495
496
68
InstanceRecycler::~InstanceRecycler() = default;
497
498
68
int InstanceRecycler::init_obj_store_accessors() {
499
68
    for (const auto& obj_info : instance_info_.obj_info()) {
500
54
#ifdef UNIT_TEST
501
54
        auto accessor = std::make_shared<MockAccessor>();
502
#else
503
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
504
        if (!s3_conf) {
505
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
506
            return -1;
507
        }
508
509
        std::shared_ptr<S3Accessor> accessor;
510
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
511
        if (ret != 0) {
512
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
513
                         << " resource_id=" << obj_info.id();
514
            return ret;
515
        }
516
#endif
517
54
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
518
54
    }
519
520
68
    return 0;
521
68
}
522
523
68
int InstanceRecycler::init_storage_vault_accessors() {
524
68
    if (instance_info_.resource_ids().empty()) {
525
61
        return 0;
526
61
    }
527
528
7
    FullRangeGetIteratorOptions opts(txn_kv_);
529
7
    opts.prefetch = true;
530
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
531
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
532
533
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
534
18
        auto [k, v] = *kv;
535
18
        StorageVaultPB vault;
536
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
537
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
538
0
            return -1;
539
0
        }
540
18
        std::string recycler_storage_vault_white_list = accumulate(
541
18
                config::recycler_storage_vault_white_list.begin(),
542
18
                config::recycler_storage_vault_white_list.end(), std::string(),
543
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
543
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
544
18
        LOG_INFO("config::recycler_storage_vault_white_list")
545
18
                .tag("", recycler_storage_vault_white_list);
546
18
        if (!config::recycler_storage_vault_white_list.empty()) {
547
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
548
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
549
8
                it == config::recycler_storage_vault_white_list.end()) {
550
2
                LOG_WARNING(
551
2
                        "failed to init accessor for vault because this vault is not in "
552
2
                        "config::recycler_storage_vault_white_list. ")
553
2
                        .tag(" vault name:", vault.name())
554
2
                        .tag(" config::recycler_storage_vault_white_list:",
555
2
                             recycler_storage_vault_white_list);
556
2
                continue;
557
2
            }
558
8
        }
559
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
560
16
                                 &accessor_map_, &vault);
561
16
        if (vault.has_hdfs_info()) {
562
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
563
9
            int ret = accessor->init();
564
9
            if (ret != 0) {
565
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
566
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
567
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
568
4
                continue;
569
4
            }
570
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
571
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
572
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
573
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
574
7
        } else if (vault.has_obj_info()) {
575
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
576
7
            if (!s3_conf) {
577
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
578
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
579
1
                continue;
580
1
            }
581
582
6
            std::shared_ptr<S3Accessor> accessor;
583
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
584
6
            if (ret != 0) {
585
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
586
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
587
0
                             << " ret=" << ret
588
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
589
0
                continue;
590
0
            }
591
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
592
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
593
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
594
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
595
6
        }
596
16
    }
597
598
7
    if (!it->is_valid()) {
599
0
        LOG_WARNING("failed to get storage vault kv");
600
0
        return -1;
601
0
    }
602
603
7
    if (accessor_map_.empty()) {
604
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
605
1
        return -2;
606
1
    }
607
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
608
6
             instance_id_);
609
610
6
    return 0;
611
7
}
612
613
68
int InstanceRecycler::init() {
614
68
    int ret = init_obj_store_accessors();
615
68
    if (ret != 0) {
616
0
        return ret;
617
0
    }
618
619
68
    return init_storage_vault_accessors();
620
68
}
621
622
// Bundles several callables into one task that runs them one after another.
//
// The returned std::function holds copies of `funcs`. When invoked it calls
// every callable, left to right, even if an earlier one reported a failure,
// and returns the result of the last callable that returned non-zero. It
// returns 0 when all of them returned 0 (or when `funcs` is empty).
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() {
        int last_failure = 0;
        const auto remember = [&last_failure](int rc) {
            if (rc != 0) {
                last_failure = rc;
            }
        };
        // Comma fold: the calls are sequenced in pack order.
        (remember(funcs()), ...);
        return last_failure;
    };
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
623
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
624
10
    return [funcs...]() {
625
10
        return [](std::initializer_list<int> ret_vals) {
626
10
            int i = 0;
627
10
            for (int ret : ret_vals) {
628
10
                if (ret != 0) {
629
10
                    i = ret;
630
10
                }
631
10
            }
632
10
            return i;
633
10
        }({funcs()...});
634
10
    };
635
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
623
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
624
10
    return [funcs...]() {
625
10
        return [](std::initializer_list<int> ret_vals) {
626
10
            int i = 0;
627
10
            for (int ret : ret_vals) {
628
10
                if (ret != 0) {
629
10
                    i = ret;
630
10
                }
631
10
            }
632
10
            return i;
633
10
        }({funcs()...});
634
10
    };
635
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_
Line
Count
Source
623
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
624
10
    return [funcs...]() {
625
10
        return [](std::initializer_list<int> ret_vals) {
626
10
            int i = 0;
627
10
            for (int ret : ret_vals) {
628
10
                if (ret != 0) {
629
10
                    i = ret;
630
10
                }
631
10
            }
632
10
            return i;
633
10
        }({funcs()...});
634
10
    };
635
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
623
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
624
10
    return [funcs...]() {
625
10
        return [](std::initializer_list<int> ret_vals) {
626
10
            int i = 0;
627
10
            for (int ret : ret_vals) {
628
10
                if (ret != 0) {
629
10
                    i = ret;
630
10
                }
631
10
            }
632
10
            return i;
633
10
        }({funcs()...});
634
10
    };
635
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
623
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
624
10
    return [funcs...]() {
625
10
        return [](std::initializer_list<int> ret_vals) {
626
10
            int i = 0;
627
10
            for (int ret : ret_vals) {
628
10
                if (ret != 0) {
629
10
                    i = ret;
630
10
                }
631
10
            }
632
10
            return i;
633
10
        }({funcs()...});
634
10
    };
635
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
623
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
624
10
    return [funcs...]() {
625
10
        return [](std::initializer_list<int> ret_vals) {
626
10
            int i = 0;
627
10
            for (int ret : ret_vals) {
628
10
                if (ret != 0) {
629
10
                    i = ret;
630
10
                }
631
10
            }
632
10
            return i;
633
10
        }({funcs()...});
634
10
    };
635
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Line
Count
Source
623
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
624
10
    return [funcs...]() {
625
10
        return [](std::initializer_list<int> ret_vals) {
626
10
            int i = 0;
627
10
            for (int ret : ret_vals) {
628
10
                if (ret != 0) {
629
10
                    i = ret;
630
10
                }
631
10
            }
632
10
            return i;
633
10
        }({funcs()...});
634
10
    };
635
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
623
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
624
10
    return [funcs...]() {
625
10
        return [](std::initializer_list<int> ret_vals) {
626
10
            int i = 0;
627
10
            for (int ret : ret_vals) {
628
10
                if (ret != 0) {
629
10
                    i = ret;
630
10
                }
631
10
            }
632
10
            return i;
633
10
        }({funcs()...});
634
10
    };
635
10
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_1ZNS2_10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_5ZNS2_10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
636
637
10
int InstanceRecycler::do_recycle() {
638
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
639
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
640
0
        return recycle_deleted_instance();
641
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
642
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
643
10
                                        fmt::format("instance id {}", instance_id_),
644
80
                                        [](int r) { return r != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi
Line
Count
Source
644
80
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEi
645
10
        sync_executor
646
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
647
                                   // becase they may both recycle the same set of tablets
648
                        // recycle dropped table or idexes(mv, rollup)
649
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv
Line
Count
Source
649
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEv
650
                        // recycle dropped partitions
651
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
651
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
652
10
                .add(task_wrapper(
653
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
653
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
654
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
654
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
655
10
                .add(task_wrapper(
656
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
656
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
657
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
657
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
658
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
658
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
659
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
659
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
660
10
                .add(task_wrapper(
661
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
661
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
662
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
662
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
663
10
        bool finished = true;
664
10
        std::vector<int> rets = sync_executor.when_all(&finished);
665
80
        for (int ret : rets) {
666
80
            if (ret != 0) {
667
0
                return ret;
668
0
            }
669
80
        }
670
10
        return finished ? 0 : -1;
671
10
    } else {
672
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
673
0
                     << " instance_id=" << instance_id_;
674
0
        return -1;
675
0
    }
676
10
}
677
678
/**
679
 * 1. delete all remote data
680
 * 2. delete all kv
681
 * 3. remove instance kv
682
 */
683
1
int InstanceRecycler::recycle_deleted_instance() {
684
1
    LOG_INFO("begin to recycle deleted instance").tag("instance_id", instance_id_);
685
686
1
    int ret = 0;
687
1
    auto start_time = steady_clock::now();
688
689
1
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
690
1
        auto cost = duration<float>(steady_clock::now() - start_time).count();
691
1
        LOG(INFO) << (ret == 0 ? "successfully" : "failed to")
692
1
                  << " recycle deleted instance, cost=" << cost
693
1
                  << "s, instance_id=" << instance_id_;
694
1
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEPi
Line
Count
Source
689
1
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
690
1
        auto cost = duration<float>(steady_clock::now() - start_time).count();
691
1
        LOG(INFO) << (ret == 0 ? "successfully" : "failed to")
692
1
                  << " recycle deleted instance, cost=" << cost
693
1
                  << "s, instance_id=" << instance_id_;
694
1
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEPi
695
696
    // delete all remote data
697
2
    for (auto& [_, accessor] : accessor_map_) {
698
2
        if (stopped()) {
699
0
            return ret;
700
0
        }
701
702
2
        LOG(INFO) << "begin to delete all objects in " << accessor->uri();
703
2
        int del_ret = accessor->delete_all();
704
2
        if (del_ret == 0) {
705
2
            LOG(INFO) << "successfully delete all objects in " << accessor->uri();
706
2
        } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
707
            // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
708
            // so the recycling has been successful.
709
0
            ret = -1;
710
0
        }
711
2
    }
712
713
1
    if (ret != 0) {
714
0
        LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
715
0
        return ret;
716
0
    }
717
718
    // delete all kv
719
1
    std::unique_ptr<Transaction> txn;
720
1
    TxnErrorCode err = txn_kv_->create_txn(&txn);
721
1
    if (err != TxnErrorCode::TXN_OK) {
722
0
        LOG(WARNING) << "failed to create txn";
723
0
        ret = -1;
724
0
        return -1;
725
0
    }
726
1
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
727
    // delete kv before deleting objects to prevent the checker from misjudging data loss
728
1
    std::string start_txn_key = txn_key_prefix(instance_id_);
729
1
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
730
1
    txn->remove(start_txn_key, end_txn_key);
731
1
    std::string start_version_key = version_key_prefix(instance_id_);
732
1
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
733
1
    txn->remove(start_version_key, end_version_key);
734
1
    std::string start_meta_key = meta_key_prefix(instance_id_);
735
1
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
736
1
    txn->remove(start_meta_key, end_meta_key);
737
1
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
738
1
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
739
1
    txn->remove(start_recycle_key, end_recycle_key);
740
1
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
741
1
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
742
1
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
743
1
    std::string start_copy_key = copy_key_prefix(instance_id_);
744
1
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
745
1
    txn->remove(start_copy_key, end_copy_key);
746
    // should not remove job key range, because we need to reserve job recycle kv
747
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
748
1
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
749
1
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
750
1
    txn->remove(start_job_tablet_key, end_job_tablet_key);
751
1
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
752
1
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
753
1
    std::string start_vault_key = storage_vault_key(key_info0);
754
1
    std::string end_vault_key = storage_vault_key(key_info1);
755
1
    txn->remove(start_vault_key, end_vault_key);
756
1
    err = txn->commit();
757
1
    if (err != TxnErrorCode::TXN_OK) {
758
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
759
0
        ret = -1;
760
0
    }
761
762
1
    if (ret == 0) {
763
        // remove instance kv
764
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
765
1
        err = txn_kv_->create_txn(&txn);
766
1
        if (err != TxnErrorCode::TXN_OK) {
767
0
            LOG(WARNING) << "failed to create txn";
768
0
            ret = -1;
769
0
            return ret;
770
0
        }
771
1
        std::string key;
772
1
        instance_key({instance_id_}, &key);
773
1
        txn->remove(key);
774
1
        err = txn->commit();
775
1
        if (err != TxnErrorCode::TXN_OK) {
776
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
777
0
                         << " err=" << err;
778
0
            ret = -1;
779
0
        }
780
1
    }
781
1
    return ret;
782
1
}
783
784
12
int InstanceRecycler::recycle_indexes() {
785
12
    const std::string task_name = "recycle_indexes";
786
12
    int64_t num_scanned = 0;
787
12
    int64_t num_expired = 0;
788
12
    int64_t num_recycled = 0;
789
790
12
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
791
12
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
792
12
    std::string index_key0;
793
12
    std::string index_key1;
794
12
    recycle_index_key(index_key_info0, &index_key0);
795
12
    recycle_index_key(index_key_info1, &index_key1);
796
797
12
    LOG_INFO("begin to recycle indexes").tag("instance_id", instance_id_);
798
799
12
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
800
12
    register_recycle_task(task_name, start_time);
801
802
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
803
12
        unregister_recycle_task(task_name);
804
12
        int64_t cost =
805
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
806
12
        LOG_INFO("recycle indexes finished, cost={}s", cost)
807
12
                .tag("instance_id", instance_id_)
808
12
                .tag("num_scanned", num_scanned)
809
12
                .tag("num_expired", num_expired)
810
12
                .tag("num_recycled", num_recycled);
811
12
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEPi
Line
Count
Source
802
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
803
12
        unregister_recycle_task(task_name);
804
12
        int64_t cost =
805
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
806
12
        LOG_INFO("recycle indexes finished, cost={}s", cost)
807
12
                .tag("instance_id", instance_id_)
808
12
                .tag("num_scanned", num_scanned)
809
12
                .tag("num_expired", num_expired)
810
12
                .tag("num_recycled", num_recycled);
811
12
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEPi
812
813
12
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
814
815
12
    auto calc_expiration = [&earlest_ts, this](const RecycleIndexPB& index) {
816
6
        if (config::force_immediate_recycle) {
817
0
            return 0L;
818
0
        }
819
6
        int64_t expiration = index.expiration() > 0 ? index.expiration() : index.creation_time();
820
6
        int64_t retention_seconds = config::retention_seconds;
821
6
        if (index.state() == RecycleIndexPB::DROPPED) {
822
6
            retention_seconds =
823
6
                    std::min(config::dropped_index_retention_seconds, retention_seconds);
824
6
        }
825
6
        int64_t final_expiration = expiration + retention_seconds;
826
6
        if (earlest_ts > final_expiration) {
827
2
            earlest_ts = final_expiration;
828
2
            g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, earlest_ts);
829
2
        }
830
6
        return final_expiration;
831
6
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_3clERKNS0_14RecycleIndexPBE
Line
Count
Source
815
6
    auto calc_expiration = [&earlest_ts, this](const RecycleIndexPB& index) {
816
6
        if (config::force_immediate_recycle) {
817
0
            return 0L;
818
0
        }
819
6
        int64_t expiration = index.expiration() > 0 ? index.expiration() : index.creation_time();
820
6
        int64_t retention_seconds = config::retention_seconds;
821
6
        if (index.state() == RecycleIndexPB::DROPPED) {
822
6
            retention_seconds =
823
6
                    std::min(config::dropped_index_retention_seconds, retention_seconds);
824
6
        }
825
6
        int64_t final_expiration = expiration + retention_seconds;
826
6
        if (earlest_ts > final_expiration) {
827
2
            earlest_ts = final_expiration;
828
2
            g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, earlest_ts);
829
2
        }
830
6
        return final_expiration;
831
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_3clERKNS0_14RecycleIndexPBE
832
833
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
834
12
    std::vector<std::string_view> index_keys;
835
12
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
836
6
        ++num_scanned;
837
6
        RecycleIndexPB index_pb;
838
6
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
839
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
840
0
            return -1;
841
0
        }
842
6
        int64_t current_time = ::time(nullptr);
843
6
        if (current_time < calc_expiration(index_pb)) { // not expired
844
0
            return 0;
845
0
        }
846
6
        ++num_expired;
847
        // decode index_id
848
6
        auto k1 = k;
849
6
        k1.remove_prefix(1);
850
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
851
6
        decode_key(&k1, &out);
852
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
853
6
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
854
6
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
855
6
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
856
6
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
857
        // Change state to RECYCLING
858
6
        std::unique_ptr<Transaction> txn;
859
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
860
6
        if (err != TxnErrorCode::TXN_OK) {
861
0
            LOG_WARNING("failed to create txn").tag("err", err);
862
0
            return -1;
863
0
        }
864
6
        std::string val;
865
6
        err = txn->get(k, &val);
866
6
        if (err ==
867
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
868
0
            LOG_INFO("index {} has been recycled or committed", index_id);
869
0
            return 0;
870
0
        }
871
6
        if (err != TxnErrorCode::TXN_OK) {
872
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
873
0
            return -1;
874
0
        }
875
6
        index_pb.Clear();
876
6
        if (!index_pb.ParseFromString(val)) {
877
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
878
0
            return -1;
879
0
        }
880
6
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
881
6
            index_pb.set_state(RecycleIndexPB::RECYCLING);
882
6
            txn->put(k, index_pb.SerializeAsString());
883
6
            err = txn->commit();
884
6
            if (err != TxnErrorCode::TXN_OK) {
885
0
                LOG_WARNING("failed to commit txn").tag("err", err);
886
0
                return -1;
887
0
            }
888
6
        }
889
6
        if (recycle_tablets(index_pb.table_id(), index_id) != 0) {
890
0
            LOG_WARNING("failed to recycle tablets under index")
891
0
                    .tag("table_id", index_pb.table_id())
892
0
                    .tag("instance_id", instance_id_)
893
0
                    .tag("index_id", index_id);
894
0
            return -1;
895
0
        }
896
6
        ++num_recycled;
897
6
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
898
6
        index_keys.push_back(k);
899
6
        return 0;
900
6
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
835
6
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
836
6
        ++num_scanned;
837
6
        RecycleIndexPB index_pb;
838
6
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
839
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
840
0
            return -1;
841
0
        }
842
6
        int64_t current_time = ::time(nullptr);
843
6
        if (current_time < calc_expiration(index_pb)) { // not expired
844
0
            return 0;
845
0
        }
846
6
        ++num_expired;
847
        // decode index_id
848
6
        auto k1 = k;
849
6
        k1.remove_prefix(1);
850
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
851
6
        decode_key(&k1, &out);
852
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
853
6
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
854
6
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
855
6
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
856
6
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
857
        // Change state to RECYCLING
858
6
        std::unique_ptr<Transaction> txn;
859
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
860
6
        if (err != TxnErrorCode::TXN_OK) {
861
0
            LOG_WARNING("failed to create txn").tag("err", err);
862
0
            return -1;
863
0
        }
864
6
        std::string val;
865
6
        err = txn->get(k, &val);
866
6
        if (err ==
867
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
868
0
            LOG_INFO("index {} has been recycled or committed", index_id);
869
0
            return 0;
870
0
        }
871
6
        if (err != TxnErrorCode::TXN_OK) {
872
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
873
0
            return -1;
874
0
        }
875
6
        index_pb.Clear();
876
6
        if (!index_pb.ParseFromString(val)) {
877
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
878
0
            return -1;
879
0
        }
880
6
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
881
6
            index_pb.set_state(RecycleIndexPB::RECYCLING);
882
6
            txn->put(k, index_pb.SerializeAsString());
883
6
            err = txn->commit();
884
6
            if (err != TxnErrorCode::TXN_OK) {
885
0
                LOG_WARNING("failed to commit txn").tag("err", err);
886
0
                return -1;
887
0
            }
888
6
        }
889
6
        if (recycle_tablets(index_pb.table_id(), index_id) != 0) {
890
0
            LOG_WARNING("failed to recycle tablets under index")
891
0
                    .tag("table_id", index_pb.table_id())
892
0
                    .tag("instance_id", instance_id_)
893
0
                    .tag("index_id", index_id);
894
0
            return -1;
895
0
        }
896
6
        ++num_recycled;
897
6
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
898
6
        index_keys.push_back(k);
899
6
        return 0;
900
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
901
902
12
    auto loop_done = [&index_keys, this]() -> int {
903
2
        if (index_keys.empty()) return 0;
904
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
905
2
                                                              [&](int*) { index_keys.clear(); });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
905
2
                                                              [&](int*) { index_keys.clear(); });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEvENKUlPiE_clES3_
906
2
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
907
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
908
0
            return -1;
909
0
        }
910
2
        return 0;
911
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEv
Line
Count
Source
902
2
    auto loop_done = [&index_keys, this]() -> int {
903
2
        if (index_keys.empty()) return 0;
904
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
905
2
                                                              [&](int*) { index_keys.clear(); });
906
2
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
907
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
908
0
            return -1;
909
0
        }
910
2
        return 0;
911
2
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clEv
912
913
12
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
914
12
}
915
916
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
917
231
                             int64_t tablet_id) {
918
231
    std::unique_ptr<Transaction> txn;
919
231
    TxnErrorCode err = txn_kv->create_txn(&txn);
920
231
    if (err != TxnErrorCode::TXN_OK) {
921
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
922
0
                     << " tablet_id=" << tablet_id << " err=" << err;
923
0
        return false;
924
0
    }
925
926
231
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
927
231
    std::string tablet_idx_val;
928
231
    err = txn->get(tablet_idx_key, &tablet_idx_val);
929
231
    if (TxnErrorCode::TXN_OK != err) {
930
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
931
0
                     << " tablet_id=" << tablet_id << " err=" << err
932
0
                     << " key=" << hex(tablet_idx_key);
933
0
        return false;
934
0
    }
935
936
231
    TabletIndexPB tablet_idx_pb;
937
231
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
938
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
939
0
                     << " tablet_id=" << tablet_id;
940
0
        return false;
941
0
    }
942
943
231
    if (!tablet_idx_pb.has_db_id()) {
944
0
        return true;
945
0
    }
946
947
231
    std::string ver_val;
948
231
    std::string ver_key =
949
231
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
950
231
                                   tablet_idx_pb.partition_id()});
951
231
    err = txn->get(ver_key, &ver_val);
952
953
231
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
954
0
        return true;
955
0
    }
956
957
231
    if (TxnErrorCode::TXN_OK != err) {
958
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
959
0
                     << " db_id=" << tablet_idx_pb.db_id()
960
0
                     << " table_id=" << tablet_idx_pb.table_id()
961
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
962
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
963
0
        return false;
964
0
    }
965
966
231
    VersionPB version_pb;
967
231
    if (!version_pb.ParseFromString(ver_val)) {
968
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
969
0
                     << " db_id=" << tablet_idx_pb.db_id()
970
0
                     << " table_id=" << tablet_idx_pb.table_id()
971
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
972
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
973
0
        return false;
974
0
    }
975
976
231
    if (version_pb.pending_txn_ids_size() > 0) {
977
0
        DCHECK(version_pb.pending_txn_ids_size() == 1);
978
0
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
979
0
                     << " db_id=" << tablet_idx_pb.db_id()
980
0
                     << " table_id=" << tablet_idx_pb.table_id()
981
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
982
0
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
983
0
                     << " key=" << hex(ver_key);
984
0
        return false;
985
0
    }
986
231
    return true;
987
231
}
988
989
12
int InstanceRecycler::recycle_partitions() {
990
12
    const std::string task_name = "recycle_partitions";
991
12
    int64_t num_scanned = 0;
992
12
    int64_t num_expired = 0;
993
12
    int64_t num_recycled = 0;
994
995
12
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
996
12
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
997
12
    std::string part_key0;
998
12
    std::string part_key1;
999
12
    recycle_partition_key(part_key_info0, &part_key0);
1000
12
    recycle_partition_key(part_key_info1, &part_key1);
1001
1002
12
    LOG_INFO("begin to recycle partitions").tag("instance_id", instance_id_);
1003
1004
12
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1005
12
    register_recycle_task(task_name, start_time);
1006
1007
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1008
12
        unregister_recycle_task(task_name);
1009
12
        int64_t cost =
1010
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1011
12
        LOG_INFO("recycle partitions finished, cost={}s", cost)
1012
12
                .tag("instance_id", instance_id_)
1013
12
                .tag("num_scanned", num_scanned)
1014
12
                .tag("num_expired", num_expired)
1015
12
                .tag("num_recycled", num_recycled);
1016
12
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEPi
Line
Count
Source
1007
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1008
12
        unregister_recycle_task(task_name);
1009
12
        int64_t cost =
1010
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1011
12
        LOG_INFO("recycle partitions finished, cost={}s", cost)
1012
12
                .tag("instance_id", instance_id_)
1013
12
                .tag("num_scanned", num_scanned)
1014
12
                .tag("num_expired", num_expired)
1015
12
                .tag("num_recycled", num_recycled);
1016
12
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEPi
1017
1018
12
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1019
1020
12
    auto calc_expiration = [&earlest_ts, this](const RecyclePartitionPB& partition) {
1021
6
        if (config::force_immediate_recycle) {
1022
0
            return 0L;
1023
0
        }
1024
6
        int64_t expiration =
1025
6
                partition.expiration() > 0 ? partition.expiration() : partition.creation_time();
1026
6
        int64_t retention_seconds = config::retention_seconds;
1027
6
        if (partition.state() == RecyclePartitionPB::DROPPED) {
1028
6
            retention_seconds =
1029
6
                    std::min(config::dropped_partition_retention_seconds, retention_seconds);
1030
6
        }
1031
6
        int64_t final_expiration = expiration + retention_seconds;
1032
6
        if (earlest_ts > final_expiration) {
1033
2
            earlest_ts = final_expiration;
1034
2
            g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, earlest_ts);
1035
2
        }
1036
6
        return final_expiration;
1037
6
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_3clERKNS0_18RecyclePartitionPBE
Line
Count
Source
1020
6
    auto calc_expiration = [&earlest_ts, this](const RecyclePartitionPB& partition) {
1021
6
        if (config::force_immediate_recycle) {
1022
0
            return 0L;
1023
0
        }
1024
6
        int64_t expiration =
1025
6
                partition.expiration() > 0 ? partition.expiration() : partition.creation_time();
1026
6
        int64_t retention_seconds = config::retention_seconds;
1027
6
        if (partition.state() == RecyclePartitionPB::DROPPED) {
1028
6
            retention_seconds =
1029
6
                    std::min(config::dropped_partition_retention_seconds, retention_seconds);
1030
6
        }
1031
6
        int64_t final_expiration = expiration + retention_seconds;
1032
6
        if (earlest_ts > final_expiration) {
1033
2
            earlest_ts = final_expiration;
1034
2
            g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, earlest_ts);
1035
2
        }
1036
6
        return final_expiration;
1037
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_3clERKNS0_18RecyclePartitionPBE
1038
1039
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
1040
12
    std::vector<std::string_view> partition_keys;
1041
12
    std::vector<std::string> partition_version_keys;
1042
12
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1043
6
        ++num_scanned;
1044
6
        RecyclePartitionPB part_pb;
1045
6
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1046
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1047
0
            return -1;
1048
0
        }
1049
6
        int64_t current_time = ::time(nullptr);
1050
6
        if (current_time < calc_expiration(part_pb)) { // not expired
1051
0
            return 0;
1052
0
        }
1053
6
        ++num_expired;
1054
        // decode partition_id
1055
6
        auto k1 = k;
1056
6
        k1.remove_prefix(1);
1057
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1058
6
        decode_key(&k1, &out);
1059
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1060
6
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1061
6
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1062
6
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1063
6
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1064
        // Change state to RECYCLING
1065
6
        std::unique_ptr<Transaction> txn;
1066
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1067
6
        if (err != TxnErrorCode::TXN_OK) {
1068
0
            LOG_WARNING("failed to create txn").tag("err", err);
1069
0
            return -1;
1070
0
        }
1071
6
        std::string val;
1072
6
        err = txn->get(k, &val);
1073
6
        if (err ==
1074
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1075
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1076
0
            return 0;
1077
0
        }
1078
6
        if (err != TxnErrorCode::TXN_OK) {
1079
0
            LOG_WARNING("failed to get kv");
1080
0
            return -1;
1081
0
        }
1082
6
        part_pb.Clear();
1083
6
        if (!part_pb.ParseFromString(val)) {
1084
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1085
0
            return -1;
1086
0
        }
1087
        // Partitions with PREPARED state MUST have no data
1088
6
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
1089
6
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1090
6
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1091
6
            txn->put(k, part_pb.SerializeAsString());
1092
6
            err = txn->commit();
1093
6
            if (err != TxnErrorCode::TXN_OK) {
1094
0
                LOG_WARNING("failed to commit txn: {}", err);
1095
0
                return -1;
1096
0
            }
1097
6
        }
1098
1099
6
        int ret = 0;
1100
30
        for (int64_t index_id : part_pb.index_id()) {
1101
30
            if (recycle_tablets(part_pb.table_id(), index_id, partition_id, is_empty_tablet) != 0) {
1102
0
                LOG_WARNING("failed to recycle tablets under partition")
1103
0
                        .tag("table_id", part_pb.table_id())
1104
0
                        .tag("instance_id", instance_id_)
1105
0
                        .tag("index_id", index_id)
1106
0
                        .tag("partition_id", partition_id);
1107
0
                ret = -1;
1108
0
            }
1109
30
        }
1110
6
        if (ret == 0) {
1111
6
            ++num_recycled;
1112
6
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1113
6
            partition_keys.push_back(k);
1114
6
            if (part_pb.db_id() > 0) {
1115
6
                partition_version_keys.push_back(partition_version_key(
1116
6
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1117
6
            }
1118
6
        }
1119
6
        return ret;
1120
6
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1042
6
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1043
6
        ++num_scanned;
1044
6
        RecyclePartitionPB part_pb;
1045
6
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1046
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1047
0
            return -1;
1048
0
        }
1049
6
        int64_t current_time = ::time(nullptr);
1050
6
        if (current_time < calc_expiration(part_pb)) { // not expired
1051
0
            return 0;
1052
0
        }
1053
6
        ++num_expired;
1054
        // decode partition_id
1055
6
        auto k1 = k;
1056
6
        k1.remove_prefix(1);
1057
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1058
6
        decode_key(&k1, &out);
1059
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1060
6
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1061
6
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1062
6
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1063
6
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1064
        // Change state to RECYCLING
1065
6
        std::unique_ptr<Transaction> txn;
1066
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1067
6
        if (err != TxnErrorCode::TXN_OK) {
1068
0
            LOG_WARNING("failed to create txn").tag("err", err);
1069
0
            return -1;
1070
0
        }
1071
6
        std::string val;
1072
6
        err = txn->get(k, &val);
1073
6
        if (err ==
1074
6
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1075
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1076
0
            return 0;
1077
0
        }
1078
6
        if (err != TxnErrorCode::TXN_OK) {
1079
0
            LOG_WARNING("failed to get kv");
1080
0
            return -1;
1081
0
        }
1082
6
        part_pb.Clear();
1083
6
        if (!part_pb.ParseFromString(val)) {
1084
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1085
0
            return -1;
1086
0
        }
1087
        // Partitions with PREPARED state MUST have no data
1088
6
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
1089
6
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1090
6
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1091
6
            txn->put(k, part_pb.SerializeAsString());
1092
6
            err = txn->commit();
1093
6
            if (err != TxnErrorCode::TXN_OK) {
1094
0
                LOG_WARNING("failed to commit txn: {}", err);
1095
0
                return -1;
1096
0
            }
1097
6
        }
1098
1099
6
        int ret = 0;
1100
30
        for (int64_t index_id : part_pb.index_id()) {
1101
30
            if (recycle_tablets(part_pb.table_id(), index_id, partition_id, is_empty_tablet) != 0) {
1102
0
                LOG_WARNING("failed to recycle tablets under partition")
1103
0
                        .tag("table_id", part_pb.table_id())
1104
0
                        .tag("instance_id", instance_id_)
1105
0
                        .tag("index_id", index_id)
1106
0
                        .tag("partition_id", partition_id);
1107
0
                ret = -1;
1108
0
            }
1109
30
        }
1110
6
        if (ret == 0) {
1111
6
            ++num_recycled;
1112
6
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1113
6
            partition_keys.push_back(k);
1114
6
            if (part_pb.db_id() > 0) {
1115
6
                partition_version_keys.push_back(partition_version_key(
1116
6
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1117
6
            }
1118
6
        }
1119
6
        return ret;
1120
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
1121
1122
12
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1123
2
        if (partition_keys.empty()) return 0;
1124
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1125
2
            partition_keys.clear();
1126
2
            partition_version_keys.clear();
1127
2
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
1124
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1125
2
            partition_keys.clear();
1126
2
            partition_version_keys.clear();
1127
2
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEvENKUlPiE_clES3_
1128
2
        std::unique_ptr<Transaction> txn;
1129
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1130
2
        if (err != TxnErrorCode::TXN_OK) {
1131
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1132
0
            return -1;
1133
0
        }
1134
6
        for (auto& k : partition_keys) {
1135
6
            txn->remove(k);
1136
6
        }
1137
6
        for (auto& k : partition_version_keys) {
1138
6
            txn->remove(k);
1139
6
        }
1140
2
        err = txn->commit();
1141
2
        if (err != TxnErrorCode::TXN_OK) {
1142
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1143
0
                         << " err=" << err;
1144
0
            return -1;
1145
0
        }
1146
2
        return 0;
1147
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEv
Line
Count
Source
1122
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1123
2
        if (partition_keys.empty()) return 0;
1124
2
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1125
2
            partition_keys.clear();
1126
2
            partition_version_keys.clear();
1127
2
        });
1128
2
        std::unique_ptr<Transaction> txn;
1129
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1130
2
        if (err != TxnErrorCode::TXN_OK) {
1131
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1132
0
            return -1;
1133
0
        }
1134
6
        for (auto& k : partition_keys) {
1135
6
            txn->remove(k);
1136
6
        }
1137
6
        for (auto& k : partition_version_keys) {
1138
6
            txn->remove(k);
1139
6
        }
1140
2
        err = txn->commit();
1141
2
        if (err != TxnErrorCode::TXN_OK) {
1142
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1143
0
                         << " err=" << err;
1144
0
            return -1;
1145
0
        }
1146
2
        return 0;
1147
2
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clEv
1148
1149
12
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
1150
12
}
1151
1152
12
int InstanceRecycler::recycle_versions() {
1153
12
    int64_t num_scanned = 0;
1154
12
    int64_t num_recycled = 0;
1155
1156
12
    LOG_INFO("begin to recycle table and partition versions").tag("instance_id", instance_id_);
1157
1158
12
    auto start_time = steady_clock::now();
1159
1160
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1161
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1162
12
        LOG_INFO("recycle table and partition versions finished, cost={}s", cost)
1163
12
                .tag("instance_id", instance_id_)
1164
12
                .tag("num_scanned", num_scanned)
1165
12
                .tag("num_recycled", num_recycled);
1166
12
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEPi
Line
Count
Source
1160
12
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1161
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1162
12
        LOG_INFO("recycle table and partition versions finished, cost={}s", cost)
1163
12
                .tag("instance_id", instance_id_)
1164
12
                .tag("num_scanned", num_scanned)
1165
12
                .tag("num_recycled", num_recycled);
1166
12
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEPi
1167
1168
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
1169
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
1170
12
    int64_t last_scanned_table_id = 0;
1171
12
    bool is_recycled = false; // Is last scanned kv recycled
1172
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled, this](
1173
12
                                std::string_view k, std::string_view) {
1174
2
        ++num_scanned;
1175
2
        auto k1 = k;
1176
2
        k1.remove_prefix(1);
1177
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1178
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1179
2
        decode_key(&k1, &out);
1180
2
        DCHECK_EQ(out.size(), 6) << k;
1181
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1182
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1183
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1184
0
            return 0;
1185
0
        }
1186
2
        last_scanned_table_id = table_id;
1187
2
        is_recycled = false;
1188
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1189
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1190
2
        std::unique_ptr<Transaction> txn;
1191
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1192
2
        if (err != TxnErrorCode::TXN_OK) {
1193
0
            return -1;
1194
0
        }
1195
2
        std::unique_ptr<RangeGetIterator> iter;
1196
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1197
2
        if (err != TxnErrorCode::TXN_OK) {
1198
0
            return -1;
1199
0
        }
1200
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1201
1
            return 0;
1202
1
        }
1203
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1204
        // 1. Remove all partition version kvs of this table
1205
1
        auto partition_version_key_begin =
1206
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1207
1
        auto partition_version_key_end =
1208
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1209
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1210
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1211
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1212
1
                     << " table_id=" << table_id;
1213
        // 2. Remove the table version kv of this table
1214
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1215
1
        txn->remove(tbl_version_key);
1216
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1217
        // 3. Remove mow delete bitmap update lock and tablet compaction lock
1218
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1219
1
        txn->remove(lock_key);
1220
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1221
1
        std::string tablet_compaction_key_begin =
1222
1
                mow_tablet_compaction_key({instance_id_, table_id, 0});
1223
1
        std::string tablet_compaction_key_end =
1224
1
                mow_tablet_compaction_key({instance_id_, table_id, INT64_MAX});
1225
1
        txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end);
1226
1
        LOG(WARNING) << "remove mow tablet compaction kv, begin="
1227
1
                     << hex(tablet_compaction_key_begin)
1228
1
                     << " end=" << hex(tablet_compaction_key_end) << " db_id=" << db_id
1229
1
                     << " table_id=" << table_id;
1230
1
        err = txn->commit();
1231
1
        if (err != TxnErrorCode::TXN_OK) {
1232
0
            return -1;
1233
0
        }
1234
1
        ++num_recycled;
1235
1
        is_recycled = true;
1236
1
        return 0;
1237
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1173
2
                                std::string_view k, std::string_view) {
1174
2
        ++num_scanned;
1175
2
        auto k1 = k;
1176
2
        k1.remove_prefix(1);
1177
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1178
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1179
2
        decode_key(&k1, &out);
1180
2
        DCHECK_EQ(out.size(), 6) << k;
1181
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1182
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1183
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1184
0
            return 0;
1185
0
        }
1186
2
        last_scanned_table_id = table_id;
1187
2
        is_recycled = false;
1188
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1189
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1190
2
        std::unique_ptr<Transaction> txn;
1191
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1192
2
        if (err != TxnErrorCode::TXN_OK) {
1193
0
            return -1;
1194
0
        }
1195
2
        std::unique_ptr<RangeGetIterator> iter;
1196
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1197
2
        if (err != TxnErrorCode::TXN_OK) {
1198
0
            return -1;
1199
0
        }
1200
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1201
1
            return 0;
1202
1
        }
1203
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1204
        // 1. Remove all partition version kvs of this table
1205
1
        auto partition_version_key_begin =
1206
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1207
1
        auto partition_version_key_end =
1208
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1209
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1210
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1211
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1212
1
                     << " table_id=" << table_id;
1213
        // 2. Remove the table version kv of this table
1214
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1215
1
        txn->remove(tbl_version_key);
1216
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1217
        // 3. Remove mow delete bitmap update lock and tablet compaction lock
1218
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1219
1
        txn->remove(lock_key);
1220
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1221
1
        std::string tablet_compaction_key_begin =
1222
1
                mow_tablet_compaction_key({instance_id_, table_id, 0});
1223
1
        std::string tablet_compaction_key_end =
1224
1
                mow_tablet_compaction_key({instance_id_, table_id, INT64_MAX});
1225
1
        txn->remove(tablet_compaction_key_begin, tablet_compaction_key_end);
1226
1
        LOG(WARNING) << "remove mow tablet compaction kv, begin="
1227
1
                     << hex(tablet_compaction_key_begin)
1228
1
                     << " end=" << hex(tablet_compaction_key_end) << " db_id=" << db_id
1229
1
                     << " table_id=" << table_id;
1230
1
        err = txn->commit();
1231
1
        if (err != TxnErrorCode::TXN_OK) {
1232
0
            return -1;
1233
0
        }
1234
1
        ++num_recycled;
1235
1
        is_recycled = true;
1236
1
        return 0;
1237
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
1238
1239
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
1240
12
}
1241
1242
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id, int64_t partition_id,
1243
37
                                      bool is_empty_tablet) {
1244
37
    int64_t num_scanned = 0;
1245
37
    std::atomic_long num_recycled = 0;
1246
1247
37
    std::string tablet_key_begin, tablet_key_end;
1248
37
    std::string stats_key_begin, stats_key_end;
1249
37
    std::string job_key_begin, job_key_end;
1250
1251
37
    std::string tablet_belongs;
1252
37
    if (partition_id > 0) {
1253
        // recycle tablets in a partition belonging to the index
1254
30
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
1255
30
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
1256
30
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
1257
30
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
1258
30
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
1259
30
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
1260
30
        tablet_belongs = "partition";
1261
30
    } else {
1262
        // recycle tablets in the index
1263
7
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
1264
7
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
1265
7
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
1266
7
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
1267
7
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
1268
7
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
1269
7
        tablet_belongs = "index";
1270
7
    }
1271
1272
37
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
1273
37
            .tag("table_id", table_id)
1274
37
            .tag("index_id", index_id)
1275
37
            .tag("partition_id", partition_id);
1276
1277
37
    auto start_time = steady_clock::now();
1278
1279
37
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1280
37
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1281
37
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1282
37
                .tag("instance_id", instance_id_)
1283
37
                .tag("table_id", table_id)
1284
37
                .tag("index_id", index_id)
1285
37
                .tag("partition_id", partition_id)
1286
37
                .tag("num_scanned", num_scanned)
1287
37
                .tag("num_recycled", num_recycled);
1288
37
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_1clEPi
Line
Count
Source
1279
37
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1280
37
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1281
37
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1282
37
                .tag("instance_id", instance_id_)
1283
37
                .tag("table_id", table_id)
1284
37
                .tag("index_id", index_id)
1285
37
                .tag("partition_id", partition_id)
1286
37
                .tag("num_scanned", num_scanned)
1287
37
                .tag("num_recycled", num_recycled);
1288
37
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_1clEPi
1289
1290
    // The first string_view represents the tablet key which has been recycled
1291
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
1292
37
    using TabletKeyPair = std::pair<std::string_view, bool>;
1293
37
    SyncExecutor<TabletKeyPair> sync_executor(
1294
37
            _thread_pool_group.recycle_tablet_pool,
1295
37
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
1296
37
                        index_id, partition_id),
1297
231
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1297
231
            [](const TabletKeyPair& k) { return k.first.empty(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
1298
1299
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
1300
37
    std::vector<std::string> tablet_idx_keys;
1301
37
    std::vector<std::string> init_rs_keys;
1302
231
    auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int {
1303
231
        bool use_range_remove = true;
1304
231
        ++num_scanned;
1305
231
        doris::TabletMetaCloudPB tablet_meta_pb;
1306
231
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1307
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1308
0
            use_range_remove = false;
1309
0
            return -1;
1310
0
        }
1311
231
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1312
1313
231
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1314
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
1315
0
            return -1;
1316
0
        }
1317
1318
231
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1319
231
        if (!is_empty_tablet) {
1320
231
            sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1321
231
                               k]() mutable -> TabletKeyPair {
1322
231
                if (recycle_tablet(tid) != 0) {
1323
0
                    LOG_WARNING("failed to recycle tablet")
1324
0
                            .tag("instance_id", instance_id_)
1325
0
                            .tag("tablet_id", tid);
1326
0
                    range_move = false;
1327
0
                    return {std::string_view(), range_move};
1328
0
                }
1329
231
                ++num_recycled;
1330
231
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1331
231
                return {k, range_move};
1332
231
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE_clEv
Line
Count
Source
1321
231
                               k]() mutable -> TabletKeyPair {
1322
231
                if (recycle_tablet(tid) != 0) {
1323
0
                    LOG_WARNING("failed to recycle tablet")
1324
0
                            .tag("instance_id", instance_id_)
1325
0
                            .tag("tablet_id", tid);
1326
0
                    range_move = false;
1327
0
                    return {std::string_view(), range_move};
1328
0
                }
1329
231
                ++num_recycled;
1330
231
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1331
231
                return {k, range_move};
1332
231
            });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE_clEv
1333
231
        } else {
1334
            // Empty tablet only has a [0-1] init rowset
1335
0
            init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1}));
1336
0
            DCHECK([&]() {
1337
0
                std::unique_ptr<Transaction> txn;
1338
0
                if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) {
1339
0
                    LOG_ERROR("failed to create txn").tag("err", err);
1340
0
                    return false;
1341
0
                }
1342
0
                auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2});
1343
0
                auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
1344
0
                std::unique_ptr<RangeGetIterator> iter;
1345
0
                if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1);
1346
0
                    err != TxnErrorCode::TXN_OK) {
1347
0
                    LOG_ERROR("failed to get kv").tag("err", err);
1348
0
                    return false;
1349
0
                }
1350
0
                if (iter->has_next()) {
1351
0
                    LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id);
1352
0
                    return false;
1353
0
                }
1354
0
                return true;
1355
0
            }());
1356
0
            sync_executor.add([k]() mutable -> TabletKeyPair {
1357
0
                LOG_INFO("k is {}, is empty {}", k, k.empty());
1358
0
                return {k, true};
1359
0
            });
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE1_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_ENUlvE1_clEv
1360
0
            ++num_recycled;
1361
0
        }
1362
231
        return 0;
1363
231
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1302
231
    auto recycle_func = [&, is_empty_tablet, this](std::string_view k, std::string_view v) -> int {
1303
231
        bool use_range_remove = true;
1304
231
        ++num_scanned;
1305
231
        doris::TabletMetaCloudPB tablet_meta_pb;
1306
231
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1307
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1308
0
            use_range_remove = false;
1309
0
            return -1;
1310
0
        }
1311
231
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1312
1313
231
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1314
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
1315
0
            return -1;
1316
0
        }
1317
1318
231
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1319
231
        if (!is_empty_tablet) {
1320
231
            sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1321
231
                               k]() mutable -> TabletKeyPair {
1322
231
                if (recycle_tablet(tid) != 0) {
1323
231
                    LOG_WARNING("failed to recycle tablet")
1324
231
                            .tag("instance_id", instance_id_)
1325
231
                            .tag("tablet_id", tid);
1326
231
                    range_move = false;
1327
231
                    return {std::string_view(), range_move};
1328
231
                }
1329
231
                ++num_recycled;
1330
231
                LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1331
231
                return {k, range_move};
1332
231
            });
1333
231
        } else {
1334
            // Empty tablet only has a [0-1] init rowset
1335
0
            init_rs_keys.push_back(meta_rowset_key({instance_id_, tablet_id, 1}));
1336
0
            DCHECK([&]() {
1337
0
                std::unique_ptr<Transaction> txn;
1338
0
                if (TxnErrorCode err = txn_kv_->create_txn(&txn); err != TxnErrorCode::TXN_OK) {
1339
0
                    LOG_ERROR("failed to create txn").tag("err", err);
1340
0
                    return false;
1341
0
                }
1342
0
                auto rs_key_begin = meta_rowset_key({instance_id_, tablet_id, 2});
1343
0
                auto rs_key_end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
1344
0
                std::unique_ptr<RangeGetIterator> iter;
1345
0
                if (TxnErrorCode err = txn->get(rs_key_begin, rs_key_end, &iter, true, 1);
1346
0
                    err != TxnErrorCode::TXN_OK) {
1347
0
                    LOG_ERROR("failed to get kv").tag("err", err);
1348
0
                    return false;
1349
0
                }
1350
0
                if (iter->has_next()) {
1351
0
                    LOG_ERROR("tablet is not empty").tag("tablet_id", tablet_id);
1352
0
                    return false;
1353
0
                }
1354
0
                return true;
1355
0
            }());
1356
0
            sync_executor.add([k]() mutable -> TabletKeyPair {
1357
0
                LOG_INFO("k is {}, is empty {}", k, k.empty());
1358
0
                return {k, true};
1359
0
            });
1360
0
            ++num_recycled;
1361
0
        }
1362
231
        return 0;
1363
231
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
1364
1365
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
1366
37
    auto loop_done = [&, this]() -> int {
1367
37
        bool finished = true;
1368
37
        auto tablet_keys = sync_executor.when_all(&finished);
1369
37
        if (!finished) {
1370
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1371
0
            return -1;
1372
0
        }
1373
37
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1374
        // sort the vector using key's order
1375
37
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1376
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESG_EEDaS5_S8_
Line
Count
Source
1376
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESG_EEDaS5_S8_
1377
37
        bool use_range_remove = true;
1378
231
        for (auto& [_, remove] : tablet_keys) {
1379
231
            if (!remove) {
1380
0
                use_range_remove = remove;
1381
0
                break;
1382
0
            }
1383
231
        }
1384
37
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1385
37
            tablet_idx_keys.clear();
1386
37
            init_rs_keys.clear();
1387
37
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlPiE_clES3_
Line
Count
Source
1384
37
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1385
37
            tablet_idx_keys.clear();
1386
37
            init_rs_keys.clear();
1387
37
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEvENKUlPiE_clES3_
1388
37
        std::unique_ptr<Transaction> txn;
1389
37
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1390
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1391
0
            return -1;
1392
0
        }
1393
37
        std::string tablet_key_end;
1394
37
        if (!tablet_keys.empty()) {
1395
37
            if (use_range_remove) {
1396
37
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1397
37
                txn->remove(tablet_keys.front().first, tablet_key_end);
1398
37
            } else {
1399
0
                for (auto& [k, _] : tablet_keys) {
1400
0
                    txn->remove(k);
1401
0
                }
1402
0
            }
1403
37
        }
1404
231
        for (auto& k : tablet_idx_keys) {
1405
231
            txn->remove(k);
1406
231
        }
1407
37
        for (auto& k : init_rs_keys) {
1408
0
            txn->remove(k);
1409
0
        }
1410
37
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1411
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1412
0
                         << ", err=" << err;
1413
0
            return -1;
1414
0
        }
1415
37
        return 0;
1416
37
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEv
Line
Count
Source
1366
37
    auto loop_done = [&, this]() -> int {
1367
37
        bool finished = true;
1368
37
        auto tablet_keys = sync_executor.when_all(&finished);
1369
37
        if (!finished) {
1370
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1371
0
            return -1;
1372
0
        }
1373
37
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1374
        // sort the vector using key's order
1375
37
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1376
37
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1377
37
        bool use_range_remove = true;
1378
231
        for (auto& [_, remove] : tablet_keys) {
1379
231
            if (!remove) {
1380
0
                use_range_remove = remove;
1381
0
                break;
1382
0
            }
1383
231
        }
1384
37
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
1385
37
            tablet_idx_keys.clear();
1386
37
            init_rs_keys.clear();
1387
37
        });
1388
37
        std::unique_ptr<Transaction> txn;
1389
37
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1390
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1391
0
            return -1;
1392
0
        }
1393
37
        std::string tablet_key_end;
1394
37
        if (!tablet_keys.empty()) {
1395
37
            if (use_range_remove) {
1396
37
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1397
37
                txn->remove(tablet_keys.front().first, tablet_key_end);
1398
37
            } else {
1399
0
                for (auto& [k, _] : tablet_keys) {
1400
0
                    txn->remove(k);
1401
0
                }
1402
0
            }
1403
37
        }
1404
231
        for (auto& k : tablet_idx_keys) {
1405
231
            txn->remove(k);
1406
231
        }
1407
37
        for (auto& k : init_rs_keys) {
1408
0
            txn->remove(k);
1409
0
        }
1410
37
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1411
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1412
0
                         << ", err=" << err;
1413
0
            return -1;
1414
0
        }
1415
37
        return 0;
1416
37
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsElllbENK3$_0clEv
1417
1418
37
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
1419
37
                               std::move(loop_done));
1420
37
    if (ret != 0) {
1421
0
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
1422
0
        return ret;
1423
0
    }
1424
1425
    // directly remove tablet stats and tablet jobs of these dropped index or partition
1426
37
    std::unique_ptr<Transaction> txn;
1427
37
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1428
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
1429
0
        return -1;
1430
0
    }
1431
37
    txn->remove(stats_key_begin, stats_key_end);
1432
37
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
1433
37
                 << " end=" << hex(stats_key_end);
1434
37
    txn->remove(job_key_begin, job_key_end);
1435
37
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
1436
37
    std::string schema_key_begin, schema_key_end;
1437
37
    std::string schema_dict_key;
1438
37
    if (partition_id <= 0) {
1439
        // Delete schema kv of this index
1440
7
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
1441
7
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
1442
7
        txn->remove(schema_key_begin, schema_key_end);
1443
7
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
1444
7
                     << " end=" << hex(schema_key_end);
1445
7
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
1446
7
        txn->remove(schema_dict_key);
1447
7
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
1448
7
    }
1449
1450
37
    TxnErrorCode err = txn->commit();
1451
37
    if (err != TxnErrorCode::TXN_OK) {
1452
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
1453
0
                     << " err=" << err;
1454
0
        return -1;
1455
0
    }
1456
1457
37
    return ret;
1458
37
}
1459
1460
4.00k
// Deletes the object files (segments and inverted index files) of one rowset.
//
// - Returns 0 immediately when the rowset has no segments.
// - When the rowset meta carries no tablet schema, falls back to deleting by
//   rowset path prefix via the (resource_id, tablet_id, rowset_id) overload.
// - Otherwise builds the explicit list of segment and inverted index paths and
//   hands it to the accessor registered for the rowset's resource id.
//
// Returns -1 if no accessor is registered for the resource id, otherwise the
// result of the accessor's delete_files() call.
int InstanceRecycler::delete_rowset_data(const doris::RowsetMetaCloudPB& rs_meta_pb) {
    const int64_t num_segments = rs_meta_pb.num_segments();
    if (num_segments <= 0) return 0;
    if (!rs_meta_pb.has_tablet_schema()) {
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
                                  rs_meta_pb.rowset_id_v2());
    }
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
    if (it == accessor_map_.end()) {
        LOG_WARNING("instance has no such resource id")
                .tag("instance_id", instance_id_)
                .tag("resource_id", rs_meta_pb.resource_id());
        return -1;
    }
    auto& accessor = it->second;
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
    const int64_t tablet_id = rs_meta_pb.tablet_id();
    // Bind by reference: copying the schema message is a needless deep copy.
    const auto& tablet_schema = rs_meta_pb.tablet_schema();

    // process inverted indexes
    std::vector<std::pair<int64_t, std::string>> index_ids;
    index_ids.reserve(tablet_schema.index_size());
    for (const auto& index : tablet_schema.index()) {
        if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
            index_ids.emplace_back(index.index_id(), index.index_suffix_name());
        }
    }

    // The storage format is a property of the schema, not of a segment, so it
    // is resolved once here. V1 is the default when the schema does not set it.
    const auto index_storage_format = tablet_schema.has_inverted_index_storage_format()
                                              ? tablet_schema.inverted_index_storage_format()
                                              : InvertedIndexStorageFormatPB::V1;
    const bool is_v1 = index_storage_format == InvertedIndexStorageFormatPB::V1;

    // V1 keeps one index file per (segment, index); other formats keep at most
    // one index file per segment.
    const size_t index_files_per_segment = is_v1 ? index_ids.size() : (index_ids.empty() ? 0 : 1);
    std::vector<std::string> file_paths;
    file_paths.reserve(static_cast<size_t>(num_segments) * (1 + index_files_per_segment));
    for (int64_t i = 0; i < num_segments; ++i) {
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
        if (is_v1) {
            for (const auto& index_id : index_ids) {
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
                                                            index_id.second));
            }
        } else if (!index_ids.empty()) {
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
        }
    }
    // TODO(AlexYue): it seems this could be done in batch
    return accessor->delete_files(file_paths);
}
1505
1506
int InstanceRecycler::delete_rowset_data(const std::vector<doris::RowsetMetaCloudPB>& rowsets,
1507
29
                                         RowsetRecyclingState type) {
1508
29
    int ret = 0;
1509
    // resource_id -> file_paths
1510
29
    std::map<std::string, std::vector<std::string>> resource_file_paths;
1511
    // (resource_id, tablet_id, rowset_id)
1512
29
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
1513
1514
6.14k
    for (const auto& rs : rowsets) {
1515
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
1516
        // due to aborted schema change.
1517
6.14k
        if (type == RowsetRecyclingState::FORMAL_ROWSET) {
1518
3.12k
            std::lock_guard lock(recycled_tablets_mtx_);
1519
3.12k
            if (recycled_tablets_.count(rs.tablet_id())) {
1520
0
                continue; // Rowset data has already been deleted
1521
0
            }
1522
3.12k
        }
1523
1524
6.14k
        auto it = accessor_map_.find(rs.resource_id());
1525
        // possible if the accessor is not initilized correctly
1526
6.14k
        if (it == accessor_map_.end()) [[unlikely]] {
1527
1
            LOG_WARNING("instance has no such resource id")
1528
1
                    .tag("instance_id", instance_id_)
1529
1
                    .tag("resource_id", rs.resource_id());
1530
1
            ret = -1;
1531
1
            continue;
1532
1
        }
1533
1534
6.14k
        auto& file_paths = resource_file_paths[rs.resource_id()];
1535
6.14k
        const auto& rowset_id = rs.rowset_id_v2();
1536
6.14k
        int64_t tablet_id = rs.tablet_id();
1537
6.14k
        int64_t num_segments = rs.num_segments();
1538
6.14k
        if (num_segments <= 0) continue;
1539
1540
        // Process inverted indexes
1541
6.14k
        std::vector<std::pair<int64_t, std::string>> index_ids;
1542
        // default format as v1.
1543
6.14k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
1544
6.14k
        int inverted_index_get_ret = 0;
1545
6.14k
        if (rs.has_tablet_schema()) {
1546
5.54k
            for (const auto& index : rs.tablet_schema().index()) {
1547
5.54k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
1548
5.54k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
1549
5.54k
                }
1550
5.54k
            }
1551
2.59k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
1552
2.56k
                index_format = rs.tablet_schema().inverted_index_storage_format();
1553
2.56k
            }
1554
3.55k
        } else {
1555
3.55k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
1556
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
1557
0
                                "instance_id="
1558
0
                             << instance_id_ << " tablet_id=" << tablet_id
1559
0
                             << " rowset_id=" << rowset_id;
1560
0
                ret = -1;
1561
0
                continue;
1562
0
            }
1563
3.55k
            InvertedIndexInfo index_info;
1564
3.55k
            inverted_index_get_ret =
1565
3.55k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
1566
3.55k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
1567
3.55k
                                     &inverted_index_get_ret);
1568
3.55k
            if (inverted_index_get_ret == 0) {
1569
3.05k
                index_format = index_info.first;
1570
3.05k
                index_ids = index_info.second;
1571
3.05k
            } else if (inverted_index_get_ret == 1) {
1572
                // 1. Schema kv not found means tablet has been recycled
1573
                // Maybe some tablet recycle failed by some bugs
1574
                // We need to delete again to double check
1575
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
1576
                // because we are uncertain about the inverted index information.
1577
                // If there are inverted indexes, some data might not be deleted,
1578
                // but this is acceptable as we have made our best effort to delete the data.
1579
503
                LOG_INFO(
1580
503
                        "delete rowset data schema kv not found, need to delete again to double "
1581
503
                        "check")
1582
503
                        .tag("instance_id", instance_id_)
1583
503
                        .tag("tablet_id", tablet_id)
1584
503
                        .tag("rowset", rs.ShortDebugString());
1585
                // Currently index_ids is guaranteed to be empty,
1586
                // but we clear it again here as a safeguard against future code changes
1587
                // that might cause index_ids to no longer be empty
1588
503
                index_format = InvertedIndexStorageFormatPB::V2;
1589
503
                index_ids.clear();
1590
503
            } else {
1591
0
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
1592
0
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
1593
0
                ret = -1;
1594
0
                continue;
1595
0
            }
1596
3.55k
        }
1597
6.14k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
1598
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
1599
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
1600
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
1601
5
            continue;
1602
5
        }
1603
36.8k
        for (int64_t i = 0; i < num_segments; ++i) {
1604
30.6k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
1605
30.6k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
1606
59.2k
                for (const auto& index_id : index_ids) {
1607
59.2k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
1608
59.2k
                                                                index_id.first, index_id.second));
1609
59.2k
                }
1610
28.1k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
1611
                // try to recycle inverted index v2 when get_ret == 1
1612
                // we treat schema not found as if it has a v2 format inverted index
1613
                // to reduce chance of data leakage
1614
2.50k
                if (inverted_index_get_ret == 1) {
1615
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
1616
2.50k
                            .tag("instance_id", instance_id_)
1617
2.50k
                            .tag("inverted index v2 path",
1618
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
1619
2.50k
                }
1620
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
1621
2.50k
            }
1622
30.6k
        }
1623
6.13k
    }
1624
1625
29
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
1626
29
                                                 "delete_rowset_data",
1627
34
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_0clERKi
Line
Count
Source
1627
34
                                                 [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_0clERKi
1628
29
    for (auto& [resource_id, file_paths] : resource_file_paths) {
1629
29
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
1630
29
            DCHECK(accessor_map_.count(*rid))
1631
0
                    << "uninitilized accessor, instance_id=" << instance_id_
1632
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
1633
29
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
1634
29
                                     &accessor_map_);
1635
29
            if (!accessor_map_.contains(*rid)) {
1636
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
1637
0
                        .tag("resource_id", resource_id)
1638
0
                        .tag("instance_id", instance_id_);
1639
0
                return -1;
1640
0
            }
1641
29
            auto& accessor = accessor_map_[*rid];
1642
29
            return accessor->delete_files(*paths);
1643
29
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_1clEv
Line
Count
Source
1629
29
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
1630
29
            DCHECK(accessor_map_.count(*rid))
1631
0
                    << "uninitilized accessor, instance_id=" << instance_id_
1632
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
1633
29
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
1634
29
                                     &accessor_map_);
1635
29
            if (!accessor_map_.contains(*rid)) {
1636
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
1637
0
                        .tag("resource_id", resource_id)
1638
0
                        .tag("instance_id", instance_id_);
1639
0
                return -1;
1640
0
            }
1641
29
            auto& accessor = accessor_map_[*rid];
1642
29
            return accessor->delete_files(*paths);
1643
29
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_1clEv
1644
29
    }
1645
29
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
1646
5
        LOG_INFO(
1647
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
1648
5
                "resource_id={}, tablet_id={}, instance_id={}",
1649
5
                rowset_id, resource_id, tablet_id, instance_id_);
1650
5
        concurrent_delete_executor.add(
1651
5
                [&]() -> int { return delete_rowset_data(resource_id, tablet_id, rowset_id); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_2clEv
Line
Count
Source
1651
5
                [&]() -> int { return delete_rowset_data(resource_id, tablet_id, rowset_id); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt6vectorINS_17RowsetMetaCloudPBESaIS3_EENS0_20RowsetRecyclingStateEENK3$_2clEv
1652
5
    }
1653
29
    bool finished = true;
1654
29
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
1655
34
    for (int r : rets) {
1656
34
        if (r != 0) {
1657
0
            ret = -1;
1658
0
            break;
1659
0
        }
1660
34
    }
1661
29
    ret = finished ? ret : -1;
1662
29
    return ret;
1663
29
}
1664
1665
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
1666
2.90k
                                         const std::string& rowset_id) {
1667
2.90k
    auto it = accessor_map_.find(resource_id);
1668
2.90k
    if (it == accessor_map_.end()) {
1669
0
        LOG_WARNING("instance has no such resource id")
1670
0
                .tag("instance_id", instance_id_)
1671
0
                .tag("resource_id", resource_id)
1672
0
                .tag("tablet_id", tablet_id)
1673
0
                .tag("rowset_id", rowset_id);
1674
0
        return -1;
1675
0
    }
1676
2.90k
    auto& accessor = it->second;
1677
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
1678
2.90k
}
1679
1680
234
int InstanceRecycler::recycle_tablet(int64_t tablet_id) {
1681
234
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
1682
234
            .tag("instance_id", instance_id_)
1683
234
            .tag("tablet_id", tablet_id);
1684
1685
234
    int ret = 0;
1686
234
    auto start_time = steady_clock::now();
1687
1688
    // collect resource ids
1689
234
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
1690
234
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
1691
234
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
1692
234
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
1693
1694
234
    std::set<std::string> resource_ids;
1695
234
    int64_t recycle_rowsets_number = 0;
1696
234
    int64_t recycle_segments_number = 0;
1697
234
    int64_t recycle_rowsets_data_size = 0;
1698
234
    int64_t recycle_rowsets_index_size = 0;
1699
234
    int64_t max_rowset_version = 0;
1700
234
    int64_t min_rowset_creation_time = INT64_MAX;
1701
234
    int64_t max_rowset_creation_time = 0;
1702
234
    int64_t min_rowset_expiration_time = INT64_MAX;
1703
234
    int64_t max_rowset_expiration_time = 0;
1704
1705
234
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1706
234
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1707
234
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
1708
234
                .tag("instance_id", instance_id_)
1709
234
                .tag("tablet_id", tablet_id)
1710
234
                .tag("recycle rowsets number", recycle_rowsets_number)
1711
234
                .tag("recycle segments number", recycle_segments_number)
1712
234
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
1713
234
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
1714
234
                .tag("max rowset version", max_rowset_version)
1715
234
                .tag("min rowset creation time", min_rowset_creation_time)
1716
234
                .tag("max rowset creation time", max_rowset_creation_time)
1717
234
                .tag("min rowset expiration time", min_rowset_expiration_time)
1718
234
                .tag("max rowset expiration time", max_rowset_expiration_time)
1719
234
                .tag("ret", ret);
1720
234
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_1clEPi
Line
Count
Source
1705
234
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1706
234
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1707
234
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
1708
234
                .tag("instance_id", instance_id_)
1709
234
                .tag("tablet_id", tablet_id)
1710
234
                .tag("recycle rowsets number", recycle_rowsets_number)
1711
234
                .tag("recycle segments number", recycle_segments_number)
1712
234
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
1713
234
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
1714
234
                .tag("max rowset version", max_rowset_version)
1715
234
                .tag("min rowset creation time", min_rowset_creation_time)
1716
234
                .tag("max rowset creation time", max_rowset_creation_time)
1717
234
                .tag("min rowset expiration time", min_rowset_expiration_time)
1718
234
                .tag("max rowset expiration time", max_rowset_expiration_time)
1719
234
                .tag("ret", ret);
1720
234
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_1clEPi
1721
1722
234
    std::unique_ptr<Transaction> txn;
1723
234
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1724
0
        LOG_WARNING("failed to recycle tablet ")
1725
0
                .tag("tablet id", tablet_id)
1726
0
                .tag("instance_id", instance_id_)
1727
0
                .tag("reason", "failed to create txn");
1728
0
        ret = -1;
1729
0
    }
1730
234
    GetRowsetResponse resp;
1731
234
    std::string msg;
1732
234
    MetaServiceCode code = MetaServiceCode::OK;
1733
    // get rowsets in tablet
1734
234
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
1735
234
                        tablet_id, code, msg, &resp);
1736
234
    if (code != MetaServiceCode::OK) {
1737
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
1738
0
                .tag("tablet id", tablet_id)
1739
0
                .tag("msg", msg)
1740
0
                .tag("code", code)
1741
0
                .tag("instance id", instance_id_);
1742
0
        ret = -1;
1743
0
    }
1744
234
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
1745
1746
2.50k
    for (const auto& rs_meta : resp.rowset_meta()) {
1747
        /*
1748
        * For compatibility, we skip the loop for [0-1] here. 
1749
        * The purpose of this loop is to delete object files,
1750
        * and since [0-1] only has meta and doesn't have object files, 
1751
        * skipping it doesn't affect system correctness. 
1752
        *
1753
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below 
1754
        * would return error -1 directly, causing the recycle operation to fail.
1755
        *
1756
        * [0-1] doesn't have resource id is a bug.
1757
        * In the future, we will fix this problem, after that,
1758
        * we can remove this if statement.
1759
        *
1760
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
1761
        */
1762
1763
2.50k
        if (rs_meta.end_version() == 1) {
1764
            // Assert that [0-1] has no resource_id to make sure
1765
            // this if statement will not be forgetted to remove
1766
            // when the resource id bug is fixed
1767
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
1768
0
            recycle_rowsets_number += 1;
1769
0
            continue;
1770
0
        }
1771
2.50k
        if (!rs_meta.has_resource_id()) {
1772
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
1773
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
1774
1
                    .tag("instance_id", instance_id_)
1775
1
                    .tag("tablet_id", tablet_id);
1776
1
            return -1;
1777
1
        }
1778
18.4E
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
1779
2.50k
        auto it = accessor_map_.find(rs_meta.resource_id());
1780
        // possible if the accessor is not initilized correctly
1781
2.50k
        if (it == accessor_map_.end()) [[unlikely]] {
1782
1
            LOG_WARNING(
1783
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
1784
1
                    "recycle process")
1785
1
                    .tag("tablet id", tablet_id)
1786
1
                    .tag("instance_id", instance_id_)
1787
1
                    .tag("resource_id", rs_meta.resource_id())
1788
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
1789
1
            return -1;
1790
1
        }
1791
2.50k
        recycle_rowsets_number += 1;
1792
2.50k
        recycle_segments_number += rs_meta.num_segments();
1793
2.50k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
1794
2.50k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
1795
2.50k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
1796
2.50k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
1797
2.50k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
1798
2.50k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
1799
2.50k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
1800
2.50k
        resource_ids.emplace(rs_meta.resource_id());
1801
2.50k
    }
1802
1803
232
    LOG_INFO("recycle tablet start to delete object")
1804
232
            .tag("instance id", instance_id_)
1805
232
            .tag("tablet id", tablet_id)
1806
232
            .tag("recycle tablet resource ids are",
1807
232
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
1808
232
                                 [](const std::string& a, const std::string& b) {
1809
203
                                     return a.empty() ? b : a + "," + b;
1810
203
                                 }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_0clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESA_
Line
Count
Source
1808
203
                                 [](const std::string& a, const std::string& b) {
1809
203
                                     return a.empty() ? b : a + "," + b;
1810
203
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_0clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESA_
1811
1812
232
    SyncExecutor<int> concurrent_delete_executor(
1813
232
            _thread_pool_group.s3_producer_pool,
1814
232
            fmt::format("delete tablet {} s3 rowset", tablet_id),
1815
232
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_2clERKi
Line
Count
Source
1815
203
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_2clERKi
1816
1817
    // delete all rowset data in this tablet
1818
    // ATTN: there may be data leak if not all accessor initilized successfully
1819
    //       partial data deleted if the tablet is stored cross-storage vault
1820
    //       vault id is not attached to TabletMeta...
1821
232
    for (const auto& resource_id : resource_ids) {
1822
203
        concurrent_delete_executor.add([&, accessor_ptr = accessor_map_[resource_id]]() {
1823
203
            if (accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)) != 0) {
1824
1
                LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
1825
1
                             << " path=" << accessor_ptr->uri();
1826
1
                return -1;
1827
1
            }
1828
202
            return 0;
1829
203
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_3clEv
Line
Count
Source
1822
203
        concurrent_delete_executor.add([&, accessor_ptr = accessor_map_[resource_id]]() {
1823
203
            if (accessor_ptr->delete_directory(tablet_path_prefix(tablet_id)) != 0) {
1824
1
                LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
1825
1
                             << " path=" << accessor_ptr->uri();
1826
1
                return -1;
1827
1
            }
1828
202
            return 0;
1829
203
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElENK3$_3clEv
1830
203
    }
1831
1832
232
    bool finished = true;
1833
232
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
1834
232
    for (int r : rets) {
1835
203
        if (r != 0) {
1836
1
            ret = -1;
1837
1
        }
1838
203
    }
1839
1840
232
    ret = finished ? ret : -1;
1841
1842
232
    if (ret != 0) { // failed recycle tablet data
1843
1
        LOG_WARNING("ret!=0")
1844
1
                .tag("finished", finished)
1845
1
                .tag("ret", ret)
1846
1
                .tag("instance_id", instance_id_)
1847
1
                .tag("tablet_id", tablet_id);
1848
1
        return ret;
1849
1
    }
1850
1851
231
    txn.reset();
1852
231
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1853
0
        LOG_WARNING("failed to recycle tablet ")
1854
0
                .tag("tablet id", tablet_id)
1855
0
                .tag("instance_id", instance_id_)
1856
0
                .tag("reason", "failed to create txn");
1857
0
        ret = -1;
1858
0
    }
1859
    // delete all rowset kv in this tablet
1860
231
    txn->remove(rs_key0, rs_key1);
1861
231
    txn->remove(recyc_rs_key0, recyc_rs_key1);
1862
1863
    // remove delete bitmap for MoW table
1864
231
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
1865
231
    txn->remove(pending_key);
1866
231
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
1867
231
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
1868
231
    txn->remove(delete_bitmap_start, delete_bitmap_end);
1869
1870
231
    TxnErrorCode err = txn->commit();
1871
231
    if (err != TxnErrorCode::TXN_OK) {
1872
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
1873
0
        ret = -1;
1874
0
    }
1875
1876
231
    if (ret == 0) {
1877
        // All object files under tablet have been deleted
1878
231
        std::lock_guard lock(recycled_tablets_mtx_);
1879
231
        recycled_tablets_.insert(tablet_id);
1880
231
    }
1881
1882
231
    return ret;
1883
232
}
1884
1885
13
int InstanceRecycler::recycle_rowsets() {
1886
13
    const std::string task_name = "recycle_rowsets";
1887
13
    int64_t num_scanned = 0;
1888
13
    int64_t num_expired = 0;
1889
13
    int64_t num_prepare = 0;
1890
13
    int64_t num_compacted = 0;
1891
13
    int64_t num_empty_rowset = 0;
1892
13
    size_t total_rowset_key_size = 0;
1893
13
    size_t total_rowset_value_size = 0;
1894
13
    size_t expired_rowset_size = 0;
1895
13
    std::atomic_long num_recycled = 0;
1896
1897
13
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
1898
13
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
1899
13
    std::string recyc_rs_key0;
1900
13
    std::string recyc_rs_key1;
1901
13
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
1902
13
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
1903
1904
13
    LOG_INFO("begin to recycle rowsets").tag("instance_id", instance_id_);
1905
1906
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1907
13
    register_recycle_task(task_name, start_time);
1908
1909
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1910
13
        unregister_recycle_task(task_name);
1911
13
        int64_t cost =
1912
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1913
13
        LOG_INFO("recycle rowsets finished, cost={}s", cost)
1914
13
                .tag("instance_id", instance_id_)
1915
13
                .tag("num_scanned", num_scanned)
1916
13
                .tag("num_expired", num_expired)
1917
13
                .tag("num_recycled", num_recycled)
1918
13
                .tag("num_recycled.prepare", num_prepare)
1919
13
                .tag("num_recycled.compacted", num_compacted)
1920
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
1921
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
1922
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
1923
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
1924
13
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEPi
Line
Count
Source
1909
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
1910
13
        unregister_recycle_task(task_name);
1911
13
        int64_t cost =
1912
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1913
13
        LOG_INFO("recycle rowsets finished, cost={}s", cost)
1914
13
                .tag("instance_id", instance_id_)
1915
13
                .tag("num_scanned", num_scanned)
1916
13
                .tag("num_expired", num_expired)
1917
13
                .tag("num_recycled", num_recycled)
1918
13
                .tag("num_recycled.prepare", num_prepare)
1919
13
                .tag("num_recycled.compacted", num_compacted)
1920
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
1921
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
1922
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
1923
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
1924
13
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEPi
1925
1926
13
    std::vector<std::string> rowset_keys;
1927
13
    std::vector<doris::RowsetMetaCloudPB> rowsets;
1928
1929
    // Store keys of rowset recycled by background workers
1930
13
    std::mutex async_recycled_rowset_keys_mutex;
1931
13
    std::vector<std::string> async_recycled_rowset_keys;
1932
13
    auto worker_pool = std::make_unique<SimpleThreadPool>(
1933
13
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
1934
13
    worker_pool->start();
1935
13
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
1936
900
                                            int64_t tablet_id, const std::string& rowset_id) {
1937
        // Try to delete rowset data in background thread
1938
900
        int ret = worker_pool->submit_with_timeout(
1939
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
1940
782
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1941
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
1942
0
                        return;
1943
0
                    }
1944
782
                    std::vector<std::string> keys;
1945
782
                    {
1946
782
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
1947
782
                        async_recycled_rowset_keys.push_back(std::move(key));
1948
782
                        if (async_recycled_rowset_keys.size() > 100) {
1949
7
                            keys.swap(async_recycled_rowset_keys);
1950
7
                        }
1951
782
                    }
1952
782
                    if (keys.empty()) return;
1953
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
1954
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
1955
0
                                     << instance_id_;
1956
7
                    } else {
1957
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
1958
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
1959
7
                                           num_recycled, start_time);
1960
7
                    }
1961
7
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
1939
782
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
1940
782
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1941
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
1942
0
                        return;
1943
0
                    }
1944
782
                    std::vector<std::string> keys;
1945
782
                    {
1946
782
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
1947
782
                        async_recycled_rowset_keys.push_back(std::move(key));
1948
782
                        if (async_recycled_rowset_keys.size() > 100) {
1949
7
                            keys.swap(async_recycled_rowset_keys);
1950
7
                        }
1951
782
                    }
1952
782
                    if (keys.empty()) return;
1953
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
1954
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
1955
0
                                     << instance_id_;
1956
7
                    } else {
1957
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
1958
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
1959
7
                                           num_recycled, start_time);
1960
7
                    }
1961
7
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
1962
900
                0);
1963
900
        if (ret == 0) return 0;
1964
        // Submit task failed, delete rowset data in current thread
1965
118
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1966
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
1967
0
            return -1;
1968
0
        }
1969
118
        rowset_keys.push_back(std::move(key));
1970
118
        return 0;
1971
118
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
1936
900
                                            int64_t tablet_id, const std::string& rowset_id) {
1937
        // Try to delete rowset data in background thread
1938
900
        int ret = worker_pool->submit_with_timeout(
1939
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
1940
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1941
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
1942
900
                        return;
1943
900
                    }
1944
900
                    std::vector<std::string> keys;
1945
900
                    {
1946
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
1947
900
                        async_recycled_rowset_keys.push_back(std::move(key));
1948
900
                        if (async_recycled_rowset_keys.size() > 100) {
1949
900
                            keys.swap(async_recycled_rowset_keys);
1950
900
                        }
1951
900
                    }
1952
900
                    if (keys.empty()) return;
1953
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
1954
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
1955
900
                                     << instance_id_;
1956
900
                    } else {
1957
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
1958
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
1959
900
                                           num_recycled, start_time);
1960
900
                    }
1961
900
                },
1962
900
                0);
1963
900
        if (ret == 0) return 0;
1964
        // Submit task failed, delete rowset data in current thread
1965
118
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
1966
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
1967
0
            return -1;
1968
0
        }
1969
118
        rowset_keys.push_back(std::move(key));
1970
118
        return 0;
1971
118
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
1972
1973
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1974
1975
4.00k
    auto calc_expiration = [&earlest_ts, this](const RecycleRowsetPB& rs) {
1976
4.00k
        if (config::force_immediate_recycle) {
1977
0
            return 0L;
1978
0
        }
1979
        // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1980
4.00k
        int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1981
4.00k
        int64_t retention_seconds = config::retention_seconds;
1982
4.00k
        if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1983
3.10k
            retention_seconds =
1984
3.10k
                    std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1985
3.10k
        }
1986
4.00k
        int64_t final_expiration = expiration + retention_seconds;
1987
4.00k
        if (earlest_ts > final_expiration) {
1988
2
            earlest_ts = final_expiration;
1989
2
            g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, earlest_ts);
1990
2
        }
1991
4.00k
        return final_expiration;
1992
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clERKNS0_15RecycleRowsetPBE
Line
Count
Source
1975
4.00k
    auto calc_expiration = [&earlest_ts, this](const RecycleRowsetPB& rs) {
1976
4.00k
        if (config::force_immediate_recycle) {
1977
0
            return 0L;
1978
0
        }
1979
        // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1980
4.00k
        int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1981
4.00k
        int64_t retention_seconds = config::retention_seconds;
1982
4.00k
        if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1983
3.10k
            retention_seconds =
1984
3.10k
                    std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1985
3.10k
        }
1986
4.00k
        int64_t final_expiration = expiration + retention_seconds;
1987
4.00k
        if (earlest_ts > final_expiration) {
1988
2
            earlest_ts = final_expiration;
1989
2
            g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, earlest_ts);
1990
2
        }
1991
4.00k
        return final_expiration;
1992
4.00k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clERKNS0_15RecycleRowsetPBE
1993
1994
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
1995
4.00k
        ++num_scanned;
1996
4.00k
        total_rowset_key_size += k.size();
1997
4.00k
        total_rowset_value_size += v.size();
1998
4.00k
        RecycleRowsetPB rowset;
1999
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2000
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2001
0
            return -1;
2002
0
        }
2003
2004
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2005
0
                   << " num_expired=" << num_expired << " expiration=" << calc_expiration(rowset)
2006
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
2007
4.00k
        int64_t current_time = ::time(nullptr);
2008
4.00k
        if (current_time < calc_expiration(rowset)) { // not expired
2009
0
            return 0;
2010
0
        }
2011
4.00k
        ++num_expired;
2012
4.00k
        expired_rowset_size += v.size();
2013
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
2014
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
2015
                // in old version, keep this key-value pair and it needs to be checked manually
2016
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2017
0
                return -1;
2018
0
            }
2019
250
            if (rowset.resource_id().empty()) [[unlikely]] {
2020
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2021
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2022
0
                          << hex(k) << " value=" << proto_to_json(rowset);
2023
0
                rowset_keys.emplace_back(k);
2024
0
                return -1;
2025
0
            }
2026
            // decode rowset_id
2027
250
            auto k1 = k;
2028
250
            k1.remove_prefix(1);
2029
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2030
250
            decode_key(&k1, &out);
2031
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2032
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2033
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2034
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
2035
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
2036
250
                                             rowset.tablet_id(), rowset_id) != 0) {
2037
0
                return -1;
2038
0
            }
2039
250
            return 0;
2040
250
        }
2041
        // TODO(plat1ko): check rowset not referenced
2042
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
2043
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
2044
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
2045
0
                LOG_INFO("recycle rowset that has empty resource id");
2046
0
            } else {
2047
                // other situations, keep this key-value pair and it needs to be checked manually
2048
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2049
0
                return -1;
2050
0
            }
2051
0
        }
2052
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2053
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
2054
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
2055
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
2056
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
2057
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
2058
3.75k
                  << " rowset_meta_size=" << v.size()
2059
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
2060
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
2061
            // unable to calculate file path, can only be deleted by rowset id prefix
2062
650
            num_prepare += 1;
2063
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
2064
650
                                             rowset_meta->tablet_id(),
2065
650
                                             rowset_meta->rowset_id_v2()) != 0) {
2066
0
                return -1;
2067
0
            }
2068
3.10k
        } else {
2069
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
2070
3.10k
            rowset_keys.emplace_back(k);
2071
3.10k
            if (rowset_meta->num_segments() > 0) { // Skip empty rowset
2072
3.10k
                rowsets.push_back(std::move(*rowset_meta));
2073
3.10k
            } else {
2074
0
                ++num_empty_rowset;
2075
0
            }
2076
3.10k
        }
2077
3.75k
        return 0;
2078
3.75k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1994
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
1995
4.00k
        ++num_scanned;
1996
4.00k
        total_rowset_key_size += k.size();
1997
4.00k
        total_rowset_value_size += v.size();
1998
4.00k
        RecycleRowsetPB rowset;
1999
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2000
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2001
0
            return -1;
2002
0
        }
2003
2004
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2005
0
                   << " num_expired=" << num_expired << " expiration=" << calc_expiration(rowset)
2006
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
2007
4.00k
        int64_t current_time = ::time(nullptr);
2008
4.00k
        if (current_time < calc_expiration(rowset)) { // not expired
2009
0
            return 0;
2010
0
        }
2011
4.00k
        ++num_expired;
2012
4.00k
        expired_rowset_size += v.size();
2013
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
2014
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
2015
                // in old version, keep this key-value pair and it needs to be checked manually
2016
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2017
0
                return -1;
2018
0
            }
2019
250
            if (rowset.resource_id().empty()) [[unlikely]] {
2020
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2021
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2022
0
                          << hex(k) << " value=" << proto_to_json(rowset);
2023
0
                rowset_keys.emplace_back(k);
2024
0
                return -1;
2025
0
            }
2026
            // decode rowset_id
2027
250
            auto k1 = k;
2028
250
            k1.remove_prefix(1);
2029
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2030
250
            decode_key(&k1, &out);
2031
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2032
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2033
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2034
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
2035
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
2036
250
                                             rowset.tablet_id(), rowset_id) != 0) {
2037
0
                return -1;
2038
0
            }
2039
250
            return 0;
2040
250
        }
2041
        // TODO(plat1ko): check rowset not referenced
2042
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
2043
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
2044
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
2045
0
                LOG_INFO("recycle rowset that has empty resource id");
2046
0
            } else {
2047
                // other situations, keep this key-value pair and it needs to be checked manually
2048
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2049
0
                return -1;
2050
0
            }
2051
0
        }
2052
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2053
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
2054
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
2055
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
2056
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
2057
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
2058
3.75k
                  << " rowset_meta_size=" << v.size()
2059
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
2060
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
2061
            // unable to calculate file path, can only be deleted by rowset id prefix
2062
650
            num_prepare += 1;
2063
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
2064
650
                                             rowset_meta->tablet_id(),
2065
650
                                             rowset_meta->rowset_id_v2()) != 0) {
2066
0
                return -1;
2067
0
            }
2068
3.10k
        } else {
2069
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
2070
3.10k
            rowset_keys.emplace_back(k);
2071
3.10k
            if (rowset_meta->num_segments() > 0) { // Skip empty rowset
2072
3.10k
                rowsets.push_back(std::move(*rowset_meta));
2073
3.10k
            } else {
2074
0
                ++num_empty_rowset;
2075
0
            }
2076
3.10k
        }
2077
3.75k
        return 0;
2078
3.75k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
2079
2080
21
    auto loop_done = [&]() -> int {
2081
21
        std::vector<std::string> rowset_keys_to_delete;
2082
21
        std::vector<doris::RowsetMetaCloudPB> rowsets_to_delete;
2083
21
        rowset_keys_to_delete.swap(rowset_keys);
2084
21
        rowsets_to_delete.swap(rowsets);
2085
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
2086
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2087
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) {
2088
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2089
0
                return;
2090
0
            }
2091
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2092
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2093
0
                return;
2094
0
            }
2095
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2096
21
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
2086
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2087
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) {
2088
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2089
0
                return;
2090
0
            }
2091
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2092
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2093
0
                return;
2094
0
            }
2095
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2096
21
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
2097
21
        return 0;
2098
21
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
2080
21
    auto loop_done = [&]() -> int {
2081
21
        std::vector<std::string> rowset_keys_to_delete;
2082
21
        std::vector<doris::RowsetMetaCloudPB> rowsets_to_delete;
2083
21
        rowset_keys_to_delete.swap(rowset_keys);
2084
21
        rowsets_to_delete.swap(rowsets);
2085
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
2086
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
2087
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET) != 0) {
2088
21
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
2089
21
                return;
2090
21
            }
2091
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
2092
21
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2093
21
                return;
2094
21
            }
2095
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
2096
21
        });
2097
21
        return 0;
2098
21
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
2099
2100
13
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
2101
13
                               std::move(loop_done));
2102
13
    worker_pool->stop();
2103
2104
13
    if (!async_recycled_rowset_keys.empty()) {
2105
2
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
2106
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
2107
0
            return -1;
2108
2
        } else {
2109
2
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
2110
2
        }
2111
2
    }
2112
13
    return ret;
2113
13
}
2114
2115
3.02k
bool is_txn_aborted(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id, int64_t txn_id) {
2116
3.02k
    std::unique_ptr<Transaction> txn;
2117
3.02k
    TxnErrorCode err = txn_kv->create_txn(&txn);
2118
3.02k
    if (err != TxnErrorCode::TXN_OK) {
2119
0
        LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id;
2120
0
        return false;
2121
0
    }
2122
2123
3.02k
    std::string index_val;
2124
3.02k
    const std::string index_key = txn_index_key({instance_id, txn_id});
2125
3.02k
    err = txn->get(index_key, &index_val);
2126
3.02k
    if (err != TxnErrorCode::TXN_OK) {
2127
3.02k
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2128
            // txn has been recycled;
2129
3.02k
            return true;
2130
3.02k
        }
2131
0
        LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id
2132
0
                     << " instance_id=" << instance_id << " key=" << hex(index_key)
2133
0
                     << " err=" << err;
2134
0
        return false;
2135
3.02k
    }
2136
2137
0
    TxnIndexPB index_pb;
2138
0
    if (!index_pb.ParseFromString(index_val)) {
2139
0
        LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id
2140
0
                     << " instance_id=" << instance_id;
2141
0
        return false;
2142
0
    }
2143
2144
0
    DCHECK(index_pb.has_tablet_index() == true);
2145
0
    DCHECK(index_pb.tablet_index().has_db_id() == true);
2146
0
    int64_t db_id = index_pb.tablet_index().db_id();
2147
2148
0
    std::string info_val;
2149
0
    const std::string info_key = txn_info_key({instance_id, db_id, txn_id});
2150
0
    err = txn->get(info_key, &info_val);
2151
0
    if (err != TxnErrorCode::TXN_OK) {
2152
0
        DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND);
2153
0
        LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id
2154
0
                     << " instance_id=" << instance_id << " key=" << hex(info_key)
2155
0
                     << " err=" << err;
2156
0
        return false;
2157
0
    }
2158
2159
0
    TxnInfoPB txn_info;
2160
0
    if (!txn_info.ParseFromString(info_val)) {
2161
0
        LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id
2162
0
                     << " instance_id=" << instance_id;
2163
0
        return false;
2164
0
    }
2165
0
    DCHECK(txn_info.txn_id() == txn_id);
2166
0
    if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status()) {
2167
0
        return true;
2168
0
    }
2169
0
    return false;
2170
0
}
2171
2172
13
int InstanceRecycler::recycle_tmp_rowsets() {
2173
13
    const std::string task_name = "recycle_tmp_rowsets";
2174
13
    int64_t num_scanned = 0;
2175
13
    int64_t num_expired = 0;
2176
13
    int64_t num_recycled = 0;
2177
13
    size_t expired_rowset_size = 0;
2178
13
    size_t total_rowset_key_size = 0;
2179
13
    size_t total_rowset_value_size = 0;
2180
2181
13
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
2182
13
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
2183
13
    std::string tmp_rs_key0;
2184
13
    std::string tmp_rs_key1;
2185
13
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
2186
13
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
2187
2188
13
    LOG_INFO("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
2189
2190
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2191
13
    register_recycle_task(task_name, start_time);
2192
2193
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2194
13
        unregister_recycle_task(task_name);
2195
13
        int64_t cost =
2196
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2197
13
        LOG_INFO("recycle tmp rowsets finished, cost={}s", cost)
2198
13
                .tag("instance_id", instance_id_)
2199
13
                .tag("num_scanned", num_scanned)
2200
13
                .tag("num_expired", num_expired)
2201
13
                .tag("num_recycled", num_recycled)
2202
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2203
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2204
13
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
2205
13
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEPi
Line
Count
Source
2193
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2194
13
        unregister_recycle_task(task_name);
2195
13
        int64_t cost =
2196
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2197
13
        LOG_INFO("recycle tmp rowsets finished, cost={}s", cost)
2198
13
                .tag("instance_id", instance_id_)
2199
13
                .tag("num_scanned", num_scanned)
2200
13
                .tag("num_expired", num_expired)
2201
13
                .tag("num_recycled", num_recycled)
2202
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2203
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2204
13
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
2205
13
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEPi
2206
2207
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
2208
13
    std::vector<std::string_view> tmp_rowset_keys;
2209
13
    std::vector<doris::RowsetMetaCloudPB> tmp_rowsets;
2210
2211
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2212
3.02k
    auto calc_expiration = [&earlest_ts, this](const doris::RowsetMetaCloudPB& rowset) {
2213
        // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
2214
        //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
2215
        //  duration or timeout always < `retention_time` in practice.
2216
3.02k
        int64_t expiration =
2217
3.02k
                rowset.txn_expiration() > 0 ? rowset.txn_expiration() : rowset.creation_time();
2218
3.02k
        expiration = config::force_immediate_recycle ? 0 : expiration;
2219
3.02k
        int64_t final_expiration = expiration + config::retention_seconds;
2220
3.02k
        if (earlest_ts > final_expiration) {
2221
3
            earlest_ts = final_expiration;
2222
3
            g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, earlest_ts);
2223
3
        }
2224
3.02k
        return final_expiration;
2225
3.02k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
2212
3.02k
    auto calc_expiration = [&earlest_ts, this](const doris::RowsetMetaCloudPB& rowset) {
2213
        // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
2214
        //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
2215
        //  duration or timeout always < `retention_time` in practice.
2216
3.02k
        int64_t expiration =
2217
3.02k
                rowset.txn_expiration() > 0 ? rowset.txn_expiration() : rowset.creation_time();
2218
3.02k
        expiration = config::force_immediate_recycle ? 0 : expiration;
2219
3.02k
        int64_t final_expiration = expiration + config::retention_seconds;
2220
3.02k
        if (earlest_ts > final_expiration) {
2221
3
            earlest_ts = final_expiration;
2222
3
            g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, earlest_ts);
2223
3
        }
2224
3.02k
        return final_expiration;
2225
3.02k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clERKNS_17RowsetMetaCloudPBE
2226
2227
13
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
2228
13
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
2229
13
                             &calc_expiration,
2230
3.02k
                             this](std::string_view k, std::string_view v) -> int {
2231
3.02k
        ++num_scanned;
2232
3.02k
        total_rowset_key_size += k.size();
2233
3.02k
        total_rowset_value_size += v.size();
2234
3.02k
        doris::RowsetMetaCloudPB rowset;
2235
3.02k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2236
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
2237
0
            return -1;
2238
0
        }
2239
3.02k
        int64_t expiration = calc_expiration(rowset);
2240
3.02k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2241
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2242
0
                   << " txn_expiration=" << rowset.txn_expiration()
2243
0
                   << " rowset_creation_time=" << rowset.creation_time();
2244
3.02k
        int64_t current_time = ::time(nullptr);
2245
3.02k
        if (current_time < expiration) { // not expired
2246
0
            return 0;
2247
0
        }
2248
2249
3.02k
        if (!is_txn_aborted(txn_kv_, instance_id_, rowset.txn_id())) {
2250
0
            return 0;
2251
0
        }
2252
2253
3.02k
        ++num_expired;
2254
3.02k
        expired_rowset_size += v.size();
2255
3.02k
        if (!rowset.has_resource_id()) {
2256
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
2257
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
2258
0
                return -1;
2259
0
            }
2260
            // might be a delete pred rowset
2261
0
            tmp_rowset_keys.push_back(k);
2262
0
            return 0;
2263
0
        }
2264
        // TODO(plat1ko): check rowset not referenced
2265
3.02k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2266
3.02k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
2267
3.02k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
2268
3.02k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
2269
3.02k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
2270
3.02k
                  << " num_expired=" << num_expired;
2271
2272
3.02k
        tmp_rowset_keys.push_back(k);
2273
3.02k
        if (rowset.num_segments() > 0) { // Skip empty rowset
2274
3.02k
            tmp_rowsets.push_back(std::move(rowset));
2275
3.02k
        }
2276
3.02k
        return 0;
2277
3.02k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2230
3.02k
                             this](std::string_view k, std::string_view v) -> int {
2231
3.02k
        ++num_scanned;
2232
3.02k
        total_rowset_key_size += k.size();
2233
3.02k
        total_rowset_value_size += v.size();
2234
3.02k
        doris::RowsetMetaCloudPB rowset;
2235
3.02k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
2236
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
2237
0
            return -1;
2238
0
        }
2239
3.02k
        int64_t expiration = calc_expiration(rowset);
2240
3.02k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
2241
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
2242
0
                   << " txn_expiration=" << rowset.txn_expiration()
2243
0
                   << " rowset_creation_time=" << rowset.creation_time();
2244
3.02k
        int64_t current_time = ::time(nullptr);
2245
3.02k
        if (current_time < expiration) { // not expired
2246
0
            return 0;
2247
0
        }
2248
2249
3.02k
        if (!is_txn_aborted(txn_kv_, instance_id_, rowset.txn_id())) {
2250
0
            return 0;
2251
0
        }
2252
2253
3.02k
        ++num_expired;
2254
3.02k
        expired_rowset_size += v.size();
2255
3.02k
        if (!rowset.has_resource_id()) {
2256
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
2257
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
2258
0
                return -1;
2259
0
            }
2260
            // might be a delete pred rowset
2261
0
            tmp_rowset_keys.push_back(k);
2262
0
            return 0;
2263
0
        }
2264
        // TODO(plat1ko): check rowset not referenced
2265
3.02k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
2266
3.02k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
2267
3.02k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
2268
3.02k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
2269
3.02k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
2270
3.02k
                  << " num_expired=" << num_expired;
2271
2272
3.02k
        tmp_rowset_keys.push_back(k);
2273
3.02k
        if (rowset.num_segments() > 0) { // Skip empty rowset
2274
3.02k
            tmp_rowsets.push_back(std::move(rowset));
2275
3.02k
        }
2276
3.02k
        return 0;
2277
3.02k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
2278
2279
13
    auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, this]() -> int {
2280
3
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2281
3
            tmp_rowset_keys.clear();
2282
3
            tmp_rowsets.clear();
2283
3
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
2280
3
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2281
3
            tmp_rowset_keys.clear();
2282
3
            tmp_rowsets.clear();
2283
3
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENKUlPiE_clES3_
2284
3
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET) != 0) {
2285
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
2286
0
            return -1;
2287
0
        }
2288
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
2289
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
2290
0
            return -1;
2291
0
        }
2292
3
        num_recycled += tmp_rowset_keys.size();
2293
3
        return 0;
2294
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
Line
Count
Source
2279
3
    auto loop_done = [&tmp_rowset_keys, &tmp_rowsets, &num_recycled, this]() -> int {
2280
3
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&](int*) {
2281
3
            tmp_rowset_keys.clear();
2282
3
            tmp_rowsets.clear();
2283
3
        });
2284
3
        if (delete_rowset_data(tmp_rowsets, RowsetRecyclingState::TMP_ROWSET) != 0) {
2285
0
            LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
2286
0
            return -1;
2287
0
        }
2288
3
        if (txn_remove(txn_kv_.get(), tmp_rowset_keys) != 0) {
2289
0
            LOG(WARNING) << "failed to delete tmp rowset kv, instance_id=" << instance_id_;
2290
0
            return -1;
2291
0
        }
2292
3
        num_recycled += tmp_rowset_keys.size();
2293
3
        return 0;
2294
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
2295
2296
13
    return scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
2297
13
                            std::move(loop_done));
2298
13
}
2299
2300
int InstanceRecycler::scan_and_recycle(
2301
        std::string begin, std::string_view end,
2302
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
2303
157
        std::function<int()> loop_done) {
2304
157
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
2305
157
    int ret = 0;
2306
157
    int64_t cnt = 0;
2307
157
    int get_range_retried = 0;
2308
157
    std::string err;
2309
157
    std::unique_ptr<int, std::function<void(int*)>> defer_log(
2310
157
            (int*)0x01, [begin, end, &err, &ret, &cnt, &get_range_retried](int*) {
2311
157
                LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
2312
157
                          << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
2313
157
                          << " ret=" << ret << " err=" << err;
2314
157
            });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEPi
Line
Count
Source
2310
155
            (int*)0x01, [begin, end, &err, &ret, &cnt, &get_range_retried](int*) {
2311
155
                LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
2312
155
                          << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
2313
155
                          << " ret=" << ret << " err=" << err;
2314
155
            });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEPi
Line
Count
Source
2310
2
            (int*)0x01, [begin, end, &err, &ret, &cnt, &get_range_retried](int*) {
2311
2
                LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
2312
2
                          << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
2313
2
                          << " ret=" << ret << " err=" << err;
2314
2
            });
2315
2316
157
    std::unique_ptr<RangeGetIterator> it;
2317
177
    do {
2318
177
        if (get_range_retried > 1000) {
2319
0
            err = "txn_get exceeds max retry, may not scan all keys";
2320
0
            ret = -1;
2321
0
            return -1;
2322
0
        }
2323
177
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
2324
177
        if (get_ret != 0) { // txn kv may complain "Request for future version"
2325
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
2326
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
2327
0
                         << " get_range_retried=" << get_range_retried;
2328
0
            ++get_range_retried;
2329
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
2330
0
            continue; // try again
2331
0
        }
2332
177
        if (!it->has_next()) {
2333
91
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
2334
91
            break; // scan finished
2335
91
        }
2336
37.3k
        while (it->has_next()) {
2337
37.2k
            ++cnt;
2338
            // recycle corresponding resources
2339
37.2k
            auto [k, v] = it->next();
2340
37.2k
            if (!it->has_next()) {
2341
86
                begin = k;
2342
86
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
2343
86
            }
2344
            // if we want to continue scanning, the recycle_func should not return non-zero
2345
37.2k
            if (recycle_func(k, v) != 0) {
2346
0
                err = "recycle_func error";
2347
0
                ret = -1;
2348
0
            }
2349
37.2k
        }
2350
86
        begin.push_back('\x00'); // Update to next smallest key for iteration
2351
        // if we want to continue scanning, the recycle_func should not return non-zero
2352
86
        if (loop_done && loop_done() != 0) {
2353
2
            err = "loop_done error";
2354
2
            ret = -1;
2355
2
        }
2356
86
    } while (it->more() && !stopped());
2357
157
    return ret;
2358
157
}
2359
2360
17
int InstanceRecycler::abort_timeout_txn() {
2361
17
    const std::string task_name = "abort_timeout_txn";
2362
17
    int64_t num_scanned = 0;
2363
17
    int64_t num_timeout = 0;
2364
17
    int64_t num_abort = 0;
2365
17
    int64_t num_advance = 0;
2366
2367
17
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
2368
17
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
2369
17
    std::string begin_txn_running_key;
2370
17
    std::string end_txn_running_key;
2371
17
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
2372
17
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
2373
2374
17
    LOG_INFO("begin to abort timeout txn").tag("instance_id", instance_id_);
2375
2376
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2377
17
    register_recycle_task(task_name, start_time);
2378
2379
17
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2380
17
        unregister_recycle_task(task_name);
2381
17
        int64_t cost =
2382
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2383
17
        LOG_INFO("end to abort timeout txn, cost={}s", cost)
2384
17
                .tag("instance_id", instance_id_)
2385
17
                .tag("num_scanned", num_scanned)
2386
17
                .tag("num_timeout", num_timeout)
2387
17
                .tag("num_abort", num_abort)
2388
17
                .tag("num_advance", num_advance);
2389
17
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEPi
Line
Count
Source
2379
16
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2380
16
        unregister_recycle_task(task_name);
2381
16
        int64_t cost =
2382
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2383
16
        LOG_INFO("end to abort timeout txn, cost={}s", cost)
2384
16
                .tag("instance_id", instance_id_)
2385
16
                .tag("num_scanned", num_scanned)
2386
16
                .tag("num_timeout", num_timeout)
2387
16
                .tag("num_abort", num_abort)
2388
16
                .tag("num_advance", num_advance);
2389
16
    });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEPi
Line
Count
Source
2379
1
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2380
1
        unregister_recycle_task(task_name);
2381
1
        int64_t cost =
2382
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2383
1
        LOG_INFO("end to abort timeout txn, cost={}s", cost)
2384
1
                .tag("instance_id", instance_id_)
2385
1
                .tag("num_scanned", num_scanned)
2386
1
                .tag("num_timeout", num_timeout)
2387
1
                .tag("num_abort", num_abort)
2388
1
                .tag("num_advance", num_advance);
2389
1
    });
2390
2391
17
    int64_t current_time =
2392
17
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2393
2394
17
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
2395
17
                                  &current_time,
2396
17
                                  this](std::string_view k, std::string_view v) -> int {
2397
7
        ++num_scanned;
2398
2399
7
        std::unique_ptr<Transaction> txn;
2400
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2401
7
        if (err != TxnErrorCode::TXN_OK) {
2402
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2403
0
            return -1;
2404
0
        }
2405
7
        std::string_view k1 = k;
2406
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
2407
7
        k1.remove_prefix(1); // Remove key space
2408
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2409
7
        if (decode_key(&k1, &out) != 0) {
2410
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
2411
0
            return -1;
2412
0
        }
2413
7
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2414
7
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2415
7
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2416
        // Update txn_info
2417
7
        std::string txn_inf_key, txn_inf_val;
2418
7
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
2419
7
        err = txn->get(txn_inf_key, &txn_inf_val);
2420
7
        if (err != TxnErrorCode::TXN_OK) {
2421
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
2422
0
            return -1;
2423
0
        }
2424
7
        TxnInfoPB txn_info;
2425
7
        if (!txn_info.ParseFromString(txn_inf_val)) {
2426
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
2427
0
            return -1;
2428
0
        }
2429
2430
7
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
2431
1
            txn.reset();
2432
1
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
2433
1
            std::shared_ptr<TxnLazyCommitTask> task =
2434
1
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
2435
1
            std::pair<MetaServiceCode, std::string> ret = task->wait();
2436
1
            if (ret.first != MetaServiceCode::OK) {
2437
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
2438
0
                             << "msg=" << ret.second;
2439
0
                return -1;
2440
0
            }
2441
1
            ++num_advance;
2442
1
            return 0;
2443
6
        } else {
2444
6
            TxnRunningPB txn_running_pb;
2445
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
2446
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2447
0
                return -1;
2448
0
            }
2449
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
2450
4
                return 0;
2451
4
            }
2452
2
            ++num_timeout;
2453
2454
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
2455
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
2456
2
            txn_info.set_finish_time(current_time);
2457
2
            txn_info.set_reason("timeout");
2458
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
2459
2
            txn_inf_val.clear();
2460
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
2461
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
2462
0
                return -1;
2463
0
            }
2464
2
            txn->put(txn_inf_key, txn_inf_val);
2465
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
2466
            // Put recycle txn key
2467
2
            std::string recyc_txn_key, recyc_txn_val;
2468
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
2469
2
            RecycleTxnPB recycle_txn_pb;
2470
2
            recycle_txn_pb.set_creation_time(current_time);
2471
2
            recycle_txn_pb.set_label(txn_info.label());
2472
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
2473
0
                LOG_WARNING("failed to serialize txn recycle info")
2474
0
                        .tag("key", hex(k))
2475
0
                        .tag("db_id", db_id)
2476
0
                        .tag("txn_id", txn_id);
2477
0
                return -1;
2478
0
            }
2479
2
            txn->put(recyc_txn_key, recyc_txn_val);
2480
            // Remove txn running key
2481
2
            txn->remove(k);
2482
2
            err = txn->commit();
2483
2
            if (err != TxnErrorCode::TXN_OK) {
2484
0
                LOG_WARNING("failed to commit txn err={}", err)
2485
0
                        .tag("key", hex(k))
2486
0
                        .tag("db_id", db_id)
2487
0
                        .tag("txn_id", txn_id);
2488
0
                return -1;
2489
0
            }
2490
2
            ++num_abort;
2491
2
        }
2492
2493
2
        return 0;
2494
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2396
6
                                  this](std::string_view k, std::string_view v) -> int {
2397
6
        ++num_scanned;
2398
2399
6
        std::unique_ptr<Transaction> txn;
2400
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2401
6
        if (err != TxnErrorCode::TXN_OK) {
2402
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2403
0
            return -1;
2404
0
        }
2405
6
        std::string_view k1 = k;
2406
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
2407
6
        k1.remove_prefix(1); // Remove key space
2408
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2409
6
        if (decode_key(&k1, &out) != 0) {
2410
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
2411
0
            return -1;
2412
0
        }
2413
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2414
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2415
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2416
        // Update txn_info
2417
6
        std::string txn_inf_key, txn_inf_val;
2418
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
2419
6
        err = txn->get(txn_inf_key, &txn_inf_val);
2420
6
        if (err != TxnErrorCode::TXN_OK) {
2421
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
2422
0
            return -1;
2423
0
        }
2424
6
        TxnInfoPB txn_info;
2425
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
2426
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
2427
0
            return -1;
2428
0
        }
2429
2430
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
2431
0
            txn.reset();
2432
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
2433
0
            std::shared_ptr<TxnLazyCommitTask> task =
2434
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
2435
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
2436
0
            if (ret.first != MetaServiceCode::OK) {
2437
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
2438
0
                             << "msg=" << ret.second;
2439
0
                return -1;
2440
0
            }
2441
0
            ++num_advance;
2442
0
            return 0;
2443
6
        } else {
2444
6
            TxnRunningPB txn_running_pb;
2445
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
2446
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2447
0
                return -1;
2448
0
            }
2449
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
2450
4
                return 0;
2451
4
            }
2452
2
            ++num_timeout;
2453
2454
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
2455
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
2456
2
            txn_info.set_finish_time(current_time);
2457
2
            txn_info.set_reason("timeout");
2458
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
2459
2
            txn_inf_val.clear();
2460
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
2461
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
2462
0
                return -1;
2463
0
            }
2464
2
            txn->put(txn_inf_key, txn_inf_val);
2465
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
2466
            // Put recycle txn key
2467
2
            std::string recyc_txn_key, recyc_txn_val;
2468
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
2469
2
            RecycleTxnPB recycle_txn_pb;
2470
2
            recycle_txn_pb.set_creation_time(current_time);
2471
2
            recycle_txn_pb.set_label(txn_info.label());
2472
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
2473
0
                LOG_WARNING("failed to serialize txn recycle info")
2474
0
                        .tag("key", hex(k))
2475
0
                        .tag("db_id", db_id)
2476
0
                        .tag("txn_id", txn_id);
2477
0
                return -1;
2478
0
            }
2479
2
            txn->put(recyc_txn_key, recyc_txn_val);
2480
            // Remove txn running key
2481
2
            txn->remove(k);
2482
2
            err = txn->commit();
2483
2
            if (err != TxnErrorCode::TXN_OK) {
2484
0
                LOG_WARNING("failed to commit txn err={}", err)
2485
0
                        .tag("key", hex(k))
2486
0
                        .tag("db_id", db_id)
2487
0
                        .tag("txn_id", txn_id);
2488
0
                return -1;
2489
0
            }
2490
2
            ++num_abort;
2491
2
        }
2492
2493
2
        return 0;
2494
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2396
1
                                  this](std::string_view k, std::string_view v) -> int {
2397
1
        ++num_scanned;
2398
2399
1
        std::unique_ptr<Transaction> txn;
2400
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2401
1
        if (err != TxnErrorCode::TXN_OK) {
2402
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2403
0
            return -1;
2404
0
        }
2405
1
        std::string_view k1 = k;
2406
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
2407
1
        k1.remove_prefix(1); // Remove key space
2408
1
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2409
1
        if (decode_key(&k1, &out) != 0) {
2410
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
2411
0
            return -1;
2412
0
        }
2413
1
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2414
1
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2415
1
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2416
        // Update txn_info
2417
1
        std::string txn_inf_key, txn_inf_val;
2418
1
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
2419
1
        err = txn->get(txn_inf_key, &txn_inf_val);
2420
1
        if (err != TxnErrorCode::TXN_OK) {
2421
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
2422
0
            return -1;
2423
0
        }
2424
1
        TxnInfoPB txn_info;
2425
1
        if (!txn_info.ParseFromString(txn_inf_val)) {
2426
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
2427
0
            return -1;
2428
0
        }
2429
2430
1
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
2431
1
            txn.reset();
2432
1
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
2433
1
            std::shared_ptr<TxnLazyCommitTask> task =
2434
1
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
2435
1
            std::pair<MetaServiceCode, std::string> ret = task->wait();
2436
1
            if (ret.first != MetaServiceCode::OK) {
2437
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
2438
0
                             << "msg=" << ret.second;
2439
0
                return -1;
2440
0
            }
2441
1
            ++num_advance;
2442
1
            return 0;
2443
1
        } else {
2444
0
            TxnRunningPB txn_running_pb;
2445
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
2446
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2447
0
                return -1;
2448
0
            }
2449
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
2450
0
                return 0;
2451
0
            }
2452
0
            ++num_timeout;
2453
2454
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
2455
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
2456
0
            txn_info.set_finish_time(current_time);
2457
0
            txn_info.set_reason("timeout");
2458
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
2459
0
            txn_inf_val.clear();
2460
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
2461
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
2462
0
                return -1;
2463
0
            }
2464
0
            txn->put(txn_inf_key, txn_inf_val);
2465
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
2466
            // Put recycle txn key
2467
0
            std::string recyc_txn_key, recyc_txn_val;
2468
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
2469
0
            RecycleTxnPB recycle_txn_pb;
2470
0
            recycle_txn_pb.set_creation_time(current_time);
2471
0
            recycle_txn_pb.set_label(txn_info.label());
2472
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
2473
0
                LOG_WARNING("failed to serialize txn recycle info")
2474
0
                        .tag("key", hex(k))
2475
0
                        .tag("db_id", db_id)
2476
0
                        .tag("txn_id", txn_id);
2477
0
                return -1;
2478
0
            }
2479
0
            txn->put(recyc_txn_key, recyc_txn_val);
2480
            // Remove txn running key
2481
0
            txn->remove(k);
2482
0
            err = txn->commit();
2483
0
            if (err != TxnErrorCode::TXN_OK) {
2484
0
                LOG_WARNING("failed to commit txn err={}", err)
2485
0
                        .tag("key", hex(k))
2486
0
                        .tag("db_id", db_id)
2487
0
                        .tag("txn_id", txn_id);
2488
0
                return -1;
2489
0
            }
2490
0
            ++num_abort;
2491
0
        }
2492
2493
0
        return 0;
2494
1
    };
2495
2496
17
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
2497
17
                            std::move(handle_txn_running_kv));
2498
17
}
2499
2500
17
int InstanceRecycler::recycle_expired_txn_label() {
2501
17
    const std::string task_name = "recycle_expired_txn_label";
2502
17
    int64_t num_scanned = 0;
2503
17
    int64_t num_expired = 0;
2504
17
    int64_t num_recycled = 0;
2505
17
    int ret = 0;
2506
2507
17
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
2508
17
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
2509
17
    std::string begin_recycle_txn_key;
2510
17
    std::string end_recycle_txn_key;
2511
17
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
2512
17
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
2513
17
    std::vector<std::string> recycle_txn_info_keys;
2514
2515
17
    LOG_INFO("begin to recycle expired txn").tag("instance_id", instance_id_);
2516
2517
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2518
17
    register_recycle_task(task_name, start_time);
2519
17
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2520
17
        unregister_recycle_task(task_name);
2521
17
        int64_t cost =
2522
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2523
17
        LOG_INFO("end to recycle expired txn, cost={}s", cost)
2524
17
                .tag("instance_id", instance_id_)
2525
17
                .tag("num_scanned", num_scanned)
2526
17
                .tag("num_expired", num_expired)
2527
17
                .tag("num_recycled", num_recycled);
2528
17
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEPi
Line
Count
Source
2519
16
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2520
16
        unregister_recycle_task(task_name);
2521
16
        int64_t cost =
2522
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2523
16
        LOG_INFO("end to recycle expired txn, cost={}s", cost)
2524
16
                .tag("instance_id", instance_id_)
2525
16
                .tag("num_scanned", num_scanned)
2526
16
                .tag("num_expired", num_expired)
2527
16
                .tag("num_recycled", num_recycled);
2528
16
    });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEPi
Line
Count
Source
2519
1
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2520
1
        unregister_recycle_task(task_name);
2521
1
        int64_t cost =
2522
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2523
1
        LOG_INFO("end to recycle expired txn, cost={}s", cost)
2524
1
                .tag("instance_id", instance_id_)
2525
1
                .tag("num_scanned", num_scanned)
2526
1
                .tag("num_expired", num_expired)
2527
1
                .tag("num_recycled", num_recycled);
2528
1
    });
2529
2530
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2531
30.0k
    auto calc_expiration = [&earlest_ts, this](const RecycleTxnPB& recycle_txn_pb) {
2532
30.0k
        int64_t final_expiration =
2533
30.0k
                recycle_txn_pb.creation_time() + config::label_keep_max_second * 1000L;
2534
30.0k
        if (earlest_ts > final_expiration / 1000) {
2535
7
            earlest_ts = final_expiration / 1000;
2536
7
            g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, earlest_ts);
2537
7
        }
2538
30.0k
        return final_expiration;
2539
30.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNS0_12RecycleTxnPBE
Line
Count
Source
2531
30.0k
    auto calc_expiration = [&earlest_ts, this](const RecycleTxnPB& recycle_txn_pb) {
2532
30.0k
        int64_t final_expiration =
2533
30.0k
                recycle_txn_pb.creation_time() + config::label_keep_max_second * 1000L;
2534
30.0k
        if (earlest_ts > final_expiration / 1000) {
2535
6
            earlest_ts = final_expiration / 1000;
2536
6
            g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, earlest_ts);
2537
6
        }
2538
30.0k
        return final_expiration;
2539
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNS0_12RecycleTxnPBE
Line
Count
Source
2531
1
    auto calc_expiration = [&earlest_ts, this](const RecycleTxnPB& recycle_txn_pb) {
2532
1
        int64_t final_expiration =
2533
1
                recycle_txn_pb.creation_time() + config::label_keep_max_second * 1000L;
2534
1
        if (earlest_ts > final_expiration / 1000) {
2535
1
            earlest_ts = final_expiration / 1000;
2536
1
            g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, earlest_ts);
2537
1
        }
2538
1
        return final_expiration;
2539
1
    };
2540
2541
17
    SyncExecutor<int> concurrent_delete_executor(
2542
17
            _thread_pool_group.s3_producer_pool,
2543
17
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
2544
23.0k
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clERKi
Line
Count
Source
2544
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clERKi
Line
Count
Source
2544
1
            [](const int& ret) { return ret != 0; });
2545
2546
17
    int64_t current_time_ms =
2547
17
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2548
2549
30.0k
    auto handle_recycle_txn_kv = [&](std::string_view k, std::string_view v) -> int {
2550
30.0k
        ++num_scanned;
2551
30.0k
        RecycleTxnPB recycle_txn_pb;
2552
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
2553
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2554
0
            return -1;
2555
0
        }
2556
30.0k
        if ((config::force_immediate_recycle) ||
2557
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
2558
30.0k
            (calc_expiration(recycle_txn_pb) <= current_time_ms)) {
2559
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
2560
23.0k
            num_expired++;
2561
23.0k
            recycle_txn_info_keys.emplace_back(k);
2562
23.0k
        }
2563
30.0k
        return 0;
2564
30.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2549
30.0k
    auto handle_recycle_txn_kv = [&](std::string_view k, std::string_view v) -> int {
2550
30.0k
        ++num_scanned;
2551
30.0k
        RecycleTxnPB recycle_txn_pb;
2552
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
2553
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2554
0
            return -1;
2555
0
        }
2556
30.0k
        if ((config::force_immediate_recycle) ||
2557
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
2558
30.0k
            (calc_expiration(recycle_txn_pb) <= current_time_ms)) {
2559
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
2560
23.0k
            num_expired++;
2561
23.0k
            recycle_txn_info_keys.emplace_back(k);
2562
23.0k
        }
2563
30.0k
        return 0;
2564
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2549
1
    auto handle_recycle_txn_kv = [&](std::string_view k, std::string_view v) -> int {
2550
1
        ++num_scanned;
2551
1
        RecycleTxnPB recycle_txn_pb;
2552
1
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
2553
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
2554
0
            return -1;
2555
0
        }
2556
1
        if ((config::force_immediate_recycle) ||
2557
1
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
2558
1
            (calc_expiration(recycle_txn_pb) <= current_time_ms)) {
2559
1
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
2560
1
            num_expired++;
2561
1
            recycle_txn_info_keys.emplace_back(k);
2562
1
        }
2563
1
        return 0;
2564
1
    };
2565
2566
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
2567
23.0k
        std::string_view k1 = k;
2568
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
2569
23.0k
        k1.remove_prefix(1); // Remove key space
2570
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2571
23.0k
        int ret = decode_key(&k1, &out);
2572
23.0k
        if (ret != 0) {
2573
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
2574
0
            return -1;
2575
0
        }
2576
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2577
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2578
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2579
23.0k
        std::unique_ptr<Transaction> txn;
2580
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2581
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2582
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2583
0
            return -1;
2584
0
        }
2585
        // Remove txn index kv
2586
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
2587
23.0k
        txn->remove(index_key);
2588
        // Remove txn info kv
2589
23.0k
        std::string info_key, info_val;
2590
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
2591
23.0k
        err = txn->get(info_key, &info_val);
2592
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2593
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
2594
0
            return -1;
2595
0
        }
2596
23.0k
        TxnInfoPB txn_info;
2597
23.0k
        if (!txn_info.ParseFromString(info_val)) {
2598
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
2599
0
            return -1;
2600
0
        }
2601
23.0k
        txn->remove(info_key);
2602
        // Remove sub txn index kvs
2603
23.0k
        std::vector<std::string> sub_txn_index_keys;
2604
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
2605
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
2606
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
2607
22.9k
        }
2608
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
2609
22.9k
            txn->remove(sub_txn_index_key);
2610
22.9k
        }
2611
        // Update txn label
2612
23.0k
        std::string label_key, label_val;
2613
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
2614
23.0k
        err = txn->get(label_key, &label_val);
2615
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2616
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
2617
0
                         << " err=" << err;
2618
0
            return -1;
2619
0
        }
2620
23.0k
        TxnLabelPB txn_label;
2621
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
2622
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
2623
0
            return -1;
2624
0
        }
2625
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
2626
23.0k
        if (it != txn_label.txn_ids().end()) {
2627
23.0k
            txn_label.mutable_txn_ids()->erase(it);
2628
23.0k
        }
2629
23.0k
        if (txn_label.txn_ids().empty()) {
2630
23.0k
            txn->remove(label_key);
2631
23.0k
        } else {
2632
0
            if (!txn_label.SerializeToString(&label_val)) {
2633
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
2634
0
                return -1;
2635
0
            }
2636
0
            txn->atomic_set_ver_value(label_key, label_val);
2637
0
        }
2638
        // Remove recycle txn kv
2639
23.0k
        txn->remove(k);
2640
23.0k
        err = txn->commit();
2641
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2642
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
2643
0
            return -1;
2644
0
        }
2645
23.0k
        ++num_recycled;
2646
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
2647
23.0k
        return 0;
2648
23.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_5clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
2566
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
2567
23.0k
        std::string_view k1 = k;
2568
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
2569
23.0k
        k1.remove_prefix(1); // Remove key space
2570
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2571
23.0k
        int ret = decode_key(&k1, &out);
2572
23.0k
        if (ret != 0) {
2573
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
2574
0
            return -1;
2575
0
        }
2576
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2577
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2578
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2579
23.0k
        std::unique_ptr<Transaction> txn;
2580
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2581
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2582
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2583
0
            return -1;
2584
0
        }
2585
        // Remove txn index kv
2586
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
2587
23.0k
        txn->remove(index_key);
2588
        // Remove txn info kv
2589
23.0k
        std::string info_key, info_val;
2590
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
2591
23.0k
        err = txn->get(info_key, &info_val);
2592
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2593
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
2594
0
            return -1;
2595
0
        }
2596
23.0k
        TxnInfoPB txn_info;
2597
23.0k
        if (!txn_info.ParseFromString(info_val)) {
2598
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
2599
0
            return -1;
2600
0
        }
2601
23.0k
        txn->remove(info_key);
2602
        // Remove sub txn index kvs
2603
23.0k
        std::vector<std::string> sub_txn_index_keys;
2604
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
2605
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
2606
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
2607
22.9k
        }
2608
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
2609
22.9k
            txn->remove(sub_txn_index_key);
2610
22.9k
        }
2611
        // Update txn label
2612
23.0k
        std::string label_key, label_val;
2613
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
2614
23.0k
        err = txn->get(label_key, &label_val);
2615
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2616
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
2617
0
                         << " err=" << err;
2618
0
            return -1;
2619
0
        }
2620
23.0k
        TxnLabelPB txn_label;
2621
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
2622
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
2623
0
            return -1;
2624
0
        }
2625
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
2626
23.0k
        if (it != txn_label.txn_ids().end()) {
2627
23.0k
            txn_label.mutable_txn_ids()->erase(it);
2628
23.0k
        }
2629
23.0k
        if (txn_label.txn_ids().empty()) {
2630
23.0k
            txn->remove(label_key);
2631
23.0k
        } else {
2632
0
            if (!txn_label.SerializeToString(&label_val)) {
2633
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
2634
0
                return -1;
2635
0
            }
2636
0
            txn->atomic_set_ver_value(label_key, label_val);
2637
0
        }
2638
        // Remove recycle txn kv
2639
23.0k
        txn->remove(k);
2640
23.0k
        err = txn->commit();
2641
23.0k
        if (err != TxnErrorCode::TXN_OK) {
2642
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
2643
0
            return -1;
2644
0
        }
2645
23.0k
        ++num_recycled;
2646
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
2647
23.0k
        return 0;
2648
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_5clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
2566
1
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
2567
1
        std::string_view k1 = k;
2568
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
2569
1
        k1.remove_prefix(1); // Remove key space
2570
1
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2571
1
        int ret = decode_key(&k1, &out);
2572
1
        if (ret != 0) {
2573
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
2574
0
            return -1;
2575
0
        }
2576
1
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
2577
1
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
2578
1
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
2579
1
        std::unique_ptr<Transaction> txn;
2580
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2581
1
        if (err != TxnErrorCode::TXN_OK) {
2582
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
2583
0
            return -1;
2584
0
        }
2585
        // Remove txn index kv
2586
1
        auto index_key = txn_index_key({instance_id_, txn_id});
2587
1
        txn->remove(index_key);
2588
        // Remove txn info kv
2589
1
        std::string info_key, info_val;
2590
1
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
2591
1
        err = txn->get(info_key, &info_val);
2592
1
        if (err != TxnErrorCode::TXN_OK) {
2593
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
2594
0
            return -1;
2595
0
        }
2596
1
        TxnInfoPB txn_info;
2597
1
        if (!txn_info.ParseFromString(info_val)) {
2598
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
2599
0
            return -1;
2600
0
        }
2601
1
        txn->remove(info_key);
2602
        // Remove sub txn index kvs
2603
1
        std::vector<std::string> sub_txn_index_keys;
2604
1
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
2605
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
2606
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
2607
0
        }
2608
1
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
2609
0
            txn->remove(sub_txn_index_key);
2610
0
        }
2611
        // Update txn label
2612
1
        std::string label_key, label_val;
2613
1
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
2614
1
        err = txn->get(label_key, &label_val);
2615
1
        if (err != TxnErrorCode::TXN_OK) {
2616
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
2617
0
                         << " err=" << err;
2618
0
            return -1;
2619
0
        }
2620
1
        TxnLabelPB txn_label;
2621
1
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
2622
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
2623
0
            return -1;
2624
0
        }
2625
1
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
2626
1
        if (it != txn_label.txn_ids().end()) {
2627
1
            txn_label.mutable_txn_ids()->erase(it);
2628
1
        }
2629
1
        if (txn_label.txn_ids().empty()) {
2630
1
            txn->remove(label_key);
2631
1
        } else {
2632
0
            if (!txn_label.SerializeToString(&label_val)) {
2633
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
2634
0
                return -1;
2635
0
            }
2636
0
            txn->atomic_set_ver_value(label_key, label_val);
2637
0
        }
2638
        // Remove recycle txn kv
2639
1
        txn->remove(k);
2640
1
        err = txn->commit();
2641
1
        if (err != TxnErrorCode::TXN_OK) {
2642
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
2643
0
            return -1;
2644
0
        }
2645
1
        ++num_recycled;
2646
1
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
2647
1
        return 0;
2648
1
    };
2649
2650
17
    auto loop_done = [&]() -> int {
2651
8
        std::unique_ptr<int, std::function<void(int*)>> defer(
2652
8
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlPiE_clES3_
Line
Count
Source
2652
7
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlPiE_clES3_
Line
Count
Source
2652
1
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
2653
8
        TEST_SYNC_POINT_CALLBACK(
2654
8
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
2655
8
                &recycle_txn_info_keys);
2656
23.0k
        for (const auto& k : recycle_txn_info_keys) {
2657
23.0k
            concurrent_delete_executor.add([&]() {
2658
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
2659
0
                    LOG_WARNING("failed to delete recycle txn kv")
2660
0
                            .tag("instance id", instance_id_)
2661
0
                            .tag("key", hex(k));
2662
0
                    return -1;
2663
0
                }
2664
23.0k
                return 0;
2665
23.0k
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlvE_clEv
Line
Count
Source
2657
23.0k
            concurrent_delete_executor.add([&]() {
2658
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
2659
0
                    LOG_WARNING("failed to delete recycle txn kv")
2660
0
                            .tag("instance id", instance_id_)
2661
0
                            .tag("key", hex(k));
2662
0
                    return -1;
2663
0
                }
2664
23.0k
                return 0;
2665
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEvENKUlvE_clEv
Line
Count
Source
2657
1
            concurrent_delete_executor.add([&]() {
2658
1
                if (delete_recycle_txn_kv(k) != 0) {
2659
0
                    LOG_WARNING("failed to delete recycle txn kv")
2660
0
                            .tag("instance id", instance_id_)
2661
0
                            .tag("key", hex(k));
2662
0
                    return -1;
2663
0
                }
2664
1
                return 0;
2665
1
            });
2666
23.0k
        }
2667
8
        bool finished = true;
2668
8
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2669
23.0k
        for (int r : rets) {
2670
23.0k
            if (r != 0) {
2671
0
                ret = -1;
2672
0
            }
2673
23.0k
        }
2674
2675
8
        ret = finished ? ret : -1;
2676
2677
8
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
2678
2679
8
        if (ret != 0) {
2680
2
            LOG_WARNING("recycle txn kv ret!=0")
2681
2
                    .tag("finished", finished)
2682
2
                    .tag("ret", ret)
2683
2
                    .tag("instance_id", instance_id_);
2684
2
            return ret;
2685
2
        }
2686
6
        return ret;
2687
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEv
Line
Count
Source
2650
7
    auto loop_done = [&]() -> int {
2651
7
        std::unique_ptr<int, std::function<void(int*)>> defer(
2652
7
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
2653
7
        TEST_SYNC_POINT_CALLBACK(
2654
7
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
2655
7
                &recycle_txn_info_keys);
2656
23.0k
        for (const auto& k : recycle_txn_info_keys) {
2657
23.0k
            concurrent_delete_executor.add([&]() {
2658
23.0k
                if (delete_recycle_txn_kv(k) != 0) {
2659
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
2660
23.0k
                            .tag("instance id", instance_id_)
2661
23.0k
                            .tag("key", hex(k));
2662
23.0k
                    return -1;
2663
23.0k
                }
2664
23.0k
                return 0;
2665
23.0k
            });
2666
23.0k
        }
2667
7
        bool finished = true;
2668
7
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2669
23.0k
        for (int r : rets) {
2670
23.0k
            if (r != 0) {
2671
0
                ret = -1;
2672
0
            }
2673
23.0k
        }
2674
2675
7
        ret = finished ? ret : -1;
2676
2677
7
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
2678
2679
7
        if (ret != 0) {
2680
2
            LOG_WARNING("recycle txn kv ret!=0")
2681
2
                    .tag("finished", finished)
2682
2
                    .tag("ret", ret)
2683
2
                    .tag("instance_id", instance_id_);
2684
2
            return ret;
2685
2
        }
2686
5
        return ret;
2687
7
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clEv
Line
Count
Source
2650
1
    auto loop_done = [&]() -> int {
2651
1
        std::unique_ptr<int, std::function<void(int*)>> defer(
2652
1
                (int*)0x01, [&](int*) { recycle_txn_info_keys.clear(); });
2653
1
        TEST_SYNC_POINT_CALLBACK(
2654
1
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
2655
1
                &recycle_txn_info_keys);
2656
1
        for (const auto& k : recycle_txn_info_keys) {
2657
1
            concurrent_delete_executor.add([&]() {
2658
1
                if (delete_recycle_txn_kv(k) != 0) {
2659
1
                    LOG_WARNING("failed to delete recycle txn kv")
2660
1
                            .tag("instance id", instance_id_)
2661
1
                            .tag("key", hex(k));
2662
1
                    return -1;
2663
1
                }
2664
1
                return 0;
2665
1
            });
2666
1
        }
2667
1
        bool finished = true;
2668
1
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2669
1
        for (int r : rets) {
2670
1
            if (r != 0) {
2671
0
                ret = -1;
2672
0
            }
2673
1
        }
2674
2675
1
        ret = finished ? ret : -1;
2676
2677
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
2678
2679
1
        if (ret != 0) {
2680
0
            LOG_WARNING("recycle txn kv ret!=0")
2681
0
                    .tag("finished", finished)
2682
0
                    .tag("ret", ret)
2683
0
                    .tag("instance_id", instance_id_);
2684
0
            return ret;
2685
0
        }
2686
1
        return ret;
2687
1
    };
2688
2689
17
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
2690
17
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
2691
17
}
2692
2693
struct CopyJobIdTuple {
2694
    std::string instance_id;
2695
    std::string stage_id;
2696
    long table_id;
2697
    std::string copy_id;
2698
    std::string stage_path;
2699
};
2700
struct BatchObjStoreAccessor {
2701
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
2702
                          TxnKv* txn_kv)
2703
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
2704
3
    ~BatchObjStoreAccessor() {
2705
3
        if (!paths_.empty()) {
2706
3
            consume();
2707
3
        }
2708
3
    }
2709
2710
    /**
2711
    * To implicitely do batch work and submit the batch delete task to s3
2712
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
2713
    *
2714
    * @param copy_job The protubuf struct consists of the copy job files.
2715
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
2716
    *            it would last until we finish the delete task, here we need pass one string value
2717
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
2718
    */
2719
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
2720
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
2721
5
        auto& file_keys = copy_file_keys_[key];
2722
5
        file_keys.log_trace =
2723
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
2724
5
                            instance_id, stage_id, table_id, copy_id, path);
2725
5
        std::string_view log_trace = file_keys.log_trace;
2726
2.03k
        for (const auto& file : copy_job.object_files()) {
2727
2.03k
            auto relative_path = file.relative_path();
2728
2.03k
            paths_.push_back(relative_path);
2729
2.03k
            file_keys.keys.push_back(copy_file_key(
2730
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
2731
2.03k
            LOG_INFO(log_trace)
2732
2.03k
                    .tag("relative_path", relative_path)
2733
2.03k
                    .tag("batch_count", batch_count_);
2734
2.03k
        }
2735
5
        LOG_INFO(log_trace)
2736
5
                .tag("objects_num", copy_job.object_files().size())
2737
5
                .tag("batch_count", batch_count_);
2738
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
2739
        // recommend using delete objects when objects num is less than 10)
2740
5
        if (paths_.size() < 1000) {
2741
3
            return;
2742
3
        }
2743
2
        consume();
2744
2
    }
2745
2746
private:
2747
5
    void consume() {
2748
5
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [this](int*) {
2749
5
            paths_.clear();
2750
5
            copy_file_keys_.clear();
2751
5
            batch_count_++;
2752
5
        });
2753
5
        LOG_INFO("begin to delete {} internal stage objects in batch {}", paths_.size(),
2754
5
                 batch_count_);
2755
5
        StopWatch sw;
2756
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
2757
5
        if (0 != accessor_->delete_files(paths_)) {
2758
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
2759
2
                        paths_.size(), batch_count_, sw.elapsed_us());
2760
2
            return;
2761
2
        }
2762
3
        LOG_INFO("succeed to delete {} internal stage objects in batch {} and it takes {} us",
2763
3
                 paths_.size(), batch_count_, sw.elapsed_us());
2764
        // delete fdb's keys
2765
3
        for (auto& file_keys : copy_file_keys_) {
2766
3
            auto& [log_trace, keys] = file_keys.second;
2767
3
            std::unique_ptr<Transaction> txn;
2768
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
2769
0
                LOG(WARNING) << "failed to create txn";
2770
0
                continue;
2771
0
            }
2772
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
2773
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
2774
            // limited, should not cause the txn commit failed.
2775
1.02k
            for (const auto& key : keys) {
2776
1.02k
                txn->remove(key);
2777
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
2778
1.02k
            }
2779
3
            txn->remove(file_keys.first);
2780
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
2781
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
2782
0
                continue;
2783
0
            }
2784
3
        }
2785
3
    }
2786
    std::shared_ptr<StorageVaultAccessor> accessor_;
2787
    // the path of the s3 files to be deleted
2788
    std::vector<std::string> paths_;
2789
    struct CopyFiles {
2790
        std::string log_trace;
2791
        std::vector<std::string> keys;
2792
    };
2793
    // pair<std::string, std::vector<std::string>>
2794
    // first: instance_id_ stage_id table_id query_id
2795
    // second: keys to be deleted
2796
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
2797
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
2798
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
2799
    // which can together uniquely identifies different tasks for tracing log
2800
    uint64_t& batch_count_;
2801
    TxnKv* txn_kv_;
2802
};
2803
2804
13
int InstanceRecycler::recycle_copy_jobs() {
2805
13
    int64_t num_scanned = 0;
2806
13
    int64_t num_finished = 0;
2807
13
    int64_t num_expired = 0;
2808
13
    int64_t num_recycled = 0;
2809
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
2810
13
    uint64_t batch_count = 0;
2811
13
    const std::string task_name = "recycle_copy_jobs";
2812
2813
13
    LOG_INFO("begin to recycle copy jobs").tag("instance_id", instance_id_);
2814
2815
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2816
13
    register_recycle_task(task_name, start_time);
2817
2818
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2819
13
        unregister_recycle_task(task_name);
2820
13
        int64_t cost =
2821
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2822
13
        LOG_INFO("recycle copy jobs finished, cost={}s", cost)
2823
13
                .tag("instance_id", instance_id_)
2824
13
                .tag("num_scanned", num_scanned)
2825
13
                .tag("num_finished", num_finished)
2826
13
                .tag("num_expired", num_expired)
2827
13
                .tag("num_recycled", num_recycled);
2828
13
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEPi
Line
Count
Source
2818
13
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
2819
13
        unregister_recycle_task(task_name);
2820
13
        int64_t cost =
2821
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2822
13
        LOG_INFO("recycle copy jobs finished, cost={}s", cost)
2823
13
                .tag("instance_id", instance_id_)
2824
13
                .tag("num_scanned", num_scanned)
2825
13
                .tag("num_finished", num_finished)
2826
13
                .tag("num_expired", num_expired)
2827
13
                .tag("num_recycled", num_recycled);
2828
13
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEPi
2829
2830
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
2831
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
2832
13
    std::string key0;
2833
13
    std::string key1;
2834
13
    copy_job_key(key_info0, &key0);
2835
13
    copy_job_key(key_info1, &key1);
2836
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
2837
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
2838
13
                         &batch_count, &stage_accessor_map, &task_name,
2839
16
                         this](std::string_view k, std::string_view v) -> int {
2840
16
        ++num_scanned;
2841
16
        CopyJobPB copy_job;
2842
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
2843
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
2844
0
            return -1;
2845
0
        }
2846
2847
        // decode copy job key
2848
16
        auto k1 = k;
2849
16
        k1.remove_prefix(1);
2850
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2851
16
        decode_key(&k1, &out);
2852
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
2853
        // -> CopyJobPB
2854
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
2855
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
2856
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
2857
2858
16
        bool check_storage = true;
2859
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
2860
12
            ++num_finished;
2861
2862
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
2863
7
                auto it = stage_accessor_map.find(stage_id);
2864
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
2865
7
                std::string_view path;
2866
7
                if (it != stage_accessor_map.end()) {
2867
2
                    accessor = it->second;
2868
5
                } else {
2869
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
2870
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
2871
5
                                                      &inner_accessor);
2872
5
                    if (ret < 0) { // error
2873
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
2874
0
                        return -1;
2875
5
                    } else if (ret == 0) {
2876
3
                        path = inner_accessor->uri();
2877
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
2878
3
                                inner_accessor, batch_count, txn_kv_.get());
2879
3
                        stage_accessor_map.emplace(stage_id, accessor);
2880
3
                    } else { // stage not found, skip check storage
2881
2
                        check_storage = false;
2882
2
                    }
2883
5
                }
2884
7
                if (check_storage) {
2885
                    // TODO delete objects with key and etag is not supported
2886
5
                    accessor->add(std::move(copy_job), std::string(k),
2887
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
2888
5
                    return 0;
2889
5
                }
2890
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
2891
5
                int64_t current_time =
2892
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2893
5
                if (copy_job.finish_time_ms() > 0) {
2894
2
                    if (!config::force_immediate_recycle &&
2895
2
                        current_time < copy_job.finish_time_ms() +
2896
2
                                               config::copy_job_max_retention_second * 1000) {
2897
1
                        return 0;
2898
1
                    }
2899
3
                } else {
2900
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
2901
3
                    if (!config::force_immediate_recycle &&
2902
3
                        current_time < copy_job.start_time_ms() +
2903
3
                                               config::copy_job_max_retention_second * 1000) {
2904
1
                        return 0;
2905
1
                    }
2906
3
                }
2907
5
            }
2908
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
2909
4
            int64_t current_time =
2910
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2911
            // if copy job is timeout: delete all copy file kvs and copy job kv
2912
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
2913
2
                return 0;
2914
2
            }
2915
2
            ++num_expired;
2916
2
        }
2917
2918
        // delete all copy files
2919
7
        std::vector<std::string> copy_file_keys;
2920
70
        for (auto& file : copy_job.object_files()) {
2921
70
            copy_file_keys.push_back(copy_file_key(
2922
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
2923
70
        }
2924
7
        std::unique_ptr<Transaction> txn;
2925
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2926
0
            LOG(WARNING) << "failed to create txn";
2927
0
            return -1;
2928
0
        }
2929
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
2930
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
2931
        // limited, should not cause the txn commit failed.
2932
70
        for (const auto& key : copy_file_keys) {
2933
70
            txn->remove(key);
2934
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
2935
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
2936
70
                      << ", query_id=" << copy_id;
2937
70
        }
2938
7
        txn->remove(k);
2939
7
        TxnErrorCode err = txn->commit();
2940
7
        if (err != TxnErrorCode::TXN_OK) {
2941
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
2942
0
            return -1;
2943
0
        }
2944
2945
7
        ++num_recycled;
2946
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2947
7
        return 0;
2948
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2839
16
                         this](std::string_view k, std::string_view v) -> int {
2840
16
        ++num_scanned;
2841
16
        CopyJobPB copy_job;
2842
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
2843
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
2844
0
            return -1;
2845
0
        }
2846
2847
        // decode copy job key
2848
16
        auto k1 = k;
2849
16
        k1.remove_prefix(1);
2850
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2851
16
        decode_key(&k1, &out);
2852
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
2853
        // -> CopyJobPB
2854
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
2855
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
2856
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
2857
2858
16
        bool check_storage = true;
2859
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
2860
12
            ++num_finished;
2861
2862
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
2863
7
                auto it = stage_accessor_map.find(stage_id);
2864
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
2865
7
                std::string_view path;
2866
7
                if (it != stage_accessor_map.end()) {
2867
2
                    accessor = it->second;
2868
5
                } else {
2869
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
2870
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
2871
5
                                                      &inner_accessor);
2872
5
                    if (ret < 0) { // error
2873
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
2874
0
                        return -1;
2875
5
                    } else if (ret == 0) {
2876
3
                        path = inner_accessor->uri();
2877
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
2878
3
                                inner_accessor, batch_count, txn_kv_.get());
2879
3
                        stage_accessor_map.emplace(stage_id, accessor);
2880
3
                    } else { // stage not found, skip check storage
2881
2
                        check_storage = false;
2882
2
                    }
2883
5
                }
2884
7
                if (check_storage) {
2885
                    // TODO delete objects with key and etag is not supported
2886
5
                    accessor->add(std::move(copy_job), std::string(k),
2887
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
2888
5
                    return 0;
2889
5
                }
2890
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
2891
5
                int64_t current_time =
2892
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2893
5
                if (copy_job.finish_time_ms() > 0) {
2894
2
                    if (!config::force_immediate_recycle &&
2895
2
                        current_time < copy_job.finish_time_ms() +
2896
2
                                               config::copy_job_max_retention_second * 1000) {
2897
1
                        return 0;
2898
1
                    }
2899
3
                } else {
2900
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
2901
3
                    if (!config::force_immediate_recycle &&
2902
3
                        current_time < copy_job.start_time_ms() +
2903
3
                                               config::copy_job_max_retention_second * 1000) {
2904
1
                        return 0;
2905
1
                    }
2906
3
                }
2907
5
            }
2908
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
2909
4
            int64_t current_time =
2910
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
2911
            // if copy job is timeout: delete all copy file kvs and copy job kv
2912
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
2913
2
                return 0;
2914
2
            }
2915
2
            ++num_expired;
2916
2
        }
2917
2918
        // delete all copy files
2919
7
        std::vector<std::string> copy_file_keys;
2920
70
        for (auto& file : copy_job.object_files()) {
2921
70
            copy_file_keys.push_back(copy_file_key(
2922
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
2923
70
        }
2924
7
        std::unique_ptr<Transaction> txn;
2925
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2926
0
            LOG(WARNING) << "failed to create txn";
2927
0
            return -1;
2928
0
        }
2929
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
2930
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
2931
        // limited, should not cause the txn commit failed.
2932
70
        for (const auto& key : copy_file_keys) {
2933
70
            txn->remove(key);
2934
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
2935
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
2936
70
                      << ", query_id=" << copy_id;
2937
70
        }
2938
7
        txn->remove(k);
2939
7
        TxnErrorCode err = txn->commit();
2940
7
        if (err != TxnErrorCode::TXN_OK) {
2941
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
2942
0
            return -1;
2943
0
        }
2944
2945
7
        ++num_recycled;
2946
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2947
7
        return 0;
2948
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
2949
2950
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
2951
13
}
2952
2953
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
2954
                                             const StagePB::StageType& stage_type,
2955
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
2956
5
#ifdef UNIT_TEST
2957
    // In unit test, external use the same accessor as the internal stage
2958
5
    auto it = accessor_map_.find(stage_id);
2959
5
    if (it != accessor_map_.end()) {
2960
3
        *accessor = it->second;
2961
3
    } else {
2962
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
2963
2
        return 1;
2964
2
    }
2965
#else
2966
    // init s3 accessor and add to accessor map
2967
    auto stage_it =
2968
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
2969
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
2970
2971
    if (stage_it == instance_info_.stages().end()) {
2972
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
2973
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
2974
        return 1;
2975
    }
2976
2977
    const auto& object_store_info = stage_it->obj_info();
2978
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
2979
2980
    S3Conf s3_conf;
2981
    if (stage_type == StagePB::EXTERNAL) {
2982
        if (stage_access_type == StagePB::AKSK) {
2983
            auto conf = S3Conf::from_obj_store_info(object_store_info);
2984
            if (!conf) {
2985
                return -1;
2986
            }
2987
2988
            s3_conf = std::move(*conf);
2989
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
2990
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
2991
            if (!conf) {
2992
                return -1;
2993
            }
2994
2995
            s3_conf = std::move(*conf);
2996
            if (instance_info_.ram_user().has_encryption_info()) {
2997
                AkSkPair plain_ak_sk_pair;
2998
                int ret = decrypt_ak_sk_helper(
2999
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
3000
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
3001
                if (ret != 0) {
3002
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
3003
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
3004
                    return -1;
3005
                }
3006
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
3007
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
3008
            } else {
3009
                s3_conf.ak = instance_info_.ram_user().ak();
3010
                s3_conf.sk = instance_info_.ram_user().sk();
3011
            }
3012
        } else {
3013
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
3014
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
3015
            return -1;
3016
        }
3017
    } else if (stage_type == StagePB::INTERNAL) {
3018
        int idx = stoi(object_store_info.id());
3019
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3020
            LOG(WARNING) << "invalid idx: " << idx;
3021
            return -1;
3022
        }
3023
3024
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
3025
        auto conf = S3Conf::from_obj_store_info(old_obj);
3026
        if (!conf) {
3027
            return -1;
3028
        }
3029
3030
        s3_conf = std::move(*conf);
3031
        s3_conf.prefix = object_store_info.prefix();
3032
    } else {
3033
        LOG(WARNING) << "unknown stage type " << stage_type;
3034
        return -1;
3035
    }
3036
3037
    std::shared_ptr<S3Accessor> s3_accessor;
3038
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
3039
    if (ret != 0) {
3040
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
3041
        return -1;
3042
    }
3043
3044
    *accessor = std::move(s3_accessor);
3045
#endif
3046
3
    return 0;
3047
5
}
3048
3049
11
int InstanceRecycler::recycle_stage() {
3050
11
    int64_t num_scanned = 0;
3051
11
    int64_t num_recycled = 0;
3052
11
    const std::string task_name = "recycle_stage";
3053
3054
11
    LOG_INFO("begin to recycle stage").tag("instance_id", instance_id_);
3055
3056
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3057
11
    register_recycle_task(task_name, start_time);
3058
3059
11
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
3060
11
        unregister_recycle_task(task_name);
3061
11
        int64_t cost =
3062
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3063
11
        LOG_INFO("recycle stage, cost={}s", cost)
3064
11
                .tag("instance_id", instance_id_)
3065
11
                .tag("num_scanned", num_scanned)
3066
11
                .tag("num_recycled", num_recycled);
3067
11
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEPi
Line
Count
Source
3059
11
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
3060
11
        unregister_recycle_task(task_name);
3061
11
        int64_t cost =
3062
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3063
11
        LOG_INFO("recycle stage, cost={}s", cost)
3064
11
                .tag("instance_id", instance_id_)
3065
11
                .tag("num_scanned", num_scanned)
3066
11
                .tag("num_recycled", num_recycled);
3067
11
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEPi
3068
3069
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
3070
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
3071
11
    std::string key0 = recycle_stage_key(key_info0);
3072
11
    std::string key1 = recycle_stage_key(key_info1);
3073
3074
11
    std::vector<std::string_view> stage_keys;
3075
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, this](
3076
11
                                std::string_view k, std::string_view v) -> int {
3077
1
        ++num_scanned;
3078
1
        RecycleStagePB recycle_stage;
3079
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
3080
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
3081
0
            return -1;
3082
0
        }
3083
3084
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
3085
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3086
0
            LOG(WARNING) << "invalid idx: " << idx;
3087
0
            return -1;
3088
0
        }
3089
3090
1
        std::shared_ptr<StorageVaultAccessor> accessor;
3091
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
3092
1
                [&] {
3093
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
3094
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
3095
1
                    if (!s3_conf) {
3096
1
                        return -1;
3097
1
                    }
3098
3099
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
3100
1
                    std::shared_ptr<S3Accessor> s3_accessor;
3101
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
3102
1
                    if (ret != 0) {
3103
1
                        return -1;
3104
1
                    }
3105
3106
1
                    accessor = std::move(s3_accessor);
3107
1
                    return 0;
3108
1
                }(),
3109
1
                "recycle_stage:get_accessor", &accessor);
3110
3111
1
        if (ret != 0) {
3112
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
3113
0
            return ret;
3114
0
        }
3115
3116
1
        LOG_INFO("begin to delete objects of dropped internal stage")
3117
1
                .tag("instance_id", instance_id_)
3118
1
                .tag("stage_id", recycle_stage.stage().stage_id())
3119
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
3120
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
3121
1
                .tag("obj_info_id", idx)
3122
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
3123
1
        ret = accessor->delete_all();
3124
1
        if (ret != 0) {
3125
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
3126
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
3127
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
3128
0
                         << ", ret=" << ret;
3129
0
            return -1;
3130
0
        }
3131
1
        ++num_recycled;
3132
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
3133
1
        stage_keys.push_back(k);
3134
1
        return 0;
3135
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3076
1
                                std::string_view k, std::string_view v) -> int {
3077
1
        ++num_scanned;
3078
1
        RecycleStagePB recycle_stage;
3079
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
3080
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
3081
0
            return -1;
3082
0
        }
3083
3084
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
3085
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3086
0
            LOG(WARNING) << "invalid idx: " << idx;
3087
0
            return -1;
3088
0
        }
3089
3090
1
        std::shared_ptr<StorageVaultAccessor> accessor;
3091
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
3092
1
                [&] {
3093
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
3094
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
3095
1
                    if (!s3_conf) {
3096
1
                        return -1;
3097
1
                    }
3098
3099
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
3100
1
                    std::shared_ptr<S3Accessor> s3_accessor;
3101
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
3102
1
                    if (ret != 0) {
3103
1
                        return -1;
3104
1
                    }
3105
3106
1
                    accessor = std::move(s3_accessor);
3107
1
                    return 0;
3108
1
                }(),
3109
1
                "recycle_stage:get_accessor", &accessor);
3110
3111
1
        if (ret != 0) {
3112
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
3113
0
            return ret;
3114
0
        }
3115
3116
1
        LOG_INFO("begin to delete objects of dropped internal stage")
3117
1
                .tag("instance_id", instance_id_)
3118
1
                .tag("stage_id", recycle_stage.stage().stage_id())
3119
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
3120
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
3121
1
                .tag("obj_info_id", idx)
3122
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
3123
1
        ret = accessor->delete_all();
3124
1
        if (ret != 0) {
3125
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
3126
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
3127
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
3128
0
                         << ", ret=" << ret;
3129
0
            return -1;
3130
0
        }
3131
1
        ++num_recycled;
3132
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
3133
1
        stage_keys.push_back(k);
3134
1
        return 0;
3135
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
3136
3137
11
    auto loop_done = [&stage_keys, this]() -> int {
3138
1
        if (stage_keys.empty()) return 0;
3139
1
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
3140
1
                                                              [&](int*) { stage_keys.clear(); });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEvENKUlPiE_clES3_
Line
Count
Source
3140
1
                                                              [&](int*) { stage_keys.clear(); });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEvENKUlPiE_clES3_
3141
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
3142
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
3143
0
            return -1;
3144
0
        }
3145
1
        return 0;
3146
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEv
Line
Count
Source
3137
1
    auto loop_done = [&stage_keys, this]() -> int {
3138
1
        if (stage_keys.empty()) return 0;
3139
1
        std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
3140
1
                                                              [&](int*) { stage_keys.clear(); });
3141
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
3142
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
3143
0
            return -1;
3144
0
        }
3145
1
        return 0;
3146
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clEv
3147
3148
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
3149
11
}
3150
3151
10
int InstanceRecycler::recycle_expired_stage_objects() {
3152
10
    LOG_INFO("begin to recycle expired stage objects").tag("instance_id", instance_id_);
3153
3154
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3155
3156
10
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
3157
10
        int64_t cost =
3158
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3159
10
        LOG_INFO("recycle expired stage objects, cost={}s", cost).tag("instance_id", instance_id_);
3160
10
    });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEPi
Line
Count
Source
3156
10
    std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) {
3157
10
        int64_t cost =
3158
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3159
10
        LOG_INFO("recycle expired stage objects, cost={}s", cost).tag("instance_id", instance_id_);
3160
10
    });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEPi
3161
10
    int ret = 0;
3162
10
    for (const auto& stage : instance_info_.stages()) {
3163
0
        std::stringstream ss;
3164
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
3165
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
3166
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
3167
0
           << ", prefix=" << stage.obj_info().prefix();
3168
3169
0
        if (stopped()) break;
3170
0
        if (stage.type() == StagePB::EXTERNAL) {
3171
0
            continue;
3172
0
        }
3173
0
        int idx = stoi(stage.obj_info().id());
3174
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
3175
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
3176
0
            continue;
3177
0
        }
3178
3179
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
3180
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
3181
0
        if (!s3_conf) {
3182
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
3183
0
            continue;
3184
0
        }
3185
3186
0
        s3_conf->prefix = stage.obj_info().prefix();
3187
0
        std::shared_ptr<S3Accessor> accessor;
3188
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
3189
0
        if (ret1 != 0) {
3190
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
3191
0
            ret = -1;
3192
0
            continue;
3193
0
        }
3194
3195
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
3196
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
3197
0
            ret = -1;
3198
0
            continue;
3199
0
        }
3200
3201
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
3202
0
        int64_t expiration_time =
3203
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
3204
0
                config::internal_stage_objects_expire_time_second;
3205
0
        if (config::force_immediate_recycle) {
3206
0
            expiration_time = INT64_MAX;
3207
0
        }
3208
0
        ret1 = accessor->delete_all(expiration_time);
3209
0
        if (ret1 != 0) {
3210
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
3211
0
                         << ss.str();
3212
0
            ret = -1;
3213
0
            continue;
3214
0
        }
3215
0
    }
3216
10
    return ret;
3217
10
}
3218
3219
108
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
3220
108
    std::lock_guard lock(recycle_tasks_mutex);
3221
108
    running_recycle_tasks[task_name] = start_time;
3222
108
}
3223
3224
108
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
3225
108
    std::lock_guard lock(recycle_tasks_mutex);
3226
108
    DCHECK(running_recycle_tasks[task_name] > 0);
3227
108
    running_recycle_tasks.erase(task_name);
3228
108
}
3229
3230
21
bool InstanceRecycler::check_recycle_tasks() {
3231
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
3232
21
    {
3233
21
        std::lock_guard lock(recycle_tasks_mutex);
3234
21
        tmp_running_recycle_tasks = running_recycle_tasks;
3235
21
    }
3236
3237
21
    bool found = false;
3238
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3239
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
3240
20
        int64_t cost = now - start_time;
3241
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
3242
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
3243
20
                    .tag("instance_id", instance_id_)
3244
20
                    .tag("task", task_name);
3245
20
            found = true;
3246
20
        }
3247
20
    }
3248
3249
21
    return found;
3250
21
}
3251
3252
} // namespace doris::cloud