Coverage Report

Created: 2025-12-10 17:03

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <functional>
36
#include <initializer_list>
37
#include <memory>
38
#include <numeric>
39
#include <string>
40
#include <string_view>
41
#include <unordered_map>
42
#include <utility>
43
#include <variant>
44
45
#include "common/defer.h"
46
#include "common/stopwatch.h"
47
#include "meta-service/meta_service.h"
48
#include "meta-service/meta_service_helper.h"
49
#include "meta-service/meta_service_schema.h"
50
#include "meta-store/blob_message.h"
51
#include "meta-store/meta_reader.h"
52
#include "meta-store/txn_kv.h"
53
#include "meta-store/txn_kv_error.h"
54
#include "meta-store/versioned_value.h"
55
#include "recycler/checker.h"
56
#ifdef ENABLE_HDFS_STORAGE_VAULT
57
#include "recycler/hdfs_accessor.h"
58
#endif
59
#include "recycler/s3_accessor.h"
60
#include "recycler/storage_vault_accessor.h"
61
#ifdef UNIT_TEST
62
#include "../test/mock_accessor.h"
63
#endif
64
#include "common/bvars.h"
65
#include "common/config.h"
66
#include "common/encryption_util.h"
67
#include "common/logging.h"
68
#include "common/simple_thread_pool.h"
69
#include "common/util.h"
70
#include "cpp/sync_point.h"
71
#include "meta-store/codec.h"
72
#include "meta-store/keys.h"
73
#include "recycler/recycler_service.h"
74
#include "recycler/sync_executor.h"
75
#include "recycler/util.h"
76
77
namespace doris::cloud {
78
79
using namespace std::chrono;
80
81
// return 0 for success get a key, 1 for key not found, negative for error
82
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
83
0
    std::unique_ptr<Transaction> txn;
84
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
85
0
    if (err != TxnErrorCode::TXN_OK) {
86
0
        return -1;
87
0
    }
88
0
    switch (txn->get(key, &val, true)) {
89
0
    case TxnErrorCode::TXN_OK:
90
0
        return 0;
91
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
92
0
        return 1;
93
0
    default:
94
0
        return -1;
95
0
    };
96
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
97
98
// 0 for success, negative for error
99
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
100
272
                   std::unique_ptr<RangeGetIterator>& it) {
101
272
    std::unique_ptr<Transaction> txn;
102
272
    TxnErrorCode err = txn_kv->create_txn(&txn);
103
272
    if (err != TxnErrorCode::TXN_OK) {
104
0
        return -1;
105
0
    }
106
272
    switch (txn->get(begin, end, &it, true)) {
107
272
    case TxnErrorCode::TXN_OK:
108
272
        return 0;
109
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
110
0
        return 1;
111
0
    default:
112
0
        return -1;
113
272
    };
114
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
100
19
                   std::unique_ptr<RangeGetIterator>& it) {
101
19
    std::unique_ptr<Transaction> txn;
102
19
    TxnErrorCode err = txn_kv->create_txn(&txn);
103
19
    if (err != TxnErrorCode::TXN_OK) {
104
0
        return -1;
105
0
    }
106
19
    switch (txn->get(begin, end, &it, true)) {
107
19
    case TxnErrorCode::TXN_OK:
108
19
        return 0;
109
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
110
0
        return 1;
111
0
    default:
112
0
        return -1;
113
19
    };
114
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
100
253
                   std::unique_ptr<RangeGetIterator>& it) {
101
253
    std::unique_ptr<Transaction> txn;
102
253
    TxnErrorCode err = txn_kv->create_txn(&txn);
103
253
    if (err != TxnErrorCode::TXN_OK) {
104
0
        return -1;
105
0
    }
106
253
    switch (txn->get(begin, end, &it, true)) {
107
253
    case TxnErrorCode::TXN_OK:
108
253
        return 0;
109
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
110
0
        return 1;
111
0
    default:
112
0
        return -1;
113
253
    };
114
0
}
115
116
// return 0 for success otherwise error
117
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
118
6
    std::unique_ptr<Transaction> txn;
119
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
120
6
    if (err != TxnErrorCode::TXN_OK) {
121
0
        return -1;
122
0
    }
123
10
    for (auto k : keys) {
124
10
        txn->remove(k);
125
10
    }
126
6
    switch (txn->commit()) {
127
6
    case TxnErrorCode::TXN_OK:
128
6
        return 0;
129
0
    case TxnErrorCode::TXN_CONFLICT:
130
0
        return -1;
131
0
    default:
132
0
        return -1;
133
6
    }
134
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
117
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
118
1
    std::unique_ptr<Transaction> txn;
119
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
120
1
    if (err != TxnErrorCode::TXN_OK) {
121
0
        return -1;
122
0
    }
123
1
    for (auto k : keys) {
124
1
        txn->remove(k);
125
1
    }
126
1
    switch (txn->commit()) {
127
1
    case TxnErrorCode::TXN_OK:
128
1
        return 0;
129
0
    case TxnErrorCode::TXN_CONFLICT:
130
0
        return -1;
131
0
    default:
132
0
        return -1;
133
1
    }
134
1
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
117
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
118
5
    std::unique_ptr<Transaction> txn;
119
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
120
5
    if (err != TxnErrorCode::TXN_OK) {
121
0
        return -1;
122
0
    }
123
9
    for (auto k : keys) {
124
9
        txn->remove(k);
125
9
    }
126
5
    switch (txn->commit()) {
127
5
    case TxnErrorCode::TXN_OK:
128
5
        return 0;
129
0
    case TxnErrorCode::TXN_CONFLICT:
130
0
        return -1;
131
0
    default:
132
0
        return -1;
133
5
    }
134
5
}
135
136
// return 0 for success otherwise error
137
54
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
138
54
    std::unique_ptr<Transaction> txn;
139
54
    TxnErrorCode err = txn_kv->create_txn(&txn);
140
54
    if (err != TxnErrorCode::TXN_OK) {
141
0
        return -1;
142
0
    }
143
110k
    for (auto& k : keys) {
144
110k
        txn->remove(k);
145
110k
    }
146
54
    switch (txn->commit()) {
147
54
    case TxnErrorCode::TXN_OK:
148
54
        return 0;
149
0
    case TxnErrorCode::TXN_CONFLICT:
150
0
        return -1;
151
0
    default:
152
0
        return -1;
153
54
    }
154
54
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
137
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
138
6
    std::unique_ptr<Transaction> txn;
139
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
140
6
    if (err != TxnErrorCode::TXN_OK) {
141
0
        return -1;
142
0
    }
143
4.00k
    for (auto& k : keys) {
144
4.00k
        txn->remove(k);
145
4.00k
    }
146
6
    switch (txn->commit()) {
147
6
    case TxnErrorCode::TXN_OK:
148
6
        return 0;
149
0
    case TxnErrorCode::TXN_CONFLICT:
150
0
        return -1;
151
0
    default:
152
0
        return -1;
153
6
    }
154
6
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
137
48
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
138
48
    std::unique_ptr<Transaction> txn;
139
48
    TxnErrorCode err = txn_kv->create_txn(&txn);
140
48
    if (err != TxnErrorCode::TXN_OK) {
141
0
        return -1;
142
0
    }
143
106k
    for (auto& k : keys) {
144
106k
        txn->remove(k);
145
106k
    }
146
48
    switch (txn->commit()) {
147
48
    case TxnErrorCode::TXN_OK:
148
48
        return 0;
149
0
    case TxnErrorCode::TXN_CONFLICT:
150
0
        return -1;
151
0
    default:
152
0
        return -1;
153
48
    }
154
48
}
155
156
// return 0 for success otherwise error
157
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
158
106k
                                       std::string_view end) {
159
106k
    std::unique_ptr<Transaction> txn;
160
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
161
106k
    if (err != TxnErrorCode::TXN_OK) {
162
0
        return -1;
163
0
    }
164
106k
    txn->remove(begin, end);
165
106k
    switch (txn->commit()) {
166
106k
    case TxnErrorCode::TXN_OK:
167
106k
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
106k
    }
173
106k
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
158
106k
                                       std::string_view end) {
159
106k
    std::unique_ptr<Transaction> txn;
160
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
161
106k
    if (err != TxnErrorCode::TXN_OK) {
162
0
        return -1;
163
0
    }
164
106k
    txn->remove(begin, end);
165
106k
    switch (txn->commit()) {
166
106k
    case TxnErrorCode::TXN_OK:
167
106k
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
106k
    }
173
106k
}
174
175
void scan_restore_job_rowset(
176
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
177
        std::string& msg,
178
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
179
180
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
181
                                      int64_t num_scanned, int64_t num_recycled,
182
52
                                      int64_t start_time) {
183
52
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
184
0
        int64_t cost =
185
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
186
0
        if (cost > config::recycle_task_threshold_seconds) {
187
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
188
0
                    .tag("instance_id", instance_id)
189
0
                    .tag("task", task_name)
190
0
                    .tag("num_scanned", num_scanned)
191
0
                    .tag("num_recycled", num_recycled);
192
0
        }
193
0
    }
194
52
    return;
195
52
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
182
2
                                      int64_t start_time) {
183
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
184
0
        int64_t cost =
185
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
186
0
        if (cost > config::recycle_task_threshold_seconds) {
187
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
188
0
                    .tag("instance_id", instance_id)
189
0
                    .tag("task", task_name)
190
0
                    .tag("num_scanned", num_scanned)
191
0
                    .tag("num_recycled", num_recycled);
192
0
        }
193
0
    }
194
2
    return;
195
2
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
182
50
                                      int64_t start_time) {
183
50
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
184
0
        int64_t cost =
185
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
186
0
        if (cost > config::recycle_task_threshold_seconds) {
187
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
188
0
                    .tag("instance_id", instance_id)
189
0
                    .tag("task", task_name)
190
0
                    .tag("num_scanned", num_scanned)
191
0
                    .tag("num_recycled", num_recycled);
192
0
        }
193
0
    }
194
50
    return;
195
50
}
196
197
4
// Builds the recycler: records "<ip>:<brpc port>" as its identity, creates and
// starts the three worker pools shared by all instance recyclers, and sets up
// the lazy txn committer and the snapshot manager on top of `txn_kv`.
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);

    // Every pool uses the same parallelism and is started right after creation.
    auto make_started_pool = [](const char* pool_name) {
        auto pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
                                                       pool_name);
        pool->start();
        return pool;
    };
    auto s3_producer_pool = make_started_pool("s3_producer_pool");
    auto recycle_tablet_pool = make_started_pool("recycle_tablet_pool");
    auto group_recycle_function_pool = make_started_pool("group_recycle_function_pool");
    _thread_pool_group =
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
                                    std::move(group_recycle_function_pool));

    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(
            txn_kv_, std::make_shared<ResourceManager>(txn_kv_));
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);
}
217
218
4
// Makes sure the recycler is stopped (which joins the worker threads) before
// the object goes away.
Recycler::~Recycler() {
    if (stopped()) {
        return;
    }
    stop();
}
223
224
4
void Recycler::instance_scanner_callback() {
225
    // sleep 60 seconds before scheduling for the launch procedure to complete:
226
    // some bad hdfs connection may cause some log to stdout stderr
227
    // which may pollute .out file and affect the script to check success
228
4
    std::this_thread::sleep_for(
229
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
230
8
    while (!stopped()) {
231
4
        std::vector<InstanceInfoPB> instances;
232
4
        get_all_instances(txn_kv_.get(), instances);
233
        // TODO(plat1ko): delete job recycle kv of non-existent instances
234
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
235
4
            std::stringstream ss;
236
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
237
4
            return ss.str();
238
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
234
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
235
4
            std::stringstream ss;
236
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
237
4
            return ss.str();
238
4
        }();
239
4
        if (!instances.empty()) {
240
            // enqueue instances
241
3
            std::lock_guard lock(mtx_);
242
30
            for (auto& instance : instances) {
243
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
244
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
245
                // skip instance already in pending queue
246
30
                if (success) {
247
30
                    pending_instance_queue_.push_back(std::move(instance));
248
30
                }
249
30
            }
250
3
            pending_instance_cond_.notify_all();
251
3
        }
252
4
        {
253
4
            std::unique_lock lock(mtx_);
254
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
255
7
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
255
7
                               [&]() { return stopped(); });
256
4
        }
257
4
    }
258
4
}
259
260
8
void Recycler::recycle_callback() {
261
37
    while (!stopped()) {
262
35
        InstanceInfoPB instance;
263
35
        {
264
35
            std::unique_lock lock(mtx_);
265
35
            pending_instance_cond_.wait(
266
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
266
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
267
35
            if (stopped()) {
268
6
                return;
269
6
            }
270
29
            instance = std::move(pending_instance_queue_.front());
271
29
            pending_instance_queue_.pop_front();
272
29
            pending_instance_set_.erase(instance.instance_id());
273
29
        }
274
0
        auto& instance_id = instance.instance_id();
275
29
        {
276
29
            std::lock_guard lock(mtx_);
277
            // skip instance in recycling
278
29
            if (recycling_instance_map_.count(instance_id)) continue;
279
29
        }
280
29
        auto instance_recycler = std::make_shared<InstanceRecycler>(
281
29
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
282
283
29
        if (int r = instance_recycler->init(); r != 0) {
284
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
285
0
                         << " ret=" << r;
286
0
            continue;
287
0
        }
288
29
        std::string recycle_job_key;
289
29
        job_recycle_key({instance_id}, &recycle_job_key);
290
29
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
291
29
                                               ip_port_, config::recycle_interval_seconds * 1000);
292
29
        if (ret != 0) { // Prepare failed
293
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
294
20
                         << " ret=" << ret;
295
20
            continue;
296
20
        } else {
297
9
            std::lock_guard lock(mtx_);
298
9
            recycling_instance_map_.emplace(instance_id, instance_recycler);
299
9
        }
300
9
        if (stopped()) return;
301
9
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
302
9
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
303
9
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
304
9
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
305
9
        ret = instance_recycler->do_recycle();
306
        // If instance recycler has been aborted, don't finish this job
307
308
10
        if (!instance_recycler->stopped()) {
309
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
310
10
                                        ret == 0, ctime_ms);
311
10
        }
312
10
        if (instance_recycler->stopped() || ret != 0) {
313
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
314
0
        }
315
9
        {
316
9
            std::lock_guard lock(mtx_);
317
9
            recycling_instance_map_.erase(instance_id);
318
9
        }
319
320
9
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
321
9
        auto elpased_ms = now - ctime_ms;
322
9
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
323
9
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
324
9
        g_bvar_recycler_instance_next_ts.put({instance_id},
325
9
                                             now + config::recycle_interval_seconds * 1000);
326
9
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
327
9
        LOG(INFO) << "recycle instance done, "
328
9
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
329
9
                  << " now: " << now;
330
331
9
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
332
333
9
        LOG_WARNING("finish recycle instance")
334
9
                .tag("instance_id", instance_id)
335
9
                .tag("cost_ms", elpased_ms);
336
9
    }
337
8
}
338
339
4
void Recycler::lease_recycle_jobs() {
340
54
    while (!stopped()) {
341
50
        std::vector<std::string> instances;
342
50
        instances.reserve(recycling_instance_map_.size());
343
50
        {
344
50
            std::lock_guard lock(mtx_);
345
50
            for (auto& [id, _] : recycling_instance_map_) {
346
30
                instances.push_back(id);
347
30
            }
348
50
        }
349
50
        for (auto& i : instances) {
350
30
            std::string recycle_job_key;
351
30
            job_recycle_key({i}, &recycle_job_key);
352
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
353
30
            if (ret == 1) {
354
0
                std::lock_guard lock(mtx_);
355
0
                if (auto it = recycling_instance_map_.find(i);
356
0
                    it != recycling_instance_map_.end()) {
357
0
                    it->second->stop();
358
0
                }
359
0
            }
360
30
        }
361
50
        {
362
50
            std::unique_lock lock(mtx_);
363
50
            notifier_.wait_for(lock,
364
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
365
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
365
100
                               [&]() { return stopped(); });
366
50
        }
367
50
    }
368
4
}
369
370
4
void Recycler::check_recycle_tasks() {
371
7
    while (!stopped()) {
372
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
373
3
        {
374
3
            std::lock_guard lock(mtx_);
375
3
            recycling_instance_map = recycling_instance_map_;
376
3
        }
377
3
        for (auto& entry : recycling_instance_map) {
378
0
            entry.second->check_recycle_tasks();
379
0
        }
380
381
3
        std::unique_lock lock(mtx_);
382
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
383
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
383
6
                           [&]() { return stopped(); });
384
3
    }
385
4
}
386
387
4
// Starts the recycler: optional checker, optional brpc service registration,
// the worker threads, and the optional snapshot background components.
// Returns 0 on success, otherwise the non-zero code of the component that
// failed to start.
int Recycler::start(brpc::Server* server) {
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
    S3Environment::getInstance();

    if (config::enable_checker) {
        checker_ = std::make_unique<Checker>(txn_kv_);
        std::string msg;
        if (int ret = checker_->start(); ret != 0) {
            msg = "failed to start checker";
            LOG(ERROR) << msg;
            std::cerr << msg << std::endl;
            return ret;
        }
        msg = "checker started";
        LOG(INFO) << msg;
        std::cout << msg << std::endl;
    }

    if (server != nullptr) {
        // Add service; ownership of the service object is handed to the server.
        auto* recycler_service =
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
        server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE);
    }

    // One scanner thread, `recycle_concurrency` recycle threads, plus the
    // lease-renewal and task-check threads.
    workers_.emplace_back([this] { instance_scanner_callback(); });
    for (int i = 0; i < config::recycle_concurrency; ++i) {
        workers_.emplace_back([this] { recycle_callback(); });
    }
    workers_.emplace_back([this] { lease_recycle_jobs(); });
    workers_.emplace_back([this] { check_recycle_tasks(); });

    if (config::enable_snapshot_data_migrator) {
        snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_);
        if (int ret = snapshot_data_migrator_->start(); ret != 0) {
            LOG(ERROR) << "failed to start snapshot data migrator";
            return ret;
        }
        LOG(INFO) << "snapshot data migrator started";
    }

    if (config::enable_snapshot_chain_compactor) {
        snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_);
        if (int ret = snapshot_chain_compactor_->start(); ret != 0) {
            LOG(ERROR) << "failed to start snapshot chain compactor";
            return ret;
        }
        LOG(INFO) << "snapshot chain compactor started";
    }

    return 0;
}
444
445
4
void Recycler::stop() {
446
4
    stopped_ = true;
447
4
    notifier_.notify_all();
448
4
    pending_instance_cond_.notify_all();
449
4
    {
450
4
        std::lock_guard lock(mtx_);
451
4
        for (auto& [_, recycler] : recycling_instance_map_) {
452
0
            recycler->stop();
453
0
        }
454
4
    }
455
20
    for (auto& w : workers_) {
456
20
        if (w.joinable()) w.join();
457
20
    }
458
4
    if (checker_) {
459
0
        checker_->stop();
460
0
    }
461
4
    if (snapshot_data_migrator_) {
462
0
        snapshot_data_migrator_->stop();
463
0
    }
464
4
    if (snapshot_chain_compactor_) {
465
0
        snapshot_chain_compactor_->stop();
466
0
    }
467
4
}
468
469
// Cache of inverted-index information keyed by <index_id, schema_version>.
// On a miss the schema is read from txn_kv and the result is remembered, either
// in `inverted_index_id_map_` or, when the schema has no inverted index, in
// `schemas_without_inverted_index_`. Both containers are guarded by `mtx_`.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Looks up the inverted index info of <index_id, schema_version> into `res`.
    // Return 0 if success, 1 if schema kv not found, negative for error
    // (a failed schema read returns its TxnErrorCode converted to int).
    // `res` is left untouched when the schema is known to have no inverted index.
    //
    // The lock is released between the cache lookup and the insert, so two
    // threads that miss on the same key may both read the schema from kv and
    // both try to insert it. This trades a few repeated meta reads for a
    // smaller lock range; repeated addition does not affect correctness.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // V1 is assumed when the schema does not record a storage format.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            for (auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Remembers `index_info` for <index_id, schema_version>.
    // An empty `index_info.second` means this schema has no inverted index.
    // An entry that is already cached is left unchanged.
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        // Mixes both components of the key. Hashing only `key.second` would put
        // every index_id sharing a schema_version into the same bucket.
        size_t operator()(const Key& key) const {
            size_t seed = std::hash<int64_t> {}(key.first);
            seed ^= std::hash<int32_t> {}(key.second) +
                    static_cast<size_t>(0x9e3779b97f4a7c15ULL) + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
561
562
// Creates the recycler for one instance: keeps a copy of the instance info,
// builds the inverted index id cache and a snapshot manager on `txn_kv`, and
// shares the given thread pools and lazy txn committer.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)) {
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);

    // The recycler's resource manager is not notified when instance info
    // changes, so push the info we were given to keep it up to date.
    auto resource_mgr = txn_lazy_committer_->resource_manager();
    resource_mgr->refresh_instance(instance_id_, instance);
}
577
578
109
// Defaulted in this file rather than in the header.
// NOTE(review): presumably required so the std::unique_ptr<InvertedIndexIdCache>
// member is destroyed where InvertedIndexIdCache is a complete type — confirm.
InstanceRecycler::~InstanceRecycler() = default;
579
580
97
int InstanceRecycler::init_obj_store_accessors() {
581
97
    for (const auto& obj_info : instance_info_.obj_info()) {
582
66
#ifdef UNIT_TEST
583
66
        auto accessor = std::make_shared<MockAccessor>();
584
#else
585
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
586
        if (!s3_conf) {
587
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
588
            return -1;
589
        }
590
591
        std::shared_ptr<S3Accessor> accessor;
592
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
593
        if (ret != 0) {
594
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
595
                         << " resource_id=" << obj_info.id();
596
            return ret;
597
        }
598
#endif
599
66
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
600
66
    }
601
602
97
    return 0;
603
97
}
604
605
97
int InstanceRecycler::init_storage_vault_accessors() {
606
97
    if (instance_info_.resource_ids().empty()) {
607
90
        return 0;
608
90
    }
609
610
7
    FullRangeGetOptions opts(txn_kv_);
611
7
    opts.prefetch = true;
612
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
613
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
614
615
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
616
18
        auto [k, v] = *kv;
617
18
        StorageVaultPB vault;
618
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
619
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
620
0
            return -1;
621
0
        }
622
18
        std::string recycler_storage_vault_white_list = accumulate(
623
18
                config::recycler_storage_vault_white_list.begin(),
624
18
                config::recycler_storage_vault_white_list.end(), std::string(),
625
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
625
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
626
18
        LOG_INFO("config::recycler_storage_vault_white_list")
627
18
                .tag("", recycler_storage_vault_white_list);
628
18
        if (!config::recycler_storage_vault_white_list.empty()) {
629
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
630
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
631
8
                it == config::recycler_storage_vault_white_list.end()) {
632
2
                LOG_WARNING(
633
2
                        "failed to init accessor for vault because this vault is not in "
634
2
                        "config::recycler_storage_vault_white_list. ")
635
2
                        .tag(" vault name:", vault.name())
636
2
                        .tag(" config::recycler_storage_vault_white_list:",
637
2
                             recycler_storage_vault_white_list);
638
2
                continue;
639
2
            }
640
8
        }
641
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
642
16
                                 &accessor_map_, &vault);
643
16
        if (vault.has_hdfs_info()) {
644
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
645
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
646
9
            int ret = accessor->init();
647
9
            if (ret != 0) {
648
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
649
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
650
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
651
4
                continue;
652
4
            }
653
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
654
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
655
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
656
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
657
#else
658
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
659
                       << "but HDFS storage vaults were detected";
660
#endif
661
7
        } else if (vault.has_obj_info()) {
662
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
663
7
            if (!s3_conf) {
664
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
665
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
666
1
                continue;
667
1
            }
668
669
6
            std::shared_ptr<S3Accessor> accessor;
670
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
671
6
            if (ret != 0) {
672
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
673
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
674
0
                             << " ret=" << ret
675
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
676
0
                continue;
677
0
            }
678
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
679
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
680
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
681
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
682
6
        }
683
16
    }
684
685
7
    if (!it->is_valid()) {
686
0
        LOG_WARNING("failed to get storage vault kv");
687
0
        return -1;
688
0
    }
689
690
7
    if (accessor_map_.empty()) {
691
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
692
1
        return -2;
693
1
    }
694
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
695
6
             instance_id_);
696
697
6
    return 0;
698
7
}
699
700
97
int InstanceRecycler::init() {
701
97
    int ret = init_obj_store_accessors();
702
97
    if (ret != 0) {
703
0
        return ret;
704
0
    }
705
706
97
    return init_storage_vault_accessors();
707
97
}
708
709
// Bundles several `int()` callables into a single task.
//
// The returned task invokes every callable in argument order (all of them run,
// even if an earlier one fails) and yields the last non-zero return value, or 0
// when every callable returned 0.
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() {
        // Braced initialization evaluates the calls strictly left to right.
        const std::initializer_list<int> results {funcs()...};
        return std::accumulate(results.begin(), results.end(), 0,
                               [](int last_failure, int ret) {
                                   return ret != 0 ? ret : last_failure;
                               });
    };
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
Line
Count
Source
710
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
711
10
    return [funcs...]() {
712
10
        return [](std::initializer_list<int> ret_vals) {
713
10
            int i = 0;
714
10
            for (int ret : ret_vals) {
715
10
                if (ret != 0) {
716
10
                    i = ret;
717
10
                }
718
10
            }
719
10
            return i;
720
10
        }({funcs()...});
721
10
    };
722
10
}
723
724
10
int InstanceRecycler::do_recycle() {
725
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
726
10
    tablet_metrics_context_.reset();
727
10
    segment_metrics_context_.reset();
728
10
    DORIS_CLOUD_DEFER {
729
10
        tablet_metrics_context_.finish_report();
730
10
        segment_metrics_context_.finish_report();
731
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
728
10
    DORIS_CLOUD_DEFER {
729
10
        tablet_metrics_context_.finish_report();
730
10
        segment_metrics_context_.finish_report();
731
10
    };
732
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
733
0
        int res = recycle_cluster_snapshots();
734
0
        if (res != 0) {
735
0
            return -1;
736
0
        }
737
0
        return recycle_deleted_instance();
738
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
739
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
740
10
                                        fmt::format("instance id {}", instance_id_),
741
120
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
741
120
                                        [](int r) { return r != 0; });
742
10
        sync_executor
743
10
                .add(task_wrapper(
744
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
744
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
745
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
745
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
746
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
747
                                   // because they may both recycle the same set of tablets
748
                        // recycle dropped table or indexes (mv, rollup)
749
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
749
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
750
                        // recycle dropped partitions
751
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
751
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
752
10
                .add(task_wrapper(
753
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
753
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
754
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
754
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
755
10
                .add(task_wrapper(
756
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
756
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
757
10
                .add(task_wrapper(
758
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
758
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
759
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
759
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
760
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
760
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
761
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
761
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
762
10
                .add(task_wrapper(
763
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
763
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
764
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
764
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
765
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
Line
Count
Source
765
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
766
10
        bool finished = true;
767
10
        std::vector<int> rets = sync_executor.when_all(&finished);
768
120
        for (int ret : rets) {
769
120
            if (ret != 0) {
770
0
                return ret;
771
0
            }
772
120
        }
773
10
        return finished ? 0 : -1;
774
10
    } else {
775
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
776
0
                     << " instance_id=" << instance_id_;
777
0
        return -1;
778
0
    }
779
10
}
780
781
/**
782
* 1. delete all remote data
783
* 2. delete all kv
784
* 3. remove instance kv
785
*/
786
4
int InstanceRecycler::recycle_deleted_instance() {
787
4
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
788
789
4
    int ret = 0;
790
4
    auto start_time = steady_clock::now();
791
792
4
    DORIS_CLOUD_DEFER {
793
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
794
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
795
4
                     << " recycle deleted instance, cost=" << cost
796
4
                     << "s, instance_id=" << instance_id_;
797
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
792
4
    DORIS_CLOUD_DEFER {
793
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
794
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
795
4
                     << " recycle deleted instance, cost=" << cost
796
4
                     << "s, instance_id=" << instance_id_;
797
4
    };
798
799
4
    bool has_snapshots = false;
800
4
    if (has_cluster_snapshots(&has_snapshots) != 0) {
801
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
802
0
        return -1;
803
4
    } else if (has_snapshots) {
804
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
805
1
        return 0;
806
1
    }
807
808
    // delete all remote data
809
3
    for (auto& [_, accessor] : accessor_map_) {
810
3
        if (stopped()) {
811
0
            return ret;
812
0
        }
813
814
3
        LOG(INFO) << "begin to delete all objects in " << accessor->uri();
815
3
        int del_ret = accessor->delete_all();
816
3
        if (del_ret == 0) {
817
3
            LOG(INFO) << "successfully delete all objects in " << accessor->uri();
818
3
        } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
819
            // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
820
            // so the recycling has been successful.
821
0
            ret = -1;
822
0
        }
823
3
    }
824
825
3
    if (ret != 0) {
826
0
        LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
827
0
        return ret;
828
0
    }
829
830
    // delete all kv
831
3
    std::unique_ptr<Transaction> txn;
832
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
833
3
    if (err != TxnErrorCode::TXN_OK) {
834
0
        LOG(WARNING) << "failed to create txn";
835
0
        ret = -1;
836
0
        return -1;
837
0
    }
838
3
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
839
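    // NOTE(review): remote objects are already deleted above, so kv is removed after objects here; the original note "delete kv before deleting objects to prevent the checker from misjudging data loss" looks stale -- confirm intended order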
840
3
    std::string start_txn_key = txn_key_prefix(instance_id_);
841
3
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
842
3
    txn->remove(start_txn_key, end_txn_key);
843
3
    std::string start_version_key = version_key_prefix(instance_id_);
844
3
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
845
3
    txn->remove(start_version_key, end_version_key);
846
3
    std::string start_meta_key = meta_key_prefix(instance_id_);
847
3
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
848
3
    txn->remove(start_meta_key, end_meta_key);
849
3
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
850
3
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
851
3
    txn->remove(start_recycle_key, end_recycle_key);
852
3
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
853
3
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
854
3
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
855
3
    std::string start_copy_key = copy_key_prefix(instance_id_);
856
3
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
857
3
    txn->remove(start_copy_key, end_copy_key);
858
    // should not remove job key range, because we need to reserve job recycle kv
859
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
860
3
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
861
3
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
862
3
    txn->remove(start_job_tablet_key, end_job_tablet_key);
863
3
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
864
3
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
865
3
    std::string start_vault_key = storage_vault_key(key_info0);
866
3
    std::string end_vault_key = storage_vault_key(key_info1);
867
3
    txn->remove(start_vault_key, end_vault_key);
868
3
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, 0, ""});
869
3
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, INT64_MAX, ""});
870
3
    txn->remove(dbm_start_key, dbm_end_key);
871
3
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
872
3
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
873
3
    txn->remove(versioned_version_key_start, versioned_version_key_end);
874
3
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
875
3
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
876
3
    txn->remove(versioned_index_key_start, versioned_index_key_end);
877
3
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
878
3
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
879
3
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
880
3
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
881
3
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
882
3
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
883
3
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
884
3
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
885
3
    txn->remove(versioned_data_key_start, versioned_data_key_end);
886
3
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
887
3
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
888
3
    txn->remove(versioned_log_key_start, versioned_log_key_end);
889
3
    err = txn->commit();
890
3
    if (err != TxnErrorCode::TXN_OK) {
891
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
892
0
        ret = -1;
893
0
    }
894
895
3
    if (ret == 0) {
896
        // remove instance kv
897
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
898
3
        err = txn_kv_->create_txn(&txn);
899
3
        if (err != TxnErrorCode::TXN_OK) {
900
0
            LOG(WARNING) << "failed to create txn";
901
0
            ret = -1;
902
0
            return ret;
903
0
        }
904
3
        std::string key;
905
3
        instance_key({instance_id_}, &key);
906
3
        txn->atomic_add(system_meta_service_instance_update_key(), 1);
907
3
        txn->remove(key);
908
3
        err = txn->commit();
909
3
        if (err != TxnErrorCode::TXN_OK) {
910
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
911
0
                         << " err=" << err;
912
0
            ret = -1;
913
0
        }
914
3
    }
915
3
    return ret;
916
3
}
917
918
int InstanceRecycler::check_rowset_exists(int64_t tablet_id, const std::string& rowset_id,
919
9
                                          bool* exists, PackedFileRecycleStats* stats) {
920
9
    if (exists == nullptr) {
921
0
        return -1;
922
0
    }
923
9
    *exists = false;
924
925
9
    std::string begin = meta_rowset_key({instance_id_, tablet_id, 0});
926
9
    std::string end = meta_rowset_key({instance_id_, tablet_id + 1, 0});
927
9
    std::string scan_begin = begin;
928
929
9
    while (true) {
930
9
        std::unique_ptr<RangeGetIterator> it_range;
931
9
        int get_ret = txn_get(txn_kv_.get(), scan_begin, end, it_range);
932
9
        if (get_ret < 0) {
933
0
            LOG_WARNING("failed to scan rowset metas when recycling packed file")
934
0
                    .tag("instance_id", instance_id_)
935
0
                    .tag("tablet_id", tablet_id)
936
0
                    .tag("ret", get_ret);
937
0
            return -1;
938
0
        }
939
9
        if (get_ret == 1 || it_range == nullptr || !it_range->has_next()) {
940
6
            return 0;
941
6
        }
942
943
3
        std::string last_key;
944
3
        while (it_range->has_next()) {
945
3
            auto [k, v] = it_range->next();
946
3
            last_key.assign(k.data(), k.size());
947
3
            doris::RowsetMetaCloudPB rowset_meta;
948
3
            if (!rowset_meta.ParseFromArray(v.data(), v.size())) {
949
0
                LOG_WARNING("malformed rowset meta when checking packed file rowset existence")
950
0
                        .tag("instance_id", instance_id_)
951
0
                        .tag("tablet_id", tablet_id)
952
0
                        .tag("key", hex(k));
953
0
                continue;
954
0
            }
955
3
            if (stats) {
956
3
                ++stats->rowset_scan_count;
957
3
            }
958
3
            if (rowset_meta.rowset_id_v2() == rowset_id) {
959
3
                *exists = true;
960
3
                return 0;
961
3
            }
962
3
        }
963
964
0
        if (!it_range->more()) {
965
0
            return 0;
966
0
        }
967
968
        // Continue scanning from the next key to keep each transaction short.
969
0
        scan_begin = std::move(last_key);
970
0
        scan_begin.push_back('\x00');
971
0
    }
972
9
}
973
974
int InstanceRecycler::check_recycle_and_tmp_rowset_exists(int64_t tablet_id,
975
                                                          const std::string& rowset_id,
976
                                                          int64_t txn_id, bool* recycle_exists,
977
11
                                                          bool* tmp_exists) {
978
11
    if (recycle_exists == nullptr || tmp_exists == nullptr) {
979
0
        return -1;
980
0
    }
981
11
    *recycle_exists = false;
982
11
    *tmp_exists = false;
983
984
11
    if (txn_id <= 0) {
985
0
        LOG_WARNING("invalid txn id when checking recycle/tmp rowset existence")
986
0
                .tag("instance_id", instance_id_)
987
0
                .tag("tablet_id", tablet_id)
988
0
                .tag("rowset_id", rowset_id)
989
0
                .tag("txn_id", txn_id);
990
0
        return -1;
991
0
    }
992
993
11
    std::unique_ptr<Transaction> txn;
994
11
    TxnErrorCode err = txn_kv_->create_txn(&txn);
995
11
    if (err != TxnErrorCode::TXN_OK) {
996
0
        LOG_WARNING("failed to create txn when checking recycle/tmp rowset existence")
997
0
                .tag("instance_id", instance_id_)
998
0
                .tag("tablet_id", tablet_id)
999
0
                .tag("rowset_id", rowset_id)
1000
0
                .tag("txn_id", txn_id)
1001
0
                .tag("err", err);
1002
0
        return -1;
1003
0
    }
1004
1005
11
    std::string recycle_key = recycle_rowset_key({instance_id_, tablet_id, rowset_id});
1006
11
    auto ret = key_exists(txn.get(), recycle_key, true);
1007
11
    if (ret == TxnErrorCode::TXN_OK) {
1008
1
        *recycle_exists = true;
1009
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1010
0
        LOG_WARNING("failed to check recycle rowset existence")
1011
0
                .tag("instance_id", instance_id_)
1012
0
                .tag("tablet_id", tablet_id)
1013
0
                .tag("rowset_id", rowset_id)
1014
0
                .tag("key", hex(recycle_key))
1015
0
                .tag("err", ret);
1016
0
        return -1;
1017
0
    }
1018
1019
11
    std::string tmp_key = meta_rowset_tmp_key({instance_id_, txn_id, tablet_id});
1020
11
    ret = key_exists(txn.get(), tmp_key, true);
1021
11
    if (ret == TxnErrorCode::TXN_OK) {
1022
1
        *tmp_exists = true;
1023
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1024
0
        LOG_WARNING("failed to check tmp rowset existence")
1025
0
                .tag("instance_id", instance_id_)
1026
0
                .tag("tablet_id", tablet_id)
1027
0
                .tag("txn_id", txn_id)
1028
0
                .tag("key", hex(tmp_key))
1029
0
                .tag("err", ret);
1030
0
        return -1;
1031
0
    }
1032
1033
11
    return 0;
1034
11
}
1035
1036
std::pair<std::string, std::shared_ptr<StorageVaultAccessor>>
1037
7
InstanceRecycler::resolve_packed_file_accessor(const std::string& hint) {
1038
7
    if (!hint.empty()) {
1039
7
        if (auto it = accessor_map_.find(hint); it != accessor_map_.end()) {
1040
7
            return {hint, it->second};
1041
7
        }
1042
7
    }
1043
1044
0
    return {"", nullptr};
1045
7
}
1046
1047
int InstanceRecycler::correct_packed_file_info(cloud::PackedFileInfoPB* packed_info, bool* changed,
1048
                                               const std::string& packed_file_path,
1049
3
                                               PackedFileRecycleStats* stats) {
1050
3
    bool local_changed = false;
1051
3
    int64_t left_num = 0;
1052
3
    int64_t left_bytes = 0;
1053
3
    bool all_small_files_confirmed = true;
1054
3
    LOG(INFO) << "begin to correct file: " << packed_file_path;
1055
1056
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1057
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1058
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1059
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1060
14
        LOG_INFO("packed slice correction status")
1061
14
                .tag("instance_id", instance_id_)
1062
14
                .tag("packed_file_path", packed_file_path)
1063
14
                .tag("small_file_path", file.path())
1064
14
                .tag("tablet_id", tablet_id)
1065
14
                .tag("rowset_id", rowset_id)
1066
14
                .tag("txn_id", txn_id)
1067
14
                .tag("size", file.size())
1068
14
                .tag("deleted", file.deleted())
1069
14
                .tag("corrected", file.corrected())
1070
14
                .tag("confirmed_this_round", confirmed_this_round);
1071
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
Line
Count
Source
1056
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1057
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1058
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1059
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1060
14
        LOG_INFO("packed slice correction status")
1061
14
                .tag("instance_id", instance_id_)
1062
14
                .tag("packed_file_path", packed_file_path)
1063
14
                .tag("small_file_path", file.path())
1064
14
                .tag("tablet_id", tablet_id)
1065
14
                .tag("rowset_id", rowset_id)
1066
14
                .tag("txn_id", txn_id)
1067
14
                .tag("size", file.size())
1068
14
                .tag("deleted", file.deleted())
1069
14
                .tag("corrected", file.corrected())
1070
14
                .tag("confirmed_this_round", confirmed_this_round);
1071
14
    };
1072
1073
17
    for (int i = 0; i < packed_info->slices_size(); ++i) {
1074
14
        auto* small_file = packed_info->mutable_slices(i);
1075
14
        if (small_file->deleted()) {
1076
3
            log_small_file_status(*small_file, small_file->corrected());
1077
3
            continue;
1078
3
        }
1079
1080
11
        if (small_file->corrected()) {
1081
0
            left_num++;
1082
0
            left_bytes += small_file->size();
1083
0
            log_small_file_status(*small_file, true);
1084
0
            continue;
1085
0
        }
1086
1087
11
        if (!small_file->has_tablet_id() || !small_file->has_rowset_id()) {
1088
0
            LOG_WARNING("packed file small file missing identifiers during correction")
1089
0
                    .tag("instance_id", instance_id_)
1090
0
                    .tag("small_file_path", small_file->path())
1091
0
                    .tag("index", i);
1092
0
            return -1;
1093
0
        }
1094
1095
11
        int64_t tablet_id = small_file->tablet_id();
1096
11
        const std::string& rowset_id = small_file->rowset_id();
1097
11
        if (!small_file->has_txn_id() || small_file->txn_id() <= 0) {
1098
0
            LOG_WARNING("packed file small file missing valid txn id during correction")
1099
0
                    .tag("instance_id", instance_id_)
1100
0
                    .tag("small_file_path", small_file->path())
1101
0
                    .tag("index", i)
1102
0
                    .tag("tablet_id", tablet_id)
1103
0
                    .tag("rowset_id", rowset_id)
1104
0
                    .tag("has_txn_id", small_file->has_txn_id())
1105
0
                    .tag("txn_id", small_file->has_txn_id() ? small_file->txn_id() : 0);
1106
0
            return -1;
1107
0
        }
1108
11
        int64_t txn_id = small_file->txn_id();
1109
11
        bool recycle_exists = false;
1110
11
        bool tmp_exists = false;
1111
11
        if (check_recycle_and_tmp_rowset_exists(tablet_id, rowset_id, txn_id, &recycle_exists,
1112
11
                                                &tmp_exists) != 0) {
1113
0
            return -1;
1114
0
        }
1115
1116
11
        bool small_file_confirmed = false;
1117
11
        if (tmp_exists) {
1118
1
            left_num++;
1119
1
            left_bytes += small_file->size();
1120
1
            small_file_confirmed = true;
1121
10
        } else if (recycle_exists) {
1122
1
            left_num++;
1123
1
            left_bytes += small_file->size();
1124
            // keep small_file_confirmed=false so the packed file remains uncorrected
1125
9
        } else {
1126
9
            bool rowset_exists = false;
1127
9
            if (check_rowset_exists(tablet_id, rowset_id, &rowset_exists, stats) != 0) {
1128
0
                return -1;
1129
0
            }
1130
1131
9
            if (!rowset_exists) {
1132
6
                if (!small_file->deleted()) {
1133
6
                    small_file->set_deleted(true);
1134
6
                    local_changed = true;
1135
6
                }
1136
6
                if (!small_file->corrected()) {
1137
6
                    small_file->set_corrected(true);
1138
6
                    local_changed = true;
1139
6
                }
1140
6
                small_file_confirmed = true;
1141
6
            } else {
1142
3
                left_num++;
1143
3
                left_bytes += small_file->size();
1144
3
                small_file_confirmed = true;
1145
3
            }
1146
9
        }
1147
1148
11
        if (!small_file_confirmed) {
1149
1
            all_small_files_confirmed = false;
1150
1
        }
1151
1152
11
        if (small_file->corrected() != small_file_confirmed) {
1153
4
            small_file->set_corrected(small_file_confirmed);
1154
4
            local_changed = true;
1155
4
        }
1156
1157
11
        log_small_file_status(*small_file, small_file_confirmed);
1158
11
    }
1159
1160
3
    if (packed_info->remaining_slice_bytes() != left_bytes) {
1161
3
        packed_info->set_remaining_slice_bytes(left_bytes);
1162
3
        local_changed = true;
1163
3
    }
1164
3
    if (packed_info->ref_cnt() != left_num) {
1165
3
        auto old_ref_cnt = packed_info->ref_cnt();
1166
3
        packed_info->set_ref_cnt(left_num);
1167
3
        LOG_INFO("corrected packed file ref count")
1168
3
                .tag("instance_id", instance_id_)
1169
3
                .tag("resource_id", packed_info->resource_id())
1170
3
                .tag("packed_file_path", packed_file_path)
1171
3
                .tag("old_ref_cnt", old_ref_cnt)
1172
3
                .tag("new_ref_cnt", left_num);
1173
3
        local_changed = true;
1174
3
    }
1175
3
    if (packed_info->corrected() != all_small_files_confirmed) {
1176
2
        packed_info->set_corrected(all_small_files_confirmed);
1177
2
        local_changed = true;
1178
2
    }
1179
3
    if (left_num == 0 && packed_info->state() != cloud::PackedFileInfoPB::RECYCLING) {
1180
1
        packed_info->set_state(cloud::PackedFileInfoPB::RECYCLING);
1181
1
        local_changed = true;
1182
1
    }
1183
1184
3
    if (changed != nullptr) {
1185
3
        *changed = local_changed;
1186
3
    }
1187
3
    return 0;
1188
3
}
1189
1190
int InstanceRecycler::process_single_packed_file(const std::string& packed_key,
1191
                                                 const std::string& packed_file_path,
1192
4
                                                 PackedFileRecycleStats* stats) {
1193
4
    if (stopped()) {
1194
0
        LOG_WARNING("recycler stopped before processing packed file")
1195
0
                .tag("instance_id", instance_id_)
1196
0
                .tag("packed_file_path", packed_file_path);
1197
0
        return -1;
1198
0
    }
1199
1200
4
    std::unique_ptr<Transaction> txn;
1201
4
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1202
4
    if (err != TxnErrorCode::TXN_OK) {
1203
0
        LOG_WARNING("failed to create txn when processing packed file")
1204
0
                .tag("instance_id", instance_id_)
1205
0
                .tag("packed_file_path", packed_file_path)
1206
0
                .tag("err", err);
1207
0
        return -1;
1208
0
    }
1209
1210
4
    std::string packed_val;
1211
4
    err = txn->get(packed_key, &packed_val);
1212
4
    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1213
0
        return 0;
1214
0
    }
1215
4
    if (err != TxnErrorCode::TXN_OK) {
1216
0
        LOG_WARNING("failed to get packed file kv")
1217
0
                .tag("instance_id", instance_id_)
1218
0
                .tag("packed_file_path", packed_file_path)
1219
0
                .tag("err", err);
1220
0
        return -1;
1221
0
    }
1222
1223
4
    cloud::PackedFileInfoPB packed_info;
1224
4
    if (!packed_info.ParseFromString(packed_val)) {
1225
0
        LOG_WARNING("failed to parse packed file info")
1226
0
                .tag("instance_id", instance_id_)
1227
0
                .tag("packed_file_path", packed_file_path);
1228
0
        return -1;
1229
0
    }
1230
1231
4
    int64_t now_sec = ::time(nullptr);
1232
4
    bool corrected = packed_info.corrected();
1233
4
    bool due =
1234
4
            config::force_immediate_recycle ||
1235
4
            now_sec - packed_info.created_at_sec() >= config::packed_file_correction_delay_seconds;
1236
1237
4
    if (!corrected && due) {
1238
3
        bool changed = false;
1239
3
        if (correct_packed_file_info(&packed_info, &changed, packed_file_path, stats) != 0) {
1240
0
            LOG_WARNING("correct_packed_file_info failed")
1241
0
                    .tag("instance_id", instance_id_)
1242
0
                    .tag("packed_file_path", packed_file_path);
1243
0
            return -1;
1244
0
        }
1245
3
        if (changed) {
1246
3
            std::string updated;
1247
3
            if (!packed_info.SerializeToString(&updated)) {
1248
0
                LOG_WARNING("failed to serialize packed file info after correction")
1249
0
                        .tag("instance_id", instance_id_)
1250
0
                        .tag("packed_file_path", packed_file_path);
1251
0
                return -1;
1252
0
            }
1253
3
            txn->put(packed_key, updated);
1254
3
            err = txn->commit();
1255
3
            if (err == TxnErrorCode::TXN_OK) {
1256
3
                if (stats) {
1257
3
                    ++stats->num_corrected;
1258
3
                }
1259
3
            } else {
1260
0
                if (err == TxnErrorCode::TXN_CONFLICT) {
1261
0
                    LOG_WARNING("failed to commit correction for packed file due to conflict")
1262
0
                            .tag("instance_id", instance_id_)
1263
0
                            .tag("packed_file_path", packed_file_path);
1264
0
                } else {
1265
0
                    LOG_WARNING("failed to commit correction for packed file")
1266
0
                            .tag("instance_id", instance_id_)
1267
0
                            .tag("packed_file_path", packed_file_path)
1268
0
                            .tag("err", err);
1269
0
                }
1270
0
                return -1;
1271
0
            }
1272
3
        }
1273
3
    }
1274
1275
4
    txn.reset();
1276
1277
4
    if (packed_info.state() == cloud::PackedFileInfoPB::RECYCLING && packed_info.ref_cnt() == 0) {
1278
1
        if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
1279
0
            LOG_WARNING("packed file missing resource id when recycling")
1280
0
                    .tag("instance_id", instance_id_)
1281
0
                    .tag("packed_file_path", packed_file_path);
1282
0
            return -1;
1283
0
        }
1284
1
        auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
1285
1
        if (!accessor) {
1286
0
            LOG_WARNING("no accessor available to delete packed file")
1287
0
                    .tag("instance_id", instance_id_)
1288
0
                    .tag("packed_file_path", packed_file_path)
1289
0
                    .tag("resource_id", packed_info.resource_id());
1290
0
            return -1;
1291
0
        }
1292
1
        int del_ret = accessor->delete_file(packed_file_path);
1293
1
        if (del_ret != 0 && del_ret != 1) {
1294
0
            LOG_WARNING("failed to delete packed file")
1295
0
                    .tag("instance_id", instance_id_)
1296
0
                    .tag("packed_file_path", packed_file_path)
1297
0
                    .tag("resource_id", resource_id)
1298
0
                    .tag("ret", del_ret);
1299
0
            return -1;
1300
0
        }
1301
1
        if (del_ret == 1) {
1302
0
            LOG_INFO("packed file already removed")
1303
0
                    .tag("instance_id", instance_id_)
1304
0
                    .tag("packed_file_path", packed_file_path)
1305
0
                    .tag("resource_id", resource_id);
1306
1
        } else {
1307
1
            LOG_INFO("deleted packed file")
1308
1
                    .tag("instance_id", instance_id_)
1309
1
                    .tag("packed_file_path", packed_file_path)
1310
1
                    .tag("resource_id", resource_id);
1311
1
        }
1312
1313
1
        std::unique_ptr<Transaction> del_txn;
1314
1
        err = txn_kv_->create_txn(&del_txn);
1315
1
        if (err != TxnErrorCode::TXN_OK) {
1316
0
            LOG_WARNING("failed to create txn when removing packed file kv")
1317
0
                    .tag("instance_id", instance_id_)
1318
0
                    .tag("packed_file_path", packed_file_path)
1319
0
                    .tag("err", err);
1320
0
            return -1;
1321
0
        }
1322
1323
1
        std::string latest_val;
1324
1
        err = del_txn->get(packed_key, &latest_val);
1325
1
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1326
0
            return 0;
1327
0
        }
1328
1
        if (err != TxnErrorCode::TXN_OK) {
1329
0
            LOG_WARNING("failed to re-read packed file kv before removal")
1330
0
                    .tag("instance_id", instance_id_)
1331
0
                    .tag("packed_file_path", packed_file_path)
1332
0
                    .tag("err", err);
1333
0
            return -1;
1334
0
        }
1335
1336
1
        cloud::PackedFileInfoPB latest_info;
1337
1
        if (!latest_info.ParseFromString(latest_val)) {
1338
0
            LOG_WARNING("failed to parse packed file info before removal")
1339
0
                    .tag("instance_id", instance_id_)
1340
0
                    .tag("packed_file_path", packed_file_path);
1341
0
            return -1;
1342
0
        }
1343
1344
1
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1345
1
              latest_info.ref_cnt() == 0)) {
1346
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
1347
0
                    .tag("instance_id", instance_id_)
1348
0
                    .tag("packed_file_path", packed_file_path);
1349
0
            return 0;
1350
0
        }
1351
1352
1
        del_txn->remove(packed_key);
1353
1
        err = del_txn->commit();
1354
1
        if (err == TxnErrorCode::TXN_OK) {
1355
1
            if (stats) {
1356
1
                ++stats->num_deleted;
1357
1
                stats->bytes_deleted += static_cast<int64_t>(packed_key.size()) +
1358
1
                                        static_cast<int64_t>(latest_val.size());
1359
1
                if (del_ret == 0 || del_ret == 1) {
1360
1
                    ++stats->num_object_deleted;
1361
1
                    int64_t object_size = latest_info.total_slice_bytes();
1362
1
                    if (object_size <= 0) {
1363
0
                        object_size = packed_info.total_slice_bytes();
1364
0
                    }
1365
1
                    stats->bytes_object_deleted += object_size;
1366
1
                }
1367
1
            }
1368
1
            LOG_INFO("removed packed file metadata")
1369
1
                    .tag("instance_id", instance_id_)
1370
1
                    .tag("packed_file_path", packed_file_path);
1371
1
            return 0;
1372
1
        }
1373
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
1374
0
            LOG_WARNING("failed to remove packed file kv due to conflict")
1375
0
                    .tag("instance_id", instance_id_)
1376
0
                    .tag("packed_file_path", packed_file_path);
1377
0
            return -1;
1378
0
        }
1379
0
        LOG_WARNING("failed to remove packed file kv")
1380
0
                .tag("instance_id", instance_id_)
1381
0
                .tag("packed_file_path", packed_file_path)
1382
0
                .tag("err", err);
1383
0
        return -1;
1384
0
    }
1385
1386
3
    return 0;
1387
4
}
1388
1389
int InstanceRecycler::handle_packed_file_kv(std::string_view key, std::string_view /*value*/,
1390
4
                                            PackedFileRecycleStats* stats, int* ret) {
1391
4
    if (stats) {
1392
4
        ++stats->num_scanned;
1393
4
    }
1394
4
    std::string packed_file_path;
1395
4
    if (!decode_packed_file_key(key, &packed_file_path)) {
1396
0
        LOG_WARNING("failed to decode packed file key")
1397
0
                .tag("instance_id", instance_id_)
1398
0
                .tag("key", hex(key));
1399
0
        if (stats) {
1400
0
            ++stats->num_failed;
1401
0
        }
1402
0
        if (ret) {
1403
0
            *ret = -1;
1404
0
        }
1405
0
        return 0;
1406
0
    }
1407
1408
4
    std::string packed_key(key);
1409
4
    int process_ret = process_single_packed_file(packed_key, packed_file_path, stats);
1410
4
    if (process_ret != 0) {
1411
0
        if (stats) {
1412
0
            ++stats->num_failed;
1413
0
        }
1414
0
        if (ret) {
1415
0
            *ret = -1;
1416
0
        }
1417
0
    }
1418
4
    return 0;
1419
4
}
1420
bool is_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id,
1421
57.0k
                     int64_t txn_id) {
1422
57.0k
    std::unique_ptr<Transaction> txn;
1423
57.0k
    TxnErrorCode err = txn_kv->create_txn(&txn);
1424
57.0k
    if (err != TxnErrorCode::TXN_OK) {
1425
0
        LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id;
1426
0
        return false;
1427
0
    }
1428
1429
57.0k
    std::string index_val;
1430
57.0k
    const std::string index_key = txn_index_key({instance_id, txn_id});
1431
57.0k
    err = txn->get(index_key, &index_val);
1432
57.0k
    if (err != TxnErrorCode::TXN_OK) {
1433
53.0k
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
1434
53.0k
            TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_recycled");
1435
            // txn has been recycled;
1436
53.0k
            LOG(INFO) << "txn index key has been recycled, txn_id=" << txn_id
1437
53.0k
                      << " instance_id=" << instance_id;
1438
53.0k
            return true;
1439
53.0k
        }
1440
0
        LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id
1441
0
                     << " instance_id=" << instance_id << " key=" << hex(index_key)
1442
0
                     << " err=" << err;
1443
0
        return false;
1444
53.0k
    }
1445
1446
4.00k
    TxnIndexPB index_pb;
1447
4.00k
    if (!index_pb.ParseFromString(index_val)) {
1448
0
        LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id
1449
0
                     << " instance_id=" << instance_id;
1450
0
        return false;
1451
0
    }
1452
1453
4.00k
    DCHECK(index_pb.has_tablet_index() == true);
1454
4.00k
    if (!index_pb.tablet_index().has_db_id()) {
1455
        // In the previous version, the db_id was not set in the index_pb.
1456
        // If updating to the version which enable txn lazy commit, the db_id will be set.
1457
0
        LOG(INFO) << "txn index has no db_id, txn_id=" << txn_id << " instance_id=" << instance_id
1458
0
                  << " index=" << index_pb.ShortDebugString();
1459
0
        return true;
1460
0
    }
1461
1462
4.00k
    int64_t db_id = index_pb.tablet_index().db_id();
1463
4.00k
    DCHECK_GT(db_id, 0) << "db_id=" << db_id << " txn_id=" << txn_id
1464
0
                        << " instance_id=" << instance_id;
1465
1466
4.00k
    std::string info_val;
1467
4.00k
    const std::string info_key = txn_info_key({instance_id, db_id, txn_id});
1468
4.00k
    err = txn->get(info_key, &info_val);
1469
4.00k
    if (err != TxnErrorCode::TXN_OK) {
1470
0
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
1471
            // txn info has been recycled;
1472
0
            LOG(INFO) << "txn info key has been recycled, db_id=" << db_id << " txn_id=" << txn_id
1473
0
                      << " instance_id=" << instance_id;
1474
0
            return true;
1475
0
        }
1476
1477
0
        DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND);
1478
0
        LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id
1479
0
                     << " instance_id=" << instance_id << " key=" << hex(info_key)
1480
0
                     << " err=" << err;
1481
0
        return false;
1482
0
    }
1483
1484
4.00k
    TxnInfoPB txn_info;
1485
4.00k
    if (!txn_info.ParseFromString(info_val)) {
1486
0
        LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id
1487
0
                     << " instance_id=" << instance_id;
1488
0
        return false;
1489
0
    }
1490
1491
4.00k
    DCHECK(txn_info.txn_id() == txn_id) << "txn_id=" << txn_id << " instance_id=" << instance_id
1492
0
                                        << " txn_info=" << txn_info.ShortDebugString();
1493
1494
4.00k
    if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status() ||
1495
4.00k
        TxnStatusPB::TXN_STATUS_VISIBLE == txn_info.status()) {
1496
2.00k
        TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_aborted", &txn_info);
1497
2.00k
        return true;
1498
2.00k
    }
1499
1500
2.00k
    TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_not_finished", &txn_info);
1501
2.00k
    return false;
1502
4.00k
}
1503
1504
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
1505
4.01k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
1506
4.01k
    if (config::force_immediate_recycle) {
1507
8
        return 0L;
1508
8
    }
1509
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1510
4.00k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1511
4.00k
    int64_t retention_seconds = config::retention_seconds;
1512
4.00k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1513
3.10k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1514
3.10k
    }
1515
4.00k
    int64_t final_expiration = expiration + retention_seconds;
1516
4.00k
    if (*earlest_ts > final_expiration) {
1517
3
        *earlest_ts = final_expiration;
1518
3
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1519
3
    }
1520
4.00k
    return final_expiration;
1521
4.01k
}
1522
1523
int64_t calculate_partition_expired_time(
1524
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
1525
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
1526
9
    if (config::force_immediate_recycle) {
1527
3
        return 0L;
1528
3
    }
1529
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
1530
6
                                                            : partition_meta_pb.creation_time();
1531
6
    int64_t retention_seconds = config::retention_seconds;
1532
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
1533
6
        retention_seconds =
1534
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1535
6
    }
1536
6
    int64_t final_expiration = expiration + retention_seconds;
1537
6
    if (*earlest_ts > final_expiration) {
1538
2
        *earlest_ts = final_expiration;
1539
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1540
2
    }
1541
6
    return final_expiration;
1542
9
}
1543
1544
int64_t calculate_index_expired_time(const std::string& instance_id_,
1545
                                     const RecycleIndexPB& index_meta_pb,
1546
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1547
10
    if (config::force_immediate_recycle) {
1548
4
        return 0L;
1549
4
    }
1550
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1551
6
                                                        : index_meta_pb.creation_time();
1552
6
    int64_t retention_seconds = config::retention_seconds;
1553
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1554
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1555
6
    }
1556
6
    int64_t final_expiration = expiration + retention_seconds;
1557
6
    if (*earlest_ts > final_expiration) {
1558
2
        *earlest_ts = final_expiration;
1559
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1560
2
    }
1561
6
    return final_expiration;
1562
10
}
1563
1564
int64_t calculate_tmp_rowset_expired_time(
1565
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1566
57.0k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1567
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1568
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1569
    //  duration or timeout always < `retention_time` in practice.
1570
57.0k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1571
57.0k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1572
57.0k
                                 : tmp_rowset_meta_pb.creation_time();
1573
57.0k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1574
57.0k
    int64_t final_expiration = expiration + config::retention_seconds;
1575
57.0k
    if (*earlest_ts > final_expiration) {
1576
6
        *earlest_ts = final_expiration;
1577
6
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1578
6
    }
1579
57.0k
    return final_expiration;
1580
57.0k
}
1581
1582
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1583
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1584
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1585
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1586
8
        *earlest_ts = final_expiration / 1000;
1587
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1588
8
    }
1589
30.0k
    return final_expiration;
1590
30.0k
}
1591
1592
int64_t calculate_restore_job_expired_time(
1593
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1594
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1595
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1596
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1597
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1598
        // final state, recycle immediately
1599
41
        return 0L;
1600
41
    }
1601
    // not final state, wait much longer than the FE's timeout(1 day)
1602
0
    int64_t last_modified_s =
1603
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1604
0
    int64_t expiration = restore_job.expired_at_s() > 0
1605
0
                                 ? last_modified_s + restore_job.expired_at_s()
1606
0
                                 : last_modified_s;
1607
0
    int64_t final_expiration = expiration + config::retention_seconds;
1608
0
    if (*earlest_ts > final_expiration) {
1609
0
        *earlest_ts = final_expiration;
1610
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1611
0
    }
1612
0
    return final_expiration;
1613
41
}
1614
1615
17
int InstanceRecycler::recycle_indexes() {
1616
17
    const std::string task_name = "recycle_indexes";
1617
17
    int64_t num_scanned = 0;
1618
17
    int64_t num_expired = 0;
1619
17
    int64_t num_recycled = 0;
1620
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1621
1622
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
1623
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
1624
17
    std::string index_key0;
1625
17
    std::string index_key1;
1626
17
    recycle_index_key(index_key_info0, &index_key0);
1627
17
    recycle_index_key(index_key_info1, &index_key1);
1628
1629
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
1630
1631
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1632
17
    register_recycle_task(task_name, start_time);
1633
1634
17
    DORIS_CLOUD_DEFER {
1635
17
        unregister_recycle_task(task_name);
1636
17
        int64_t cost =
1637
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1638
17
        metrics_context.finish_report();
1639
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1640
17
                .tag("instance_id", instance_id_)
1641
17
                .tag("num_scanned", num_scanned)
1642
17
                .tag("num_expired", num_expired)
1643
17
                .tag("num_recycled", num_recycled);
1644
17
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1634
2
    DORIS_CLOUD_DEFER {
1635
2
        unregister_recycle_task(task_name);
1636
2
        int64_t cost =
1637
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1638
2
        metrics_context.finish_report();
1639
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1640
2
                .tag("instance_id", instance_id_)
1641
2
                .tag("num_scanned", num_scanned)
1642
2
                .tag("num_expired", num_expired)
1643
2
                .tag("num_recycled", num_recycled);
1644
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1634
15
    DORIS_CLOUD_DEFER {
1635
15
        unregister_recycle_task(task_name);
1636
15
        int64_t cost =
1637
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1638
15
        metrics_context.finish_report();
1639
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1640
15
                .tag("instance_id", instance_id_)
1641
15
                .tag("num_scanned", num_scanned)
1642
15
                .tag("num_expired", num_expired)
1643
15
                .tag("num_recycled", num_recycled);
1644
15
    };
1645
1646
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1647
1648
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
1649
17
    std::vector<std::string_view> index_keys;
1650
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1651
10
        ++num_scanned;
1652
10
        RecycleIndexPB index_pb;
1653
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1654
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1655
0
            return -1;
1656
0
        }
1657
10
        int64_t current_time = ::time(nullptr);
1658
10
        if (current_time <
1659
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1660
0
            return 0;
1661
0
        }
1662
10
        ++num_expired;
1663
        // decode index_id
1664
10
        auto k1 = k;
1665
10
        k1.remove_prefix(1);
1666
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1667
10
        decode_key(&k1, &out);
1668
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1669
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1670
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1671
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1672
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1673
        // Change state to RECYCLING
1674
10
        std::unique_ptr<Transaction> txn;
1675
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1676
10
        if (err != TxnErrorCode::TXN_OK) {
1677
0
            LOG_WARNING("failed to create txn").tag("err", err);
1678
0
            return -1;
1679
0
        }
1680
10
        std::string val;
1681
10
        err = txn->get(k, &val);
1682
10
        if (err ==
1683
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1684
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1685
0
            return 0;
1686
0
        }
1687
10
        if (err != TxnErrorCode::TXN_OK) {
1688
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1689
0
            return -1;
1690
0
        }
1691
10
        index_pb.Clear();
1692
10
        if (!index_pb.ParseFromString(val)) {
1693
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1694
0
            return -1;
1695
0
        }
1696
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1697
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1698
9
            txn->put(k, index_pb.SerializeAsString());
1699
9
            err = txn->commit();
1700
9
            if (err != TxnErrorCode::TXN_OK) {
1701
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1702
0
                return -1;
1703
0
            }
1704
9
        }
1705
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1706
1
            LOG_WARNING("failed to recycle tablets under index")
1707
1
                    .tag("table_id", index_pb.table_id())
1708
1
                    .tag("instance_id", instance_id_)
1709
1
                    .tag("index_id", index_id);
1710
1
            return -1;
1711
1
        }
1712
1713
9
        if (index_pb.has_db_id()) {
1714
            // Recycle the versioned keys
1715
3
            std::unique_ptr<Transaction> txn;
1716
3
            err = txn_kv_->create_txn(&txn);
1717
3
            if (err != TxnErrorCode::TXN_OK) {
1718
0
                LOG_WARNING("failed to create txn").tag("err", err);
1719
0
                return -1;
1720
0
            }
1721
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1722
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1723
3
            std::string index_inverted_key = versioned::index_inverted_key(
1724
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1725
3
            versioned_remove_all(txn.get(), meta_key);
1726
3
            txn->remove(index_key);
1727
3
            txn->remove(index_inverted_key);
1728
3
            err = txn->commit();
1729
3
            if (err != TxnErrorCode::TXN_OK) {
1730
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1731
0
                return -1;
1732
0
            }
1733
3
        }
1734
1735
9
        metrics_context.total_recycled_num = ++num_recycled;
1736
9
        metrics_context.report();
1737
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1738
9
        index_keys.push_back(k);
1739
9
        return 0;
1740
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1650
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1651
2
        ++num_scanned;
1652
2
        RecycleIndexPB index_pb;
1653
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1654
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1655
0
            return -1;
1656
0
        }
1657
2
        int64_t current_time = ::time(nullptr);
1658
2
        if (current_time <
1659
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1660
0
            return 0;
1661
0
        }
1662
2
        ++num_expired;
1663
        // decode index_id
1664
2
        auto k1 = k;
1665
2
        k1.remove_prefix(1);
1666
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1667
2
        decode_key(&k1, &out);
1668
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1669
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1670
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1671
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1672
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1673
        // Change state to RECYCLING
1674
2
        std::unique_ptr<Transaction> txn;
1675
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1676
2
        if (err != TxnErrorCode::TXN_OK) {
1677
0
            LOG_WARNING("failed to create txn").tag("err", err);
1678
0
            return -1;
1679
0
        }
1680
2
        std::string val;
1681
2
        err = txn->get(k, &val);
1682
2
        if (err ==
1683
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1684
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1685
0
            return 0;
1686
0
        }
1687
2
        if (err != TxnErrorCode::TXN_OK) {
1688
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1689
0
            return -1;
1690
0
        }
1691
2
        index_pb.Clear();
1692
2
        if (!index_pb.ParseFromString(val)) {
1693
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1694
0
            return -1;
1695
0
        }
1696
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1697
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1698
1
            txn->put(k, index_pb.SerializeAsString());
1699
1
            err = txn->commit();
1700
1
            if (err != TxnErrorCode::TXN_OK) {
1701
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1702
0
                return -1;
1703
0
            }
1704
1
        }
1705
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1706
1
            LOG_WARNING("failed to recycle tablets under index")
1707
1
                    .tag("table_id", index_pb.table_id())
1708
1
                    .tag("instance_id", instance_id_)
1709
1
                    .tag("index_id", index_id);
1710
1
            return -1;
1711
1
        }
1712
1713
1
        if (index_pb.has_db_id()) {
1714
            // Recycle the versioned keys
1715
1
            std::unique_ptr<Transaction> txn;
1716
1
            err = txn_kv_->create_txn(&txn);
1717
1
            if (err != TxnErrorCode::TXN_OK) {
1718
0
                LOG_WARNING("failed to create txn").tag("err", err);
1719
0
                return -1;
1720
0
            }
1721
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1722
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1723
1
            std::string index_inverted_key = versioned::index_inverted_key(
1724
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1725
1
            versioned_remove_all(txn.get(), meta_key);
1726
1
            txn->remove(index_key);
1727
1
            txn->remove(index_inverted_key);
1728
1
            err = txn->commit();
1729
1
            if (err != TxnErrorCode::TXN_OK) {
1730
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1731
0
                return -1;
1732
0
            }
1733
1
        }
1734
1735
1
        metrics_context.total_recycled_num = ++num_recycled;
1736
1
        metrics_context.report();
1737
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1738
1
        index_keys.push_back(k);
1739
1
        return 0;
1740
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1650
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1651
8
        ++num_scanned;
1652
8
        RecycleIndexPB index_pb;
1653
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1654
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1655
0
            return -1;
1656
0
        }
1657
8
        int64_t current_time = ::time(nullptr);
1658
8
        if (current_time <
1659
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1660
0
            return 0;
1661
0
        }
1662
8
        ++num_expired;
1663
        // decode index_id
1664
8
        auto k1 = k;
1665
8
        k1.remove_prefix(1);
1666
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1667
8
        decode_key(&k1, &out);
1668
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1669
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1670
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1671
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1672
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1673
        // Change state to RECYCLING
1674
8
        std::unique_ptr<Transaction> txn;
1675
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1676
8
        if (err != TxnErrorCode::TXN_OK) {
1677
0
            LOG_WARNING("failed to create txn").tag("err", err);
1678
0
            return -1;
1679
0
        }
1680
8
        std::string val;
1681
8
        err = txn->get(k, &val);
1682
8
        if (err ==
1683
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1684
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1685
0
            return 0;
1686
0
        }
1687
8
        if (err != TxnErrorCode::TXN_OK) {
1688
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1689
0
            return -1;
1690
0
        }
1691
8
        index_pb.Clear();
1692
8
        if (!index_pb.ParseFromString(val)) {
1693
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1694
0
            return -1;
1695
0
        }
1696
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1697
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1698
8
            txn->put(k, index_pb.SerializeAsString());
1699
8
            err = txn->commit();
1700
8
            if (err != TxnErrorCode::TXN_OK) {
1701
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1702
0
                return -1;
1703
0
            }
1704
8
        }
1705
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1706
0
            LOG_WARNING("failed to recycle tablets under index")
1707
0
                    .tag("table_id", index_pb.table_id())
1708
0
                    .tag("instance_id", instance_id_)
1709
0
                    .tag("index_id", index_id);
1710
0
            return -1;
1711
0
        }
1712
1713
8
        if (index_pb.has_db_id()) {
1714
            // Recycle the versioned keys
1715
2
            std::unique_ptr<Transaction> txn;
1716
2
            err = txn_kv_->create_txn(&txn);
1717
2
            if (err != TxnErrorCode::TXN_OK) {
1718
0
                LOG_WARNING("failed to create txn").tag("err", err);
1719
0
                return -1;
1720
0
            }
1721
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1722
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1723
2
            std::string index_inverted_key = versioned::index_inverted_key(
1724
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1725
2
            versioned_remove_all(txn.get(), meta_key);
1726
2
            txn->remove(index_key);
1727
2
            txn->remove(index_inverted_key);
1728
2
            err = txn->commit();
1729
2
            if (err != TxnErrorCode::TXN_OK) {
1730
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1731
0
                return -1;
1732
0
            }
1733
2
        }
1734
1735
8
        metrics_context.total_recycled_num = ++num_recycled;
1736
8
        metrics_context.report();
1737
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1738
8
        index_keys.push_back(k);
1739
8
        return 0;
1740
8
    };
1741
1742
17
    auto loop_done = [&index_keys, this]() -> int {
1743
6
        if (index_keys.empty()) return 0;
1744
5
        DORIS_CLOUD_DEFER {
1745
5
            index_keys.clear();
1746
5
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1744
1
        DORIS_CLOUD_DEFER {
1745
1
            index_keys.clear();
1746
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1744
4
        DORIS_CLOUD_DEFER {
1745
4
            index_keys.clear();
1746
4
        };
1747
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1748
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1749
0
            return -1;
1750
0
        }
1751
5
        return 0;
1752
5
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1742
2
    auto loop_done = [&index_keys, this]() -> int {
1743
2
        if (index_keys.empty()) return 0;
1744
1
        DORIS_CLOUD_DEFER {
1745
1
            index_keys.clear();
1746
1
        };
1747
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1748
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1749
0
            return -1;
1750
0
        }
1751
1
        return 0;
1752
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1742
4
    auto loop_done = [&index_keys, this]() -> int {
1743
4
        if (index_keys.empty()) return 0;
1744
4
        DORIS_CLOUD_DEFER {
1745
4
            index_keys.clear();
1746
4
        };
1747
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1748
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1749
0
            return -1;
1750
0
        }
1751
4
        return 0;
1752
4
    };
1753
1754
17
    if (config::enable_recycler_stats_metrics) {
1755
0
        scan_and_statistics_indexes();
1756
0
    }
1757
    // recycle_func and loop_done for scan and recycle
1758
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
1759
17
}
1760
1761
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
1762
8.24k
                             int64_t tablet_id) {
1763
8.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
1764
1765
8.23k
    std::unique_ptr<Transaction> txn;
1766
8.23k
    TxnErrorCode err = txn_kv->create_txn(&txn);
1767
8.23k
    if (err != TxnErrorCode::TXN_OK) {
1768
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
1769
0
                     << " tablet_id=" << tablet_id << " err=" << err;
1770
0
        return false;
1771
0
    }
1772
1773
8.23k
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
1774
8.23k
    std::string tablet_idx_val;
1775
8.23k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
1776
8.23k
    if (TxnErrorCode::TXN_OK != err) {
1777
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
1778
0
                     << " tablet_id=" << tablet_id << " err=" << err
1779
0
                     << " key=" << hex(tablet_idx_key);
1780
0
        return false;
1781
0
    }
1782
1783
8.23k
    TabletIndexPB tablet_idx_pb;
1784
8.23k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
1785
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
1786
0
                     << " tablet_id=" << tablet_id;
1787
0
        return false;
1788
0
    }
1789
1790
8.23k
    if (!tablet_idx_pb.has_db_id()) {
1791
        // In the previous version, the db_id was not set in the index_pb.
1792
        // If updating to the version which enable txn lazy commit, the db_id will be set.
1793
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
1794
0
                  << " instance_id=" << instance_id
1795
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
1796
0
        return true;
1797
0
    }
1798
1799
8.23k
    std::string ver_val;
1800
8.23k
    std::string ver_key =
1801
8.23k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
1802
8.23k
                                   tablet_idx_pb.partition_id()});
1803
8.23k
    err = txn->get(ver_key, &ver_val);
1804
1805
8.23k
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
1806
202
        LOG(INFO) << ""
1807
202
                     "partition version not found, instance_id="
1808
202
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
1809
202
                  << " table_id=" << tablet_idx_pb.table_id()
1810
202
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
1811
202
                  << " key=" << hex(ver_key);
1812
202
        return true;
1813
202
    }
1814
1815
8.03k
    if (TxnErrorCode::TXN_OK != err) {
1816
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
1817
0
                     << " db_id=" << tablet_idx_pb.db_id()
1818
0
                     << " table_id=" << tablet_idx_pb.table_id()
1819
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1820
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
1821
0
        return false;
1822
0
    }
1823
1824
8.03k
    VersionPB version_pb;
1825
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
1826
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
1827
0
                     << " db_id=" << tablet_idx_pb.db_id()
1828
0
                     << " table_id=" << tablet_idx_pb.table_id()
1829
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1830
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
1831
0
        return false;
1832
0
    }
1833
1834
8.03k
    if (version_pb.pending_txn_ids_size() > 0) {
1835
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
1836
4.00k
        DCHECK(version_pb.pending_txn_ids_size() == 1);
1837
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
1838
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
1839
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
1840
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
1841
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
1842
4.00k
                     << " key=" << hex(ver_key);
1843
4.00k
        return false;
1844
4.00k
    }
1845
4.03k
    return true;
1846
8.03k
}
1847
1848
15
int InstanceRecycler::recycle_partitions() {
1849
15
    const std::string task_name = "recycle_partitions";
1850
15
    int64_t num_scanned = 0;
1851
15
    int64_t num_expired = 0;
1852
15
    int64_t num_recycled = 0;
1853
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1854
1855
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
1856
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
1857
15
    std::string part_key0;
1858
15
    std::string part_key1;
1859
15
    recycle_partition_key(part_key_info0, &part_key0);
1860
15
    recycle_partition_key(part_key_info1, &part_key1);
1861
1862
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
1863
1864
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1865
15
    register_recycle_task(task_name, start_time);
1866
1867
15
    DORIS_CLOUD_DEFER {
1868
15
        unregister_recycle_task(task_name);
1869
15
        int64_t cost =
1870
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1871
15
        metrics_context.finish_report();
1872
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1873
15
                .tag("instance_id", instance_id_)
1874
15
                .tag("num_scanned", num_scanned)
1875
15
                .tag("num_expired", num_expired)
1876
15
                .tag("num_recycled", num_recycled);
1877
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1867
2
    DORIS_CLOUD_DEFER {
1868
2
        unregister_recycle_task(task_name);
1869
2
        int64_t cost =
1870
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1871
2
        metrics_context.finish_report();
1872
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1873
2
                .tag("instance_id", instance_id_)
1874
2
                .tag("num_scanned", num_scanned)
1875
2
                .tag("num_expired", num_expired)
1876
2
                .tag("num_recycled", num_recycled);
1877
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1867
13
    DORIS_CLOUD_DEFER {
1868
13
        unregister_recycle_task(task_name);
1869
13
        int64_t cost =
1870
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1871
13
        metrics_context.finish_report();
1872
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1873
13
                .tag("instance_id", instance_id_)
1874
13
                .tag("num_scanned", num_scanned)
1875
13
                .tag("num_expired", num_expired)
1876
13
                .tag("num_recycled", num_recycled);
1877
13
    };
1878
1879
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1880
1881
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
1882
15
    std::vector<std::string_view> partition_keys;
1883
15
    std::vector<std::string> partition_version_keys;
1884
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1885
9
        ++num_scanned;
1886
9
        RecyclePartitionPB part_pb;
1887
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1888
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1889
0
            return -1;
1890
0
        }
1891
9
        int64_t current_time = ::time(nullptr);
1892
9
        if (current_time <
1893
9
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1894
0
            return 0;
1895
0
        }
1896
9
        ++num_expired;
1897
        // decode partition_id
1898
9
        auto k1 = k;
1899
9
        k1.remove_prefix(1);
1900
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1901
9
        decode_key(&k1, &out);
1902
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1903
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1904
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1905
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1906
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1907
        // Change state to RECYCLING
1908
9
        std::unique_ptr<Transaction> txn;
1909
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1910
9
        if (err != TxnErrorCode::TXN_OK) {
1911
0
            LOG_WARNING("failed to create txn").tag("err", err);
1912
0
            return -1;
1913
0
        }
1914
9
        std::string val;
1915
9
        err = txn->get(k, &val);
1916
9
        if (err ==
1917
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1918
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1919
0
            return 0;
1920
0
        }
1921
9
        if (err != TxnErrorCode::TXN_OK) {
1922
0
            LOG_WARNING("failed to get kv");
1923
0
            return -1;
1924
0
        }
1925
9
        part_pb.Clear();
1926
9
        if (!part_pb.ParseFromString(val)) {
1927
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1928
0
            return -1;
1929
0
        }
1930
        // Partitions with PREPARED state MUST have no data
1931
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1932
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1933
8
            txn->put(k, part_pb.SerializeAsString());
1934
8
            err = txn->commit();
1935
8
            if (err != TxnErrorCode::TXN_OK) {
1936
0
                LOG_WARNING("failed to commit txn: {}", err);
1937
0
                return -1;
1938
0
            }
1939
8
        }
1940
1941
9
        int ret = 0;
1942
33
        for (int64_t index_id : part_pb.index_id()) {
1943
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1944
1
                LOG_WARNING("failed to recycle tablets under partition")
1945
1
                        .tag("table_id", part_pb.table_id())
1946
1
                        .tag("instance_id", instance_id_)
1947
1
                        .tag("index_id", index_id)
1948
1
                        .tag("partition_id", partition_id);
1949
1
                ret = -1;
1950
1
            }
1951
33
        }
1952
9
        if (ret == 0 && part_pb.has_db_id()) {
1953
            // Recycle the versioned keys
1954
8
            std::unique_ptr<Transaction> txn;
1955
8
            err = txn_kv_->create_txn(&txn);
1956
8
            if (err != TxnErrorCode::TXN_OK) {
1957
0
                LOG_WARNING("failed to create txn").tag("err", err);
1958
0
                return -1;
1959
0
            }
1960
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1961
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1962
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1963
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1964
8
            std::string partition_version_key =
1965
8
                    versioned::partition_version_key({instance_id_, partition_id});
1966
8
            versioned_remove_all(txn.get(), meta_key);
1967
8
            txn->remove(index_key);
1968
8
            txn->remove(inverted_index_key);
1969
8
            versioned_remove_all(txn.get(), partition_version_key);
1970
8
            err = txn->commit();
1971
8
            if (err != TxnErrorCode::TXN_OK) {
1972
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1973
0
                return -1;
1974
0
            }
1975
8
        }
1976
1977
9
        if (ret == 0) {
1978
8
            ++num_recycled;
1979
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1980
8
            partition_keys.push_back(k);
1981
8
            if (part_pb.db_id() > 0) {
1982
8
                partition_version_keys.push_back(partition_version_key(
1983
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1984
8
            }
1985
8
            metrics_context.total_recycled_num = num_recycled;
1986
8
            metrics_context.report();
1987
8
        }
1988
9
        return ret;
1989
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1884
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1885
2
        ++num_scanned;
1886
2
        RecyclePartitionPB part_pb;
1887
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1888
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1889
0
            return -1;
1890
0
        }
1891
2
        int64_t current_time = ::time(nullptr);
1892
2
        if (current_time <
1893
2
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1894
0
            return 0;
1895
0
        }
1896
2
        ++num_expired;
1897
        // decode partition_id
1898
2
        auto k1 = k;
1899
2
        k1.remove_prefix(1);
1900
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1901
2
        decode_key(&k1, &out);
1902
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1903
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1904
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1905
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1906
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1907
        // Change state to RECYCLING
1908
2
        std::unique_ptr<Transaction> txn;
1909
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1910
2
        if (err != TxnErrorCode::TXN_OK) {
1911
0
            LOG_WARNING("failed to create txn").tag("err", err);
1912
0
            return -1;
1913
0
        }
1914
2
        std::string val;
1915
2
        err = txn->get(k, &val);
1916
2
        if (err ==
1917
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1918
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1919
0
            return 0;
1920
0
        }
1921
2
        if (err != TxnErrorCode::TXN_OK) {
1922
0
            LOG_WARNING("failed to get kv");
1923
0
            return -1;
1924
0
        }
1925
2
        part_pb.Clear();
1926
2
        if (!part_pb.ParseFromString(val)) {
1927
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1928
0
            return -1;
1929
0
        }
1930
        // Partitions with PREPARED state MUST have no data
1931
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1932
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1933
1
            txn->put(k, part_pb.SerializeAsString());
1934
1
            err = txn->commit();
1935
1
            if (err != TxnErrorCode::TXN_OK) {
1936
0
                LOG_WARNING("failed to commit txn: {}", err);
1937
0
                return -1;
1938
0
            }
1939
1
        }
1940
1941
2
        int ret = 0;
1942
2
        for (int64_t index_id : part_pb.index_id()) {
1943
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1944
1
                LOG_WARNING("failed to recycle tablets under partition")
1945
1
                        .tag("table_id", part_pb.table_id())
1946
1
                        .tag("instance_id", instance_id_)
1947
1
                        .tag("index_id", index_id)
1948
1
                        .tag("partition_id", partition_id);
1949
1
                ret = -1;
1950
1
            }
1951
2
        }
1952
2
        if (ret == 0 && part_pb.has_db_id()) {
1953
            // Recycle the versioned keys
1954
1
            std::unique_ptr<Transaction> txn;
1955
1
            err = txn_kv_->create_txn(&txn);
1956
1
            if (err != TxnErrorCode::TXN_OK) {
1957
0
                LOG_WARNING("failed to create txn").tag("err", err);
1958
0
                return -1;
1959
0
            }
1960
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1961
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1962
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1963
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1964
1
            std::string partition_version_key =
1965
1
                    versioned::partition_version_key({instance_id_, partition_id});
1966
1
            versioned_remove_all(txn.get(), meta_key);
1967
1
            txn->remove(index_key);
1968
1
            txn->remove(inverted_index_key);
1969
1
            versioned_remove_all(txn.get(), partition_version_key);
1970
1
            err = txn->commit();
1971
1
            if (err != TxnErrorCode::TXN_OK) {
1972
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1973
0
                return -1;
1974
0
            }
1975
1
        }
1976
1977
2
        if (ret == 0) {
1978
1
            ++num_recycled;
1979
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1980
1
            partition_keys.push_back(k);
1981
1
            if (part_pb.db_id() > 0) {
1982
1
                partition_version_keys.push_back(partition_version_key(
1983
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1984
1
            }
1985
1
            metrics_context.total_recycled_num = num_recycled;
1986
1
            metrics_context.report();
1987
1
        }
1988
2
        return ret;
1989
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1884
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1885
7
        ++num_scanned;
1886
7
        RecyclePartitionPB part_pb;
1887
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1888
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1889
0
            return -1;
1890
0
        }
1891
7
        int64_t current_time = ::time(nullptr);
1892
7
        if (current_time <
1893
7
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1894
0
            return 0;
1895
0
        }
1896
7
        ++num_expired;
1897
        // decode partition_id
1898
7
        auto k1 = k;
1899
7
        k1.remove_prefix(1);
1900
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1901
7
        decode_key(&k1, &out);
1902
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1903
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1904
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1905
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1906
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1907
        // Change state to RECYCLING
1908
7
        std::unique_ptr<Transaction> txn;
1909
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1910
7
        if (err != TxnErrorCode::TXN_OK) {
1911
0
            LOG_WARNING("failed to create txn").tag("err", err);
1912
0
            return -1;
1913
0
        }
1914
7
        std::string val;
1915
7
        err = txn->get(k, &val);
1916
7
        if (err ==
1917
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1918
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1919
0
            return 0;
1920
0
        }
1921
7
        if (err != TxnErrorCode::TXN_OK) {
1922
0
            LOG_WARNING("failed to get kv");
1923
0
            return -1;
1924
0
        }
1925
7
        part_pb.Clear();
1926
7
        if (!part_pb.ParseFromString(val)) {
1927
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1928
0
            return -1;
1929
0
        }
1930
        // Partitions with PREPARED state MUST have no data
1931
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1932
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1933
7
            txn->put(k, part_pb.SerializeAsString());
1934
7
            err = txn->commit();
1935
7
            if (err != TxnErrorCode::TXN_OK) {
1936
0
                LOG_WARNING("failed to commit txn: {}", err);
1937
0
                return -1;
1938
0
            }
1939
7
        }
1940
1941
7
        int ret = 0;
1942
31
        for (int64_t index_id : part_pb.index_id()) {
1943
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1944
0
                LOG_WARNING("failed to recycle tablets under partition")
1945
0
                        .tag("table_id", part_pb.table_id())
1946
0
                        .tag("instance_id", instance_id_)
1947
0
                        .tag("index_id", index_id)
1948
0
                        .tag("partition_id", partition_id);
1949
0
                ret = -1;
1950
0
            }
1951
31
        }
1952
7
        if (ret == 0 && part_pb.has_db_id()) {
1953
            // Recycle the versioned keys
1954
7
            std::unique_ptr<Transaction> txn;
1955
7
            err = txn_kv_->create_txn(&txn);
1956
7
            if (err != TxnErrorCode::TXN_OK) {
1957
0
                LOG_WARNING("failed to create txn").tag("err", err);
1958
0
                return -1;
1959
0
            }
1960
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1961
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1962
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1963
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1964
7
            std::string partition_version_key =
1965
7
                    versioned::partition_version_key({instance_id_, partition_id});
1966
7
            versioned_remove_all(txn.get(), meta_key);
1967
7
            txn->remove(index_key);
1968
7
            txn->remove(inverted_index_key);
1969
7
            versioned_remove_all(txn.get(), partition_version_key);
1970
7
            err = txn->commit();
1971
7
            if (err != TxnErrorCode::TXN_OK) {
1972
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1973
0
                return -1;
1974
0
            }
1975
7
        }
1976
1977
7
        if (ret == 0) {
1978
7
            ++num_recycled;
1979
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1980
7
            partition_keys.push_back(k);
1981
7
            if (part_pb.db_id() > 0) {
1982
7
                partition_version_keys.push_back(partition_version_key(
1983
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1984
7
            }
1985
7
            metrics_context.total_recycled_num = num_recycled;
1986
7
            metrics_context.report();
1987
7
        }
1988
7
        return ret;
1989
7
    };
1990
1991
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1992
5
        if (partition_keys.empty()) return 0;
1993
4
        DORIS_CLOUD_DEFER {
1994
4
            partition_keys.clear();
1995
4
            partition_version_keys.clear();
1996
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1993
1
        DORIS_CLOUD_DEFER {
1994
1
            partition_keys.clear();
1995
1
            partition_version_keys.clear();
1996
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1993
3
        DORIS_CLOUD_DEFER {
1994
3
            partition_keys.clear();
1995
3
            partition_version_keys.clear();
1996
3
        };
1997
4
        std::unique_ptr<Transaction> txn;
1998
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1999
4
        if (err != TxnErrorCode::TXN_OK) {
2000
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2001
0
            return -1;
2002
0
        }
2003
8
        for (auto& k : partition_keys) {
2004
8
            txn->remove(k);
2005
8
        }
2006
8
        for (auto& k : partition_version_keys) {
2007
8
            txn->remove(k);
2008
8
        }
2009
4
        err = txn->commit();
2010
4
        if (err != TxnErrorCode::TXN_OK) {
2011
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2012
0
                         << " err=" << err;
2013
0
            return -1;
2014
0
        }
2015
4
        return 0;
2016
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1991
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1992
2
        if (partition_keys.empty()) return 0;
1993
1
        DORIS_CLOUD_DEFER {
1994
1
            partition_keys.clear();
1995
1
            partition_version_keys.clear();
1996
1
        };
1997
1
        std::unique_ptr<Transaction> txn;
1998
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1999
1
        if (err != TxnErrorCode::TXN_OK) {
2000
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2001
0
            return -1;
2002
0
        }
2003
1
        for (auto& k : partition_keys) {
2004
1
            txn->remove(k);
2005
1
        }
2006
1
        for (auto& k : partition_version_keys) {
2007
1
            txn->remove(k);
2008
1
        }
2009
1
        err = txn->commit();
2010
1
        if (err != TxnErrorCode::TXN_OK) {
2011
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2012
0
                         << " err=" << err;
2013
0
            return -1;
2014
0
        }
2015
1
        return 0;
2016
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1991
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1992
3
        if (partition_keys.empty()) return 0;
1993
3
        DORIS_CLOUD_DEFER {
1994
3
            partition_keys.clear();
1995
3
            partition_version_keys.clear();
1996
3
        };
1997
3
        std::unique_ptr<Transaction> txn;
1998
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1999
3
        if (err != TxnErrorCode::TXN_OK) {
2000
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2001
0
            return -1;
2002
0
        }
2003
7
        for (auto& k : partition_keys) {
2004
7
            txn->remove(k);
2005
7
        }
2006
7
        for (auto& k : partition_version_keys) {
2007
7
            txn->remove(k);
2008
7
        }
2009
3
        err = txn->commit();
2010
3
        if (err != TxnErrorCode::TXN_OK) {
2011
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2012
0
                         << " err=" << err;
2013
0
            return -1;
2014
0
        }
2015
3
        return 0;
2016
3
    };
2017
2018
15
    if (config::enable_recycler_stats_metrics) {
2019
0
        scan_and_statistics_partitions();
2020
0
    }
2021
    // recycle_func and loop_done for scan and recycle
2022
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
2023
15
}
2024
2025
14
int InstanceRecycler::recycle_versions() {
2026
14
    if (should_recycle_versioned_keys()) {
2027
2
        return recycle_orphan_partitions();
2028
2
    }
2029
2030
12
    int64_t num_scanned = 0;
2031
12
    int64_t num_recycled = 0;
2032
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
2033
2034
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
2035
2036
12
    auto start_time = steady_clock::now();
2037
2038
12
    DORIS_CLOUD_DEFER {
2039
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2040
12
        metrics_context.finish_report();
2041
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2042
12
                .tag("instance_id", instance_id_)
2043
12
                .tag("num_scanned", num_scanned)
2044
12
                .tag("num_recycled", num_recycled);
2045
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
2038
12
    DORIS_CLOUD_DEFER {
2039
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2040
12
        metrics_context.finish_report();
2041
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2042
12
                .tag("instance_id", instance_id_)
2043
12
                .tag("num_scanned", num_scanned)
2044
12
                .tag("num_recycled", num_recycled);
2045
12
    };
2046
2047
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
2048
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
2049
12
    int64_t last_scanned_table_id = 0;
2050
12
    bool is_recycled = false; // Is last scanned kv recycled
2051
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
2052
12
                         &metrics_context, this](std::string_view k, std::string_view) {
2053
2
        ++num_scanned;
2054
2
        auto k1 = k;
2055
2
        k1.remove_prefix(1);
2056
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2057
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2058
2
        decode_key(&k1, &out);
2059
2
        DCHECK_EQ(out.size(), 6) << k;
2060
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2061
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2062
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2063
0
            return 0;
2064
0
        }
2065
2
        last_scanned_table_id = table_id;
2066
2
        is_recycled = false;
2067
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2068
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2069
2
        std::unique_ptr<Transaction> txn;
2070
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2071
2
        if (err != TxnErrorCode::TXN_OK) {
2072
0
            return -1;
2073
0
        }
2074
2
        std::unique_ptr<RangeGetIterator> iter;
2075
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2076
2
        if (err != TxnErrorCode::TXN_OK) {
2077
0
            return -1;
2078
0
        }
2079
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2080
1
            return 0;
2081
1
        }
2082
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2083
        // 1. Remove all partition version kvs of this table
2084
1
        auto partition_version_key_begin =
2085
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2086
1
        auto partition_version_key_end =
2087
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2088
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2089
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2090
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2091
1
                     << " table_id=" << table_id;
2092
        // 2. Remove the table version kv of this table
2093
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2094
1
        txn->remove(tbl_version_key);
2095
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2096
        // 3. Remove mow delete bitmap update lock and tablet job lock
2097
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2098
1
        txn->remove(lock_key);
2099
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2100
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2101
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2102
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2103
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2104
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2105
1
                     << " table_id=" << table_id;
2106
1
        err = txn->commit();
2107
1
        if (err != TxnErrorCode::TXN_OK) {
2108
0
            return -1;
2109
0
        }
2110
1
        metrics_context.total_recycled_num = ++num_recycled;
2111
1
        metrics_context.report();
2112
1
        is_recycled = true;
2113
1
        return 0;
2114
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2052
2
                         &metrics_context, this](std::string_view k, std::string_view) {
2053
2
        ++num_scanned;
2054
2
        auto k1 = k;
2055
2
        k1.remove_prefix(1);
2056
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2057
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2058
2
        decode_key(&k1, &out);
2059
2
        DCHECK_EQ(out.size(), 6) << k;
2060
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2061
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2062
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2063
0
            return 0;
2064
0
        }
2065
2
        last_scanned_table_id = table_id;
2066
2
        is_recycled = false;
2067
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2068
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2069
2
        std::unique_ptr<Transaction> txn;
2070
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2071
2
        if (err != TxnErrorCode::TXN_OK) {
2072
0
            return -1;
2073
0
        }
2074
2
        std::unique_ptr<RangeGetIterator> iter;
2075
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2076
2
        if (err != TxnErrorCode::TXN_OK) {
2077
0
            return -1;
2078
0
        }
2079
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2080
1
            return 0;
2081
1
        }
2082
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2083
        // 1. Remove all partition version kvs of this table
2084
1
        auto partition_version_key_begin =
2085
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2086
1
        auto partition_version_key_end =
2087
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2088
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2089
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2090
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2091
1
                     << " table_id=" << table_id;
2092
        // 2. Remove the table version kv of this table
2093
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2094
1
        txn->remove(tbl_version_key);
2095
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2096
        // 3. Remove mow delete bitmap update lock and tablet job lock
2097
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2098
1
        txn->remove(lock_key);
2099
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2100
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2101
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2102
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2103
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2104
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2105
1
                     << " table_id=" << table_id;
2106
1
        err = txn->commit();
2107
1
        if (err != TxnErrorCode::TXN_OK) {
2108
0
            return -1;
2109
0
        }
2110
1
        metrics_context.total_recycled_num = ++num_recycled;
2111
1
        metrics_context.report();
2112
1
        is_recycled = true;
2113
1
        return 0;
2114
1
    };
2115
2116
12
    if (config::enable_recycler_stats_metrics) {
2117
0
        scan_and_statistics_versions();
2118
0
    }
2119
    // recycle_func and loop_done for scan and recycle
2120
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
2121
14
}
2122
2123
3
int InstanceRecycler::recycle_orphan_partitions() {
2124
3
    int64_t num_scanned = 0;
2125
3
    int64_t num_recycled = 0;
2126
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
2127
2128
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
2129
3
            .tag("instance_id", instance_id_);
2130
2131
3
    auto start_time = steady_clock::now();
2132
2133
3
    DORIS_CLOUD_DEFER {
2134
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2135
3
        metrics_context.finish_report();
2136
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2137
3
                .tag("instance_id", instance_id_)
2138
3
                .tag("num_scanned", num_scanned)
2139
3
                .tag("num_recycled", num_recycled);
2140
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
2133
3
    DORIS_CLOUD_DEFER {
2134
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2135
3
        metrics_context.finish_report();
2136
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2137
3
                .tag("instance_id", instance_id_)
2138
3
                .tag("num_scanned", num_scanned)
2139
3
                .tag("num_recycled", num_recycled);
2140
3
    };
2141
2142
3
    bool is_empty_table = false;        // whether the table has no indexes
2143
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
2144
3
    int64_t current_table_id = 0;       // current scanning table id
2145
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
2146
3
                         &current_table_id, &is_table_kvs_recycled,
2147
3
                         this](std::string_view k, std::string_view) {
2148
2
        ++num_scanned;
2149
2150
2
        std::string_view k1(k);
2151
2
        int64_t db_id, table_id, partition_id;
2152
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2153
2
                                                            &partition_id)) {
2154
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2155
0
            return -1;
2156
2
        } else if (table_id != current_table_id) {
2157
2
            current_table_id = table_id;
2158
2
            is_table_kvs_recycled = false;
2159
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2160
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2161
2
            if (err != TxnErrorCode::TXN_OK) {
2162
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2163
0
                             << " table_id=" << table_id << " err=" << err;
2164
0
                return -1;
2165
0
            }
2166
2
        }
2167
2168
2
        if (!is_empty_table) {
2169
            // table is not empty, skip recycle
2170
1
            return 0;
2171
1
        }
2172
2173
1
        std::unique_ptr<Transaction> txn;
2174
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2175
1
        if (err != TxnErrorCode::TXN_OK) {
2176
0
            return -1;
2177
0
        }
2178
2179
        // 1. Remove all partition related kvs
2180
1
        std::string partition_meta_key =
2181
1
                versioned::meta_partition_key({instance_id_, partition_id});
2182
1
        std::string partition_index_key =
2183
1
                versioned::partition_index_key({instance_id_, partition_id});
2184
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2185
1
                {instance_id_, db_id, table_id, partition_id});
2186
1
        std::string partition_version_key =
2187
1
                versioned::partition_version_key({instance_id_, partition_id});
2188
1
        txn->remove(partition_index_key);
2189
1
        txn->remove(partition_inverted_key);
2190
1
        versioned_remove_all(txn.get(), partition_meta_key);
2191
1
        versioned_remove_all(txn.get(), partition_version_key);
2192
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2193
1
                     << " table_id=" << table_id << " db_id=" << db_id
2194
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2195
1
                     << " partition_version_key=" << hex(partition_version_key);
2196
2197
1
        if (!is_table_kvs_recycled) {
2198
1
            is_table_kvs_recycled = true;
2199
2200
            // 2. Remove the table version kv of this table
2201
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2202
1
            versioned_remove_all(txn.get(), table_version_key);
2203
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2204
            // 3. Remove mow delete bitmap update lock and tablet job lock
2205
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2206
1
            txn->remove(lock_key);
2207
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2208
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2209
1
            std::string tablet_job_key_end =
2210
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2211
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2212
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2213
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2214
1
                         << " table_id=" << table_id;
2215
1
        }
2216
2217
1
        err = txn->commit();
2218
1
        if (err != TxnErrorCode::TXN_OK) {
2219
0
            return -1;
2220
0
        }
2221
1
        metrics_context.total_recycled_num = ++num_recycled;
2222
1
        metrics_context.report();
2223
1
        return 0;
2224
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2147
2
                         this](std::string_view k, std::string_view) {
2148
2
        ++num_scanned;
2149
2150
2
        std::string_view k1(k);
2151
2
        int64_t db_id, table_id, partition_id;
2152
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2153
2
                                                            &partition_id)) {
2154
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2155
0
            return -1;
2156
2
        } else if (table_id != current_table_id) {
2157
2
            current_table_id = table_id;
2158
2
            is_table_kvs_recycled = false;
2159
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2160
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2161
2
            if (err != TxnErrorCode::TXN_OK) {
2162
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2163
0
                             << " table_id=" << table_id << " err=" << err;
2164
0
                return -1;
2165
0
            }
2166
2
        }
2167
2168
2
        if (!is_empty_table) {
2169
            // table is not empty, skip recycle
2170
1
            return 0;
2171
1
        }
2172
2173
1
        std::unique_ptr<Transaction> txn;
2174
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2175
1
        if (err != TxnErrorCode::TXN_OK) {
2176
0
            return -1;
2177
0
        }
2178
2179
        // 1. Remove all partition related kvs
2180
1
        std::string partition_meta_key =
2181
1
                versioned::meta_partition_key({instance_id_, partition_id});
2182
1
        std::string partition_index_key =
2183
1
                versioned::partition_index_key({instance_id_, partition_id});
2184
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2185
1
                {instance_id_, db_id, table_id, partition_id});
2186
1
        std::string partition_version_key =
2187
1
                versioned::partition_version_key({instance_id_, partition_id});
2188
1
        txn->remove(partition_index_key);
2189
1
        txn->remove(partition_inverted_key);
2190
1
        versioned_remove_all(txn.get(), partition_meta_key);
2191
1
        versioned_remove_all(txn.get(), partition_version_key);
2192
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2193
1
                     << " table_id=" << table_id << " db_id=" << db_id
2194
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2195
1
                     << " partition_version_key=" << hex(partition_version_key);
2196
2197
1
        if (!is_table_kvs_recycled) {
2198
1
            is_table_kvs_recycled = true;
2199
2200
            // 2. Remove the table version kv of this table
2201
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2202
1
            versioned_remove_all(txn.get(), table_version_key);
2203
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2204
            // 3. Remove mow delete bitmap update lock and tablet job lock
2205
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2206
1
            txn->remove(lock_key);
2207
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2208
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2209
1
            std::string tablet_job_key_end =
2210
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2211
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2212
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2213
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2214
1
                         << " table_id=" << table_id;
2215
1
        }
2216
2217
1
        err = txn->commit();
2218
1
        if (err != TxnErrorCode::TXN_OK) {
2219
0
            return -1;
2220
0
        }
2221
1
        metrics_context.total_recycled_num = ++num_recycled;
2222
1
        metrics_context.report();
2223
1
        return 0;
2224
1
    };
2225
2226
    // recycle_func and loop_done for scan and recycle
2227
3
    return scan_and_recycle(
2228
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
2229
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
2230
3
            std::move(recycle_func));
2231
3
}
2232
2233
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
2234
                                      RecyclerMetricsContext& metrics_context,
2235
47
                                      int64_t partition_id) {
2236
47
    bool is_multi_version =
2237
47
            instance_info_.has_multi_version_status() &&
2238
47
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
2239
47
    int64_t num_scanned = 0;
2240
47
    std::atomic_long num_recycled = 0;
2241
2242
47
    std::string tablet_key_begin, tablet_key_end;
2243
47
    std::string stats_key_begin, stats_key_end;
2244
47
    std::string job_key_begin, job_key_end;
2245
2246
47
    std::string tablet_belongs;
2247
47
    if (partition_id > 0) {
2248
        // recycle tablets in a partition belonging to the index
2249
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2250
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2251
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
2252
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
2253
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
2254
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
2255
33
        tablet_belongs = "partition";
2256
33
    } else {
2257
        // recycle tablets in the index
2258
14
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2259
14
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2260
14
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
2261
14
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
2262
14
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
2263
14
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
2264
14
        tablet_belongs = "index";
2265
14
    }
2266
2267
47
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
2268
47
            .tag("table_id", table_id)
2269
47
            .tag("index_id", index_id)
2270
47
            .tag("partition_id", partition_id);
2271
2272
47
    auto start_time = steady_clock::now();
2273
2274
47
    DORIS_CLOUD_DEFER {
2275
47
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2276
47
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2277
47
                .tag("instance_id", instance_id_)
2278
47
                .tag("table_id", table_id)
2279
47
                .tag("index_id", index_id)
2280
47
                .tag("partition_id", partition_id)
2281
47
                .tag("num_scanned", num_scanned)
2282
47
                .tag("num_recycled", num_recycled);
2283
47
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2274
4
    DORIS_CLOUD_DEFER {
2275
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2276
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2277
4
                .tag("instance_id", instance_id_)
2278
4
                .tag("table_id", table_id)
2279
4
                .tag("index_id", index_id)
2280
4
                .tag("partition_id", partition_id)
2281
4
                .tag("num_scanned", num_scanned)
2282
4
                .tag("num_recycled", num_recycled);
2283
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2274
43
    DORIS_CLOUD_DEFER {
2275
43
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2276
43
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2277
43
                .tag("instance_id", instance_id_)
2278
43
                .tag("table_id", table_id)
2279
43
                .tag("index_id", index_id)
2280
43
                .tag("partition_id", partition_id)
2281
43
                .tag("num_scanned", num_scanned)
2282
43
                .tag("num_recycled", num_recycled);
2283
43
    };
2284
2285
    // The first string_view represents the tablet key which has been recycled
2286
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
2287
47
    using TabletKeyPair = std::pair<std::string_view, bool>;
2288
47
    SyncExecutor<TabletKeyPair> sync_executor(
2289
47
            _thread_pool_group.recycle_tablet_pool,
2290
47
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
2291
47
                        index_id, partition_id),
2292
4.23k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2292
4.00k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2292
235
            [](const TabletKeyPair& k) { return k.first.empty(); });
2293
2294
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
2295
47
    std::vector<std::string> tablet_idx_keys;
2296
47
    std::vector<std::string> restore_job_keys;
2297
47
    std::vector<std::string> init_rs_keys;
2298
47
    std::vector<std::string> tablet_compact_stats_keys;
2299
47
    std::vector<std::string> tablet_load_stats_keys;
2300
47
    std::vector<std::string> versioned_meta_tablet_keys;
2301
8.24k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2302
8.24k
        bool use_range_remove = true;
2303
8.24k
        ++num_scanned;
2304
8.24k
        doris::TabletMetaCloudPB tablet_meta_pb;
2305
8.24k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2306
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2307
0
            use_range_remove = false;
2308
0
            return -1;
2309
0
        }
2310
8.24k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2311
2312
8.24k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2313
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2314
4.00k
            return -1;
2315
4.00k
        }
2316
2317
4.24k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2318
4.24k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2319
4.24k
        if (is_multi_version) {
2320
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2321
6
            tablet_compact_stats_keys.push_back(
2322
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2323
6
            tablet_load_stats_keys.push_back(
2324
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2325
6
            versioned_meta_tablet_keys.push_back(
2326
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2327
6
        }
2328
4.24k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2329
4.23k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2330
4.23k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2331
4.23k
            if (recycle_tablet(tid, metrics_context) != 0) {
2332
0
                LOG_WARNING("failed to recycle tablet")
2333
0
                        .tag("instance_id", instance_id_)
2334
0
                        .tag("tablet_id", tid);
2335
0
                range_move = false;
2336
0
                return {std::string_view(), range_move};
2337
0
            }
2338
4.23k
            ++num_recycled;
2339
4.23k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2340
4.23k
            return {k, range_move};
2341
4.23k
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2330
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2331
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2332
0
                LOG_WARNING("failed to recycle tablet")
2333
0
                        .tag("instance_id", instance_id_)
2334
0
                        .tag("tablet_id", tid);
2335
0
                range_move = false;
2336
0
                return {std::string_view(), range_move};
2337
0
            }
2338
4.00k
            ++num_recycled;
2339
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2340
4.00k
            return {k, range_move};
2341
4.00k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2330
235
                           &metrics_context, k]() mutable -> TabletKeyPair {
2331
235
            if (recycle_tablet(tid, metrics_context) != 0) {
2332
0
                LOG_WARNING("failed to recycle tablet")
2333
0
                        .tag("instance_id", instance_id_)
2334
0
                        .tag("tablet_id", tid);
2335
0
                range_move = false;
2336
0
                return {std::string_view(), range_move};
2337
0
            }
2338
235
            ++num_recycled;
2339
235
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2340
235
            return {k, range_move};
2341
235
        });
2342
4.23k
        return 0;
2343
4.24k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2301
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2302
8.00k
        bool use_range_remove = true;
2303
8.00k
        ++num_scanned;
2304
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
2305
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2306
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2307
0
            use_range_remove = false;
2308
0
            return -1;
2309
0
        }
2310
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2311
2312
8.00k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2313
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2314
4.00k
            return -1;
2315
4.00k
        }
2316
2317
4.00k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2318
4.00k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2319
4.00k
        if (is_multi_version) {
2320
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2321
0
            tablet_compact_stats_keys.push_back(
2322
0
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2323
0
            tablet_load_stats_keys.push_back(
2324
0
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2325
0
            versioned_meta_tablet_keys.push_back(
2326
0
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2327
0
        }
2328
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2329
4.00k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2330
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2331
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2332
4.00k
                LOG_WARNING("failed to recycle tablet")
2333
4.00k
                        .tag("instance_id", instance_id_)
2334
4.00k
                        .tag("tablet_id", tid);
2335
4.00k
                range_move = false;
2336
4.00k
                return {std::string_view(), range_move};
2337
4.00k
            }
2338
4.00k
            ++num_recycled;
2339
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2340
4.00k
            return {k, range_move};
2341
4.00k
        });
2342
4.00k
        return 0;
2343
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2301
238
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2302
238
        bool use_range_remove = true;
2303
238
        ++num_scanned;
2304
238
        doris::TabletMetaCloudPB tablet_meta_pb;
2305
238
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2306
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2307
0
            use_range_remove = false;
2308
0
            return -1;
2309
0
        }
2310
238
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2311
2312
238
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2313
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2314
0
            return -1;
2315
0
        }
2316
2317
238
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2318
238
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2319
238
        if (is_multi_version) {
2320
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2321
6
            tablet_compact_stats_keys.push_back(
2322
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2323
6
            tablet_load_stats_keys.push_back(
2324
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2325
6
            versioned_meta_tablet_keys.push_back(
2326
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2327
6
        }
2328
238
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2329
235
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2330
235
                           &metrics_context, k]() mutable -> TabletKeyPair {
2331
235
            if (recycle_tablet(tid, metrics_context) != 0) {
2332
235
                LOG_WARNING("failed to recycle tablet")
2333
235
                        .tag("instance_id", instance_id_)
2334
235
                        .tag("tablet_id", tid);
2335
235
                range_move = false;
2336
235
                return {std::string_view(), range_move};
2337
235
            }
2338
235
            ++num_recycled;
2339
235
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2340
235
            return {k, range_move};
2341
235
        });
2342
235
        return 0;
2343
238
    };
2344
2345
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
2346
47
    auto loop_done = [&, this]() -> int {
2347
47
        bool finished = true;
2348
47
        auto tablet_keys = sync_executor.when_all(&finished);
2349
47
        if (!finished) {
2350
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2351
0
            return -1;
2352
0
        }
2353
47
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2354
        // sort the vector using key's order
2355
45
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2356
49.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2356
48.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2356
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2357
45
        bool use_range_remove = true;
2358
4.23k
        for (auto& [_, remove] : tablet_keys) {
2359
4.23k
            if (!remove) {
2360
0
                use_range_remove = remove;
2361
0
                break;
2362
0
            }
2363
4.23k
        }
2364
45
        DORIS_CLOUD_DEFER {
2365
45
            tablet_idx_keys.clear();
2366
45
            restore_job_keys.clear();
2367
45
            init_rs_keys.clear();
2368
45
            tablet_compact_stats_keys.clear();
2369
45
            tablet_load_stats_keys.clear();
2370
45
            versioned_meta_tablet_keys.clear();
2371
45
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2364
2
        DORIS_CLOUD_DEFER {
2365
2
            tablet_idx_keys.clear();
2366
2
            restore_job_keys.clear();
2367
2
            init_rs_keys.clear();
2368
2
            tablet_compact_stats_keys.clear();
2369
2
            tablet_load_stats_keys.clear();
2370
2
            versioned_meta_tablet_keys.clear();
2371
2
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2364
43
        DORIS_CLOUD_DEFER {
2365
43
            tablet_idx_keys.clear();
2366
43
            restore_job_keys.clear();
2367
43
            init_rs_keys.clear();
2368
43
            tablet_compact_stats_keys.clear();
2369
43
            tablet_load_stats_keys.clear();
2370
43
            versioned_meta_tablet_keys.clear();
2371
43
        };
2372
45
        std::unique_ptr<Transaction> txn;
2373
45
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2374
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2375
0
            return -1;
2376
0
        }
2377
45
        std::string tablet_key_end;
2378
45
        if (!tablet_keys.empty()) {
2379
43
            if (use_range_remove) {
2380
43
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2381
43
                txn->remove(tablet_keys.front().first, tablet_key_end);
2382
43
            } else {
2383
0
                for (auto& [k, _] : tablet_keys) {
2384
0
                    txn->remove(k);
2385
0
                }
2386
0
            }
2387
43
        }
2388
45
        if (is_multi_version) {
2389
6
            for (auto& k : tablet_compact_stats_keys) {
2390
                // Remove all versions of tablet compact stats for recycled tablet
2391
6
                LOG_INFO("remove versioned tablet compact stats key")
2392
6
                        .tag("compact_stats_key", hex(k));
2393
6
                versioned_remove_all(txn.get(), k);
2394
6
            }
2395
6
            for (auto& k : tablet_load_stats_keys) {
2396
                // Remove all versions of tablet load stats for recycled tablet
2397
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2398
6
                versioned_remove_all(txn.get(), k);
2399
6
            }
2400
6
            for (auto& k : versioned_meta_tablet_keys) {
2401
                // Remove all versions of meta tablet for recycled tablet
2402
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2403
6
                versioned_remove_all(txn.get(), k);
2404
6
            }
2405
5
        }
2406
4.24k
        for (auto& k : tablet_idx_keys) {
2407
4.24k
            txn->remove(k);
2408
4.24k
        }
2409
4.24k
        for (auto& k : restore_job_keys) {
2410
4.24k
            txn->remove(k);
2411
4.24k
        }
2412
45
        for (auto& k : init_rs_keys) {
2413
0
            txn->remove(k);
2414
0
        }
2415
45
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2416
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2417
0
                         << ", err=" << err;
2418
0
            return -1;
2419
0
        }
2420
45
        return 0;
2421
45
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2346
4
    auto loop_done = [&, this]() -> int {
2347
4
        bool finished = true;
2348
4
        auto tablet_keys = sync_executor.when_all(&finished);
2349
4
        if (!finished) {
2350
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2351
0
            return -1;
2352
0
        }
2353
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2354
        // sort the vector using key's order
2355
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2356
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2357
2
        bool use_range_remove = true;
2358
4.00k
        for (auto& [_, remove] : tablet_keys) {
2359
4.00k
            if (!remove) {
2360
0
                use_range_remove = remove;
2361
0
                break;
2362
0
            }
2363
4.00k
        }
2364
2
        DORIS_CLOUD_DEFER {
2365
2
            tablet_idx_keys.clear();
2366
2
            restore_job_keys.clear();
2367
2
            init_rs_keys.clear();
2368
2
            tablet_compact_stats_keys.clear();
2369
2
            tablet_load_stats_keys.clear();
2370
2
            versioned_meta_tablet_keys.clear();
2371
2
        };
2372
2
        std::unique_ptr<Transaction> txn;
2373
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2374
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2375
0
            return -1;
2376
0
        }
2377
2
        std::string tablet_key_end;
2378
2
        if (!tablet_keys.empty()) {
2379
2
            if (use_range_remove) {
2380
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2381
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
2382
2
            } else {
2383
0
                for (auto& [k, _] : tablet_keys) {
2384
0
                    txn->remove(k);
2385
0
                }
2386
0
            }
2387
2
        }
2388
2
        if (is_multi_version) {
2389
0
            for (auto& k : tablet_compact_stats_keys) {
2390
                // Remove all versions of tablet compact stats for recycled tablet
2391
0
                LOG_INFO("remove versioned tablet compact stats key")
2392
0
                        .tag("compact_stats_key", hex(k));
2393
0
                versioned_remove_all(txn.get(), k);
2394
0
            }
2395
0
            for (auto& k : tablet_load_stats_keys) {
2396
                // Remove all versions of tablet load stats for recycled tablet
2397
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2398
0
                versioned_remove_all(txn.get(), k);
2399
0
            }
2400
0
            for (auto& k : versioned_meta_tablet_keys) {
2401
                // Remove all versions of meta tablet for recycled tablet
2402
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2403
0
                versioned_remove_all(txn.get(), k);
2404
0
            }
2405
0
        }
2406
4.00k
        for (auto& k : tablet_idx_keys) {
2407
4.00k
            txn->remove(k);
2408
4.00k
        }
2409
4.00k
        for (auto& k : restore_job_keys) {
2410
4.00k
            txn->remove(k);
2411
4.00k
        }
2412
2
        for (auto& k : init_rs_keys) {
2413
0
            txn->remove(k);
2414
0
        }
2415
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2416
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2417
0
                         << ", err=" << err;
2418
0
            return -1;
2419
0
        }
2420
2
        return 0;
2421
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2346
43
    auto loop_done = [&, this]() -> int {
2347
43
        bool finished = true;
2348
43
        auto tablet_keys = sync_executor.when_all(&finished);
2349
43
        if (!finished) {
2350
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2351
0
            return -1;
2352
0
        }
2353
43
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2354
        // sort the vector using key's order
2355
43
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2356
43
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2357
43
        bool use_range_remove = true;
2358
235
        for (auto& [_, remove] : tablet_keys) {
2359
235
            if (!remove) {
2360
0
                use_range_remove = remove;
2361
0
                break;
2362
0
            }
2363
235
        }
2364
43
        DORIS_CLOUD_DEFER {
2365
43
            tablet_idx_keys.clear();
2366
43
            restore_job_keys.clear();
2367
43
            init_rs_keys.clear();
2368
43
            tablet_compact_stats_keys.clear();
2369
43
            tablet_load_stats_keys.clear();
2370
43
            versioned_meta_tablet_keys.clear();
2371
43
        };
2372
43
        std::unique_ptr<Transaction> txn;
2373
43
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2374
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2375
0
            return -1;
2376
0
        }
2377
43
        std::string tablet_key_end;
2378
43
        if (!tablet_keys.empty()) {
2379
41
            if (use_range_remove) {
2380
41
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2381
41
                txn->remove(tablet_keys.front().first, tablet_key_end);
2382
41
            } else {
2383
0
                for (auto& [k, _] : tablet_keys) {
2384
0
                    txn->remove(k);
2385
0
                }
2386
0
            }
2387
41
        }
2388
43
        if (is_multi_version) {
2389
6
            for (auto& k : tablet_compact_stats_keys) {
2390
                // Remove all versions of tablet compact stats for recycled tablet
2391
6
                LOG_INFO("remove versioned tablet compact stats key")
2392
6
                        .tag("compact_stats_key", hex(k));
2393
6
                versioned_remove_all(txn.get(), k);
2394
6
            }
2395
6
            for (auto& k : tablet_load_stats_keys) {
2396
                // Remove all versions of tablet load stats for recycled tablet
2397
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2398
6
                versioned_remove_all(txn.get(), k);
2399
6
            }
2400
6
            for (auto& k : versioned_meta_tablet_keys) {
2401
                // Remove all versions of meta tablet for recycled tablet
2402
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2403
6
                versioned_remove_all(txn.get(), k);
2404
6
            }
2405
5
        }
2406
238
        for (auto& k : tablet_idx_keys) {
2407
238
            txn->remove(k);
2408
238
        }
2409
238
        for (auto& k : restore_job_keys) {
2410
238
            txn->remove(k);
2411
238
        }
2412
43
        for (auto& k : init_rs_keys) {
2413
0
            txn->remove(k);
2414
0
        }
2415
43
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2416
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2417
0
                         << ", err=" << err;
2418
0
            return -1;
2419
0
        }
2420
43
        return 0;
2421
43
    };
2422
2423
47
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
2424
47
                               std::move(loop_done));
2425
47
    if (ret != 0) {
2426
2
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
2427
2
        return ret;
2428
2
    }
2429
2430
    // directly remove tablet stats and tablet jobs of these dropped index or partition
2431
45
    std::unique_ptr<Transaction> txn;
2432
45
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2433
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
2434
0
        return -1;
2435
0
    }
2436
45
    txn->remove(stats_key_begin, stats_key_end);
2437
45
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
2438
45
                 << " end=" << hex(stats_key_end);
2439
45
    txn->remove(job_key_begin, job_key_end);
2440
45
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
2441
45
    std::string schema_key_begin, schema_key_end;
2442
45
    std::string schema_dict_key;
2443
45
    std::string versioned_schema_key_begin, versioned_schema_key_end;
2444
45
    if (partition_id <= 0) {
2445
        // Delete schema kv of this index
2446
13
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
2447
13
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
2448
13
        txn->remove(schema_key_begin, schema_key_end);
2449
13
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
2450
13
                     << " end=" << hex(schema_key_end);
2451
13
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
2452
13
        txn->remove(schema_dict_key);
2453
13
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
2454
13
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
2455
13
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
2456
13
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
2457
13
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
2458
13
                     << " end=" << hex(versioned_schema_key_end);
2459
13
    }
2460
2461
45
    TxnErrorCode err = txn->commit();
2462
45
    if (err != TxnErrorCode::TXN_OK) {
2463
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
2464
0
                     << " err=" << err;
2465
0
        return -1;
2466
0
    }
2467
2468
45
    return ret;
2469
45
}
2470
2471
4.03k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
2472
4.03k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
2473
4.03k
    int64_t num_segments = rs_meta_pb.num_segments();
2474
4.03k
    if (num_segments <= 0) return 0;
2475
2476
4.03k
    std::vector<std::string> file_paths;
2477
4.03k
    if (decrement_packed_file_ref_counts(rs_meta_pb) != 0) {
2478
0
        return -1;
2479
0
    }
2480
2481
    // Process inverted indexes
2482
4.03k
    std::vector<std::pair<int64_t, std::string>> index_ids;
2483
    // default format as v1.
2484
4.03k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
2485
4.03k
    bool delete_rowset_data_by_prefix = false;
2486
4.03k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
2487
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
2488
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
2489
0
        delete_rowset_data_by_prefix = true;
2490
4.03k
    } else if (rs_meta_pb.has_tablet_schema()) {
2491
8.00k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
2492
8.00k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
2493
8.00k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
2494
8.00k
            }
2495
8.00k
        }
2496
4.00k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
2497
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
2498
2.00k
        }
2499
4.00k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
2500
        // schema version and index id are not found, delete rowset data by prefix directly.
2501
0
        delete_rowset_data_by_prefix = true;
2502
28
    } else {
2503
        // otherwise, try to get schema kv
2504
28
        InvertedIndexInfo index_info;
2505
28
        int inverted_index_get_ret = inverted_index_id_cache_->get(
2506
28
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
2507
28
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
2508
28
                                 &inverted_index_get_ret);
2509
28
        if (inverted_index_get_ret == 0) {
2510
27
            index_format = index_info.first;
2511
27
            index_ids = index_info.second;
2512
27
        } else if (inverted_index_get_ret == 1) {
2513
            // 1. Schema kv not found means tablet has been recycled
2514
            // Maybe some tablet recycle failed by some bugs
2515
            // We need to delete again to double check
2516
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
2517
            // because we are uncertain about the inverted index information.
2518
            // If there are inverted indexes, some data might not be deleted,
2519
            // but this is acceptable as we have made our best effort to delete the data.
2520
1
            LOG_INFO(
2521
1
                    "delete rowset data schema kv not found, need to delete again to double "
2522
1
                    "check")
2523
1
                    .tag("instance_id", instance_id_)
2524
1
                    .tag("tablet_id", rs_meta_pb.tablet_id())
2525
1
                    .tag("rowset", rs_meta_pb.ShortDebugString());
2526
            // Currently index_ids is guaranteed to be empty,
2527
            // but we clear it again here as a safeguard against future code changes
2528
            // that might cause index_ids to no longer be empty
2529
1
            index_format = InvertedIndexStorageFormatPB::V2;
2530
1
            index_ids.clear();
2531
1
        } else {
2532
            // failed to get schema kv, delete rowset data by prefix directly.
2533
0
            delete_rowset_data_by_prefix = true;
2534
0
        }
2535
28
    }
2536
2537
4.03k
    if (delete_rowset_data_by_prefix) {
2538
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
2539
0
                                  rs_meta_pb.rowset_id_v2());
2540
0
    }
2541
2542
4.03k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
2543
4.03k
    if (it == accessor_map_.end()) {
2544
0
        LOG_WARNING("instance has no such resource id")
2545
0
                .tag("instance_id", instance_id_)
2546
0
                .tag("resource_id", rs_meta_pb.resource_id());
2547
0
        return -1;
2548
0
    }
2549
4.03k
    auto& accessor = it->second;
2550
2551
4.03k
    int64_t tablet_id = rs_meta_pb.tablet_id();
2552
4.03k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
2553
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
2554
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
2555
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
2556
40.0k
            for (const auto& index_id : index_ids) {
2557
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
2558
40.0k
                                                            index_id.second));
2559
40.0k
            }
2560
20.0k
        } else if (!index_ids.empty()) {
2561
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
2562
0
        }
2563
20.0k
    }
2564
2565
    // Process delete bitmap
2566
4.03k
    file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
2567
    // TODO(AlexYue): seems could do do batch
2568
4.03k
    return accessor->delete_files(file_paths);
2569
4.03k
}
2570
2571
60.6k
// Marks the small files ("slices") referenced by `rs_meta_pb` as deleted inside the
// metadata of the packed files that contain them, and removes packed files that no
// longer hold any live slice.
//
// The rowset's `packed_slice_locations` map (small file path -> location) is grouped by
// packed file path. For every packed file one transaction reads its PackedFileInfoPB,
// flags the matching slices as deleted/corrected, recomputes `ref_cnt` and
// `remaining_slice_bytes`, and writes the value back. When no live slice is left the
// state is switched to RECYCLING and, after a successful commit, the packed file and
// its kv are removed through delete_packed_file_and_kv().
//
// Returns 0 when there is nothing to update or every packed file was processed
// successfully, -1 if processing of at least one packed file failed. A failure on one
// packed file does not stop processing of the remaining ones. Commit conflicts are
// reported as failures and are not retried here.
int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb) {
    LOG_INFO("begin process_packed_file_location_index")
            .tag("instance_id", instance_id_)
            .tag("tablet_id", rs_meta_pb.tablet_id())
            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
            .tag("index_map_size", rs_meta_pb.packed_slice_locations_size());
    const auto& index_map = rs_meta_pb.packed_slice_locations();
    if (index_map.empty()) {
        LOG_INFO("skip merge file update: empty merge_file_segment_index")
                .tag("instance_id", instance_id_)
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("rowset_id", rs_meta_pb.rowset_id_v2());
        return 0;
    }
    struct PackedSmallFileInfo {
        std::string small_file_path;
    };
    // packed file path -> small files of this rowset stored in that packed file.
    // Entries are only created together with their first element, so no vector in this
    // map is ever empty.
    std::unordered_map<std::string, std::vector<PackedSmallFileInfo>> packed_file_updates;
    packed_file_updates.reserve(index_map.size());
    for (const auto& [small_path, index_pb] : index_map) {
        if (!index_pb.has_packed_file_path() || index_pb.packed_file_path().empty()) {
            continue;
        }
        packed_file_updates[index_pb.packed_file_path()].push_back(
                PackedSmallFileInfo {small_path});
    }
    if (packed_file_updates.empty()) {
        LOG_INFO("skip packed file update: no valid merge_file_path in merge_file_segment_index")
                .tag("instance_id", instance_id_)
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                .tag("index_map_size", index_map.size());
        return 0;
    }

    // Applies the update for a single packed file. Returns 0 on success (including the
    // cases where there is nothing to change) and -1 on any failure.
    auto update_packed_file = [&](const std::string& packed_file_path,
                                  const std::vector<PackedSmallFileInfo>& small_files) -> int {
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG_WARNING("failed to create txn when updating packed file ref count")
                    .tag("instance_id", instance_id_)
                    .tag("packed_file_path", packed_file_path)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("err", err);
            return -1;
        }

        std::string packed_key = packed_file_key({instance_id_, packed_file_path});
        std::string packed_val;
        err = txn->get(packed_key, &packed_val);
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
            LOG_WARNING("packed file info not found when recycling rowset")
                    .tag("instance_id", instance_id_)
                    .tag("packed_file_path", packed_file_path)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("key", hex(packed_key));
            // Skip this packed file entry and continue with others
            return 0;
        }
        if (err != TxnErrorCode::TXN_OK) {
            LOG_WARNING("failed to get packed file info when recycling rowset")
                    .tag("instance_id", instance_id_)
                    .tag("packed_file_path", packed_file_path)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("err", err);
            return -1;
        }

        cloud::PackedFileInfoPB packed_info;
        if (!packed_info.ParseFromString(packed_val)) {
            LOG_WARNING("failed to parse packed file info when recycling rowset")
                    .tag("instance_id", instance_id_)
                    .tag("packed_file_path", packed_file_path)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id());
            return -1;
        }

        LOG_INFO("packed file update check")
                .tag("instance_id", instance_id_)
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("merged_file_path", packed_file_path)
                .tag("requested_small_files", small_files.size())
                .tag("merge_entries", packed_info.slices_size());

        // Flag every requested small file as deleted in the packed file's slice list.
        auto* small_file_entries = packed_info.mutable_slices();
        int64_t changed_files = 0;
        int64_t missing_entries = 0;
        int64_t already_deleted = 0;
        for (const auto& small_file_info : small_files) {
            bool found = false;
            for (auto& small_file_entry : *small_file_entries) {
                if (small_file_entry.path() != small_file_info.small_file_path) {
                    continue;
                }
                if (!small_file_entry.deleted()) {
                    small_file_entry.set_deleted(true);
                    if (!small_file_entry.corrected()) {
                        small_file_entry.set_corrected(true);
                    }
                    ++changed_files;
                } else {
                    ++already_deleted;
                }
                // Only the first slice with a matching path is considered.
                found = true;
                break;
            }
            if (!found) {
                ++missing_entries;
                LOG_WARNING("packed file info missing small file entry")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("small_file_path", small_file_info.small_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id());
            }
        }

        if (changed_files == 0) {
            // Nothing was modified, so there is no need to write the value back.
            LOG_INFO("skip merge file update: no merge entries changed")
                    .tag("instance_id", instance_id_)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("merged_file_path", packed_file_path)
                    .tag("missing_entries", missing_entries)
                    .tag("already_deleted", already_deleted)
                    .tag("requested_small_files", small_files.size())
                    .tag("merge_entries", packed_info.slices_size());
            return 0;
        }

        // Recompute the live slice statistics from scratch over all slices.
        int64_t left_file_count = 0;
        int64_t left_file_bytes = 0;
        for (const auto& small_file_entry : packed_info.slices()) {
            if (!small_file_entry.deleted()) {
                ++left_file_count;
                left_file_bytes += small_file_entry.size();
            }
        }
        packed_info.set_remaining_slice_bytes(left_file_bytes);
        packed_info.set_ref_cnt(left_file_count);
        LOG_INFO("updated packed file reference info")
                .tag("instance_id", instance_id_)
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("packed_file_path", packed_file_path)
                .tag("ref_cnt", left_file_count)
                .tag("left_file_bytes", left_file_bytes);

        if (left_file_count == 0) {
            packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
        }

        std::string updated_val;
        if (!packed_info.SerializeToString(&updated_val)) {
            LOG_WARNING("failed to serialize packed file info when recycling rowset")
                    .tag("instance_id", instance_id_)
                    .tag("packed_file_path", packed_file_path)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id());
            return -1;
        }

        txn->put(packed_key, updated_val);
        err = txn->commit();
        if (err == TxnErrorCode::TXN_OK) {
            if (left_file_count == 0) {
                LOG_INFO("packed file ready to delete, deleting immediately")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path);
                if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
                    return -1;
                }
            }
            return 0;
        }
        if (err == TxnErrorCode::TXN_CONFLICT) {
            LOG_WARNING("packed file info update conflict, not retrying")
                    .tag("instance_id", instance_id_)
                    .tag("packed_file_path", packed_file_path)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("changed_files", changed_files);
            return -1;
        }

        LOG_WARNING("failed to commit packed file info update")
                .tag("instance_id", instance_id_)
                .tag("packed_file_path", packed_file_path)
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("err", err)
                .tag("changed_files", changed_files);
        return -1;
    };

    int ret = 0;
    for (const auto& [packed_file_path, small_files] : packed_file_updates) {
        // Keep going after a failure so the other packed files still get updated.
        if (update_packed_file(packed_file_path, small_files) != 0) {
            ret = -1;
        }
    }

    return ret;
}
2794
2795
int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path,
2796
                                                const std::string& packed_key,
2797
6
                                                const cloud::PackedFileInfoPB& packed_info) {
2798
6
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
2799
0
        LOG_WARNING("packed file missing resource id when recycling")
2800
0
                .tag("instance_id", instance_id_)
2801
0
                .tag("packed_file_path", packed_file_path);
2802
0
        return -1;
2803
0
    }
2804
2805
6
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
2806
6
    if (!accessor) {
2807
0
        LOG_WARNING("no accessor available to delete packed file")
2808
0
                .tag("instance_id", instance_id_)
2809
0
                .tag("packed_file_path", packed_file_path)
2810
0
                .tag("resource_id", packed_info.resource_id());
2811
0
        return -1;
2812
0
    }
2813
2814
6
    int del_ret = accessor->delete_file(packed_file_path);
2815
6
    if (del_ret != 0 && del_ret != 1) {
2816
0
        LOG_WARNING("failed to delete packed file")
2817
0
                .tag("instance_id", instance_id_)
2818
0
                .tag("packed_file_path", packed_file_path)
2819
0
                .tag("resource_id", resource_id)
2820
0
                .tag("ret", del_ret);
2821
0
        return -1;
2822
0
    }
2823
6
    if (del_ret == 1) {
2824
0
        LOG_INFO("packed file already removed")
2825
0
                .tag("instance_id", instance_id_)
2826
0
                .tag("packed_file_path", packed_file_path)
2827
0
                .tag("resource_id", resource_id);
2828
6
    } else {
2829
6
        LOG_INFO("deleted packed file")
2830
6
                .tag("instance_id", instance_id_)
2831
6
                .tag("packed_file_path", packed_file_path)
2832
6
                .tag("resource_id", resource_id);
2833
6
    }
2834
2835
6
    std::unique_ptr<Transaction> del_txn;
2836
6
    TxnErrorCode err = txn_kv_->create_txn(&del_txn);
2837
6
    if (err != TxnErrorCode::TXN_OK) {
2838
0
        LOG_WARNING("failed to create txn when removing packed file kv")
2839
0
                .tag("instance_id", instance_id_)
2840
0
                .tag("packed_file_path", packed_file_path)
2841
0
                .tag("err", err);
2842
0
        return -1;
2843
0
    }
2844
2845
6
    std::string latest_val;
2846
6
    err = del_txn->get(packed_key, &latest_val);
2847
6
    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2848
0
        return 0;
2849
0
    }
2850
6
    if (err != TxnErrorCode::TXN_OK) {
2851
0
        LOG_WARNING("failed to re-read packed file kv before removal")
2852
0
                .tag("instance_id", instance_id_)
2853
0
                .tag("packed_file_path", packed_file_path)
2854
0
                .tag("err", err);
2855
0
        return -1;
2856
0
    }
2857
2858
6
    cloud::PackedFileInfoPB latest_info;
2859
6
    if (!latest_info.ParseFromString(latest_val)) {
2860
0
        LOG_WARNING("failed to parse packed file info before removal")
2861
0
                .tag("instance_id", instance_id_)
2862
0
                .tag("packed_file_path", packed_file_path);
2863
0
        return -1;
2864
0
    }
2865
2866
6
    if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
2867
6
          latest_info.ref_cnt() == 0)) {
2868
0
        LOG_INFO("packed file state changed before removal, skip deleting kv")
2869
0
                .tag("instance_id", instance_id_)
2870
0
                .tag("packed_file_path", packed_file_path);
2871
0
        return 0;
2872
0
    }
2873
2874
6
    del_txn->remove(packed_key);
2875
6
    err = del_txn->commit();
2876
6
    if (err == TxnErrorCode::TXN_OK) {
2877
6
        LOG_INFO("removed packed file metadata")
2878
6
                .tag("instance_id", instance_id_)
2879
6
                .tag("packed_file_path", packed_file_path);
2880
6
        return 0;
2881
6
    }
2882
0
    if (err == TxnErrorCode::TXN_CONFLICT) {
2883
0
        LOG_WARNING("failed to remove packed file kv due to conflict")
2884
0
                .tag("instance_id", instance_id_)
2885
0
                .tag("packed_file_path", packed_file_path);
2886
0
        return -1;
2887
0
    }
2888
0
    LOG_WARNING("failed to remove packed file kv")
2889
0
            .tag("instance_id", instance_id_)
2890
0
            .tag("packed_file_path", packed_file_path)
2891
0
            .tag("err", err);
2892
0
    return -1;
2893
0
}
2894
2895
int InstanceRecycler::delete_rowset_data(
2896
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
2897
38
        RecyclerMetricsContext& metrics_context) {
2898
38
    int ret = 0;
2899
    // resource_id -> file_paths
2900
38
    std::map<std::string, std::vector<std::string>> resource_file_paths;
2901
    // (resource_id, tablet_id, rowset_id)
2902
38
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
2903
38
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
2904
2905
54.1k
    for (const auto& [_, rs] : rowsets) {
2906
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
2907
        // due to aborted schema change.
2908
54.1k
        if (is_formal_rowset) {
2909
3.12k
            std::lock_guard lock(recycled_tablets_mtx_);
2910
3.12k
            if (recycled_tablets_.count(rs.tablet_id())) {
2911
0
                continue; // Rowset data has already been deleted
2912
0
            }
2913
3.12k
        }
2914
2915
54.1k
        auto it = accessor_map_.find(rs.resource_id());
2916
        // possible if the accessor is not initilized correctly
2917
54.1k
        if (it == accessor_map_.end()) [[unlikely]] {
2918
1
            LOG_WARNING("instance has no such resource id")
2919
1
                    .tag("instance_id", instance_id_)
2920
1
                    .tag("resource_id", rs.resource_id());
2921
1
            ret = -1;
2922
1
            continue;
2923
1
        }
2924
2925
54.1k
        auto& file_paths = resource_file_paths[rs.resource_id()];
2926
54.1k
        const auto& rowset_id = rs.rowset_id_v2();
2927
54.1k
        int64_t tablet_id = rs.tablet_id();
2928
54.1k
        LOG_INFO("recycle rowset merge index size")
2929
54.1k
                .tag("instance_id", instance_id_)
2930
54.1k
                .tag("tablet_id", tablet_id)
2931
54.1k
                .tag("rowset_id", rowset_id)
2932
54.1k
                .tag("merge_index_size", rs.packed_slice_locations_size());
2933
54.1k
        if (decrement_packed_file_ref_counts(rs) != 0) {
2934
0
            ret = -1;
2935
0
            continue;
2936
0
        }
2937
54.1k
        int64_t num_segments = rs.num_segments();
2938
54.1k
        if (num_segments <= 0) {
2939
0
            metrics_context.total_recycled_num++;
2940
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
2941
0
            continue;
2942
0
        }
2943
2944
        // Process delete bitmap
2945
54.1k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
2946
2947
        // Process inverted indexes
2948
54.1k
        std::vector<std::pair<int64_t, std::string>> index_ids;
2949
        // default format as v1.
2950
54.1k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
2951
54.1k
        int inverted_index_get_ret = 0;
2952
54.1k
        if (rs.has_tablet_schema()) {
2953
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
2954
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
2955
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
2956
53.5k
                }
2957
53.5k
            }
2958
26.6k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
2959
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
2960
26.5k
            }
2961
27.5k
        } else {
2962
27.5k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
2963
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
2964
0
                                "instance_id="
2965
0
                             << instance_id_ << " tablet_id=" << tablet_id
2966
0
                             << " rowset_id=" << rowset_id;
2967
0
                ret = -1;
2968
0
                continue;
2969
0
            }
2970
27.5k
            InvertedIndexInfo index_info;
2971
27.5k
            inverted_index_get_ret =
2972
27.5k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
2973
27.5k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
2974
27.5k
                                     &inverted_index_get_ret);
2975
27.5k
            if (inverted_index_get_ret == 0) {
2976
27.0k
                index_format = index_info.first;
2977
27.0k
                index_ids = index_info.second;
2978
27.0k
            } else if (inverted_index_get_ret == 1) {
2979
                // 1. Schema kv not found means tablet has been recycled
2980
                // Maybe some tablet recycle failed by some bugs
2981
                // We need to delete again to double check
2982
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
2983
                // because we are uncertain about the inverted index information.
2984
                // If there are inverted indexes, some data might not be deleted,
2985
                // but this is acceptable as we have made our best effort to delete the data.
2986
503
                LOG_INFO(
2987
503
                        "delete rowset data schema kv not found, need to delete again to double "
2988
503
                        "check")
2989
503
                        .tag("instance_id", instance_id_)
2990
503
                        .tag("tablet_id", tablet_id)
2991
503
                        .tag("rowset", rs.ShortDebugString());
2992
                // Currently index_ids is guaranteed to be empty,
2993
                // but we clear it again here as a safeguard against future code changes
2994
                // that might cause index_ids to no longer be empty
2995
503
                index_format = InvertedIndexStorageFormatPB::V2;
2996
503
                index_ids.clear();
2997
18.4E
            } else {
2998
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
2999
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
3000
18.4E
                ret = -1;
3001
18.4E
                continue;
3002
18.4E
            }
3003
27.5k
        }
3004
54.1k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3005
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3006
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3007
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
3008
5
            continue;
3009
5
        }
3010
324k
        for (int64_t i = 0; i < num_segments; ++i) {
3011
270k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3012
270k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
3013
538k
                for (const auto& index_id : index_ids) {
3014
538k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
3015
538k
                                                                index_id.first, index_id.second));
3016
538k
                }
3017
268k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
3018
                // try to recycle inverted index v2 when get_ret == 1
3019
                // we treat schema not found as if it has a v2 format inverted index
3020
                // to reduce chance of data leakage
3021
2.50k
                if (inverted_index_get_ret == 1) {
3022
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
3023
2.50k
                            .tag("instance_id", instance_id_)
3024
2.50k
                            .tag("inverted index v2 path",
3025
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
3026
2.50k
                }
3027
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3028
2.50k
            }
3029
270k
        }
3030
54.1k
    }
3031
3032
38
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
3033
38
                                                 "delete_rowset_data",
3034
40
                                                 [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3034
40
                                                 [](const int& ret) { return ret != 0; });
3035
38
    for (auto& [resource_id, file_paths] : resource_file_paths) {
3036
35
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3037
35
            DCHECK(accessor_map_.count(*rid))
3038
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3039
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3040
35
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3041
35
                                     &accessor_map_);
3042
35
            if (!accessor_map_.contains(*rid)) {
3043
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3044
0
                        .tag("resource_id", resource_id)
3045
0
                        .tag("instance_id", instance_id_);
3046
0
                return -1;
3047
0
            }
3048
35
            auto& accessor = accessor_map_[*rid];
3049
35
            int ret = accessor->delete_files(*paths);
3050
35
            if (!ret) {
3051
                // deduplication of different files with the same rowset id
3052
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3053
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3054
35
                std::set<std::string> deleted_rowset_id;
3055
3056
35
                std::for_each(paths->begin(), paths->end(),
3057
35
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3058
862k
                               this](const std::string& path) {
3059
862k
                                  std::vector<std::string> str;
3060
862k
                                  butil::SplitString(path, '/', &str);
3061
862k
                                  std::string rowset_id;
3062
862k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3063
860k
                                      rowset_id = str.back().substr(0, pos);
3064
860k
                                  } else {
3065
2.38k
                                      if (path.find("packed_file/") != std::string::npos) {
3066
0
                                          return; // packed files do not have rowset_id encoded
3067
0
                                      }
3068
2.38k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3069
2.38k
                                      return;
3070
2.38k
                                  }
3071
860k
                                  auto rs_meta = rowsets.find(rowset_id);
3072
860k
                                  if (rs_meta != rowsets.end() &&
3073
863k
                                      !deleted_rowset_id.contains(rowset_id)) {
3074
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3075
54.1k
                                      metrics_context.total_recycled_data_size +=
3076
54.1k
                                              rs_meta->second.total_disk_size();
3077
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3078
54.1k
                                              rs_meta->second.num_segments();
3079
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3080
54.1k
                                              rs_meta->second.total_disk_size();
3081
54.1k
                                      metrics_context.total_recycled_num++;
3082
54.1k
                                  }
3083
860k
                              });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3058
862k
                               this](const std::string& path) {
3059
862k
                                  std::vector<std::string> str;
3060
862k
                                  butil::SplitString(path, '/', &str);
3061
862k
                                  std::string rowset_id;
3062
862k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3063
860k
                                      rowset_id = str.back().substr(0, pos);
3064
860k
                                  } else {
3065
2.38k
                                      if (path.find("packed_file/") != std::string::npos) {
3066
0
                                          return; // packed files do not have rowset_id encoded
3067
0
                                      }
3068
2.38k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3069
2.38k
                                      return;
3070
2.38k
                                  }
3071
860k
                                  auto rs_meta = rowsets.find(rowset_id);
3072
860k
                                  if (rs_meta != rowsets.end() &&
3073
863k
                                      !deleted_rowset_id.contains(rowset_id)) {
3074
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3075
54.1k
                                      metrics_context.total_recycled_data_size +=
3076
54.1k
                                              rs_meta->second.total_disk_size();
3077
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3078
54.1k
                                              rs_meta->second.num_segments();
3079
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3080
54.1k
                                              rs_meta->second.total_disk_size();
3081
54.1k
                                      metrics_context.total_recycled_num++;
3082
54.1k
                                  }
3083
860k
                              });
3084
35
                segment_metrics_context_.report();
3085
35
                metrics_context.report();
3086
35
            }
3087
35
            return ret;
3088
35
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3036
35
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3037
35
            DCHECK(accessor_map_.count(*rid))
3038
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3039
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3040
35
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3041
35
                                     &accessor_map_);
3042
35
            if (!accessor_map_.contains(*rid)) {
3043
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3044
0
                        .tag("resource_id", resource_id)
3045
0
                        .tag("instance_id", instance_id_);
3046
0
                return -1;
3047
0
            }
3048
35
            auto& accessor = accessor_map_[*rid];
3049
35
            int ret = accessor->delete_files(*paths);
3050
35
            if (!ret) {
3051
                // deduplication of different files with the same rowset id
3052
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3053
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3054
35
                std::set<std::string> deleted_rowset_id;
3055
3056
35
                std::for_each(paths->begin(), paths->end(),
3057
35
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3058
35
                               this](const std::string& path) {
3059
35
                                  std::vector<std::string> str;
3060
35
                                  butil::SplitString(path, '/', &str);
3061
35
                                  std::string rowset_id;
3062
35
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3063
35
                                      rowset_id = str.back().substr(0, pos);
3064
35
                                  } else {
3065
35
                                      if (path.find("packed_file/") != std::string::npos) {
3066
35
                                          return; // packed files do not have rowset_id encoded
3067
35
                                      }
3068
35
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3069
35
                                      return;
3070
35
                                  }
3071
35
                                  auto rs_meta = rowsets.find(rowset_id);
3072
35
                                  if (rs_meta != rowsets.end() &&
3073
35
                                      !deleted_rowset_id.contains(rowset_id)) {
3074
35
                                      deleted_rowset_id.emplace(rowset_id);
3075
35
                                      metrics_context.total_recycled_data_size +=
3076
35
                                              rs_meta->second.total_disk_size();
3077
35
                                      segment_metrics_context_.total_recycled_num +=
3078
35
                                              rs_meta->second.num_segments();
3079
35
                                      segment_metrics_context_.total_recycled_data_size +=
3080
35
                                              rs_meta->second.total_disk_size();
3081
35
                                      metrics_context.total_recycled_num++;
3082
35
                                  }
3083
35
                              });
3084
35
                segment_metrics_context_.report();
3085
35
                metrics_context.report();
3086
35
            }
3087
35
            return ret;
3088
35
        });
3089
35
    }
3090
38
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
3091
5
        LOG_INFO(
3092
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
3093
5
                "resource_id={}, tablet_id={}, instance_id={}, task_type={}",
3094
5
                rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type);
3095
5
        concurrent_delete_executor.add([&]() -> int {
3096
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3097
5
            if (!ret) {
3098
5
                auto rs = rowsets.at(rowset_id);
3099
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3100
5
                metrics_context.total_recycled_num++;
3101
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3102
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3103
5
                metrics_context.report();
3104
5
                segment_metrics_context_.report();
3105
5
            }
3106
5
            return ret;
3107
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
3095
5
        concurrent_delete_executor.add([&]() -> int {
3096
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3097
5
            if (!ret) {
3098
5
                auto rs = rowsets.at(rowset_id);
3099
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3100
5
                metrics_context.total_recycled_num++;
3101
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3102
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3103
5
                metrics_context.report();
3104
5
                segment_metrics_context_.report();
3105
5
            }
3106
5
            return ret;
3107
5
        });
3108
5
    }
3109
3110
38
    bool finished = true;
3111
38
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3112
40
    for (int r : rets) {
3113
40
        if (r != 0) {
3114
0
            ret = -1;
3115
0
            break;
3116
0
        }
3117
40
    }
3118
38
    ret = finished ? ret : -1;
3119
38
    return ret;
3120
38
}
3121
3122
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
3123
2.90k
                                         const std::string& rowset_id) {
3124
2.90k
    auto it = accessor_map_.find(resource_id);
3125
2.90k
    if (it == accessor_map_.end()) {
3126
0
        LOG_WARNING("instance has no such resource id")
3127
0
                .tag("instance_id", instance_id_)
3128
0
                .tag("resource_id", resource_id)
3129
0
                .tag("tablet_id", tablet_id)
3130
0
                .tag("rowset_id", rowset_id);
3131
0
        return -1;
3132
0
    }
3133
2.90k
    auto& accessor = it->second;
3134
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
3135
2.90k
}
3136
3137
4
bool InstanceRecycler::decode_packed_file_key(std::string_view key, std::string* packed_path) {
3138
4
    if (key.empty()) {
3139
0
        return false;
3140
0
    }
3141
4
    std::string_view key_view = key;
3142
4
    key_view.remove_prefix(1); // remove keyspace prefix
3143
4
    std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> decoded;
3144
4
    if (decode_key(&key_view, &decoded) != 0) {
3145
0
        return false;
3146
0
    }
3147
4
    if (decoded.size() < 4) {
3148
0
        return false;
3149
0
    }
3150
4
    try {
3151
4
        *packed_path = std::get<std::string>(std::get<0>(decoded.back()));
3152
4
    } catch (const std::bad_variant_access&) {
3153
0
        return false;
3154
0
    }
3155
4
    return true;
3156
4
}
3157
3158
14
int InstanceRecycler::recycle_packed_files() {
3159
14
    const std::string task_name = "recycle_packed_files";
3160
14
    auto start_tp = steady_clock::now();
3161
14
    int64_t start_time = duration_cast<seconds>(start_tp.time_since_epoch()).count();
3162
14
    int ret = 0;
3163
14
    PackedFileRecycleStats stats;
3164
3165
14
    register_recycle_task(task_name, start_time);
3166
14
    DORIS_CLOUD_DEFER {
3167
14
        unregister_recycle_task(task_name);
3168
14
        int64_t cost =
3169
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3170
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3171
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3172
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3173
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3174
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3175
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3176
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3177
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3178
14
                                                             stats.bytes_object_deleted);
3179
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3180
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3181
14
                .tag("instance_id", instance_id_)
3182
14
                .tag("num_scanned", stats.num_scanned)
3183
14
                .tag("num_corrected", stats.num_corrected)
3184
14
                .tag("num_deleted", stats.num_deleted)
3185
14
                .tag("num_failed", stats.num_failed)
3186
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3187
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3188
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3189
14
                .tag("bytes_deleted", stats.bytes_deleted)
3190
14
                .tag("ret", ret);
3191
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
Line
Count
Source
3166
14
    DORIS_CLOUD_DEFER {
3167
14
        unregister_recycle_task(task_name);
3168
14
        int64_t cost =
3169
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3170
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3171
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3172
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3173
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3174
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3175
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3176
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3177
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3178
14
                                                             stats.bytes_object_deleted);
3179
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3180
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3181
14
                .tag("instance_id", instance_id_)
3182
14
                .tag("num_scanned", stats.num_scanned)
3183
14
                .tag("num_corrected", stats.num_corrected)
3184
14
                .tag("num_deleted", stats.num_deleted)
3185
14
                .tag("num_failed", stats.num_failed)
3186
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3187
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3188
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3189
14
                .tag("bytes_deleted", stats.bytes_deleted)
3190
14
                .tag("ret", ret);
3191
14
    };
3192
3193
14
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3194
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3195
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3196
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
Line
Count
Source
3193
4
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3194
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3195
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3196
4
    };
3197
3198
14
    LOG_INFO("begin to recycle packed file").tag("instance_id", instance_id_);
3199
3200
14
    std::string begin = packed_file_key({instance_id_, ""});
3201
14
    std::string end = packed_file_key({instance_id_, "\xff"});
3202
14
    if (scan_and_recycle(begin, end, recycle_func) != 0) {
3203
0
        ret = -1;
3204
0
    }
3205
3206
14
    return ret;
3207
14
}
3208
3209
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
3210
                                                  RecyclerMetricsContext& metrics_context,
3211
0
                                                  int64_t partition_id, bool is_empty_tablet) {
3212
0
    std::string tablet_key_begin, tablet_key_end;
3213
3214
0
    if (partition_id > 0) {
3215
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
3216
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
3217
0
    } else {
3218
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
3219
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
3220
0
    }
3221
    // for calculate the total num or bytes of recyled objects
3222
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
3223
0
                                                          std::string_view v) -> int {
3224
0
        doris::TabletMetaCloudPB tablet_meta_pb;
3225
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
3226
0
            return 0;
3227
0
        }
3228
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
3229
3230
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
3231
0
            return 0;
3232
0
        }
3233
3234
0
        if (!is_empty_tablet) {
3235
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
3236
0
                return 0;
3237
0
            }
3238
0
            tablet_metrics_context_.total_need_recycle_num++;
3239
0
        }
3240
0
        return 0;
3241
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
3242
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
3243
0
    metrics_context.report(true);
3244
0
    tablet_metrics_context_.report(true);
3245
0
    segment_metrics_context_.report(true);
3246
0
    return ret;
3247
0
}
3248
3249
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
3250
0
                                                 RecyclerMetricsContext& metrics_context) {
3251
0
    int ret = 0;
3252
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
3253
0
    std::unique_ptr<Transaction> txn;
3254
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3255
0
        LOG_WARNING("failed to recycle tablet ")
3256
0
                .tag("tablet id", tablet_id)
3257
0
                .tag("instance_id", instance_id_)
3258
0
                .tag("reason", "failed to create txn");
3259
0
        ret = -1;
3260
0
    }
3261
0
    GetRowsetResponse resp;
3262
0
    std::string msg;
3263
0
    MetaServiceCode code = MetaServiceCode::OK;
3264
    // get rowsets in tablet
3265
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
3266
0
                        tablet_id, code, msg, &resp);
3267
0
    if (code != MetaServiceCode::OK) {
3268
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3269
0
                .tag("tablet id", tablet_id)
3270
0
                .tag("msg", msg)
3271
0
                .tag("code", code)
3272
0
                .tag("instance id", instance_id_);
3273
0
        ret = -1;
3274
0
    }
3275
0
    for (const auto& rs_meta : resp.rowset_meta()) {
3276
        /*
3277
        * For compatibility, we skip the loop for [0-1] here.
3278
        * The purpose of this loop is to delete object files,
3279
        * and since [0-1] only has meta and doesn't have object files,
3280
        * skipping it doesn't affect system correctness.
3281
        *
3282
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
3283
        * would return error -1 directly, causing the recycle operation to fail.
3284
        *
3285
        * [0-1] doesn't have resource id is a bug.
3286
        * In the future, we will fix this problem, after that,
3287
        * we can remove this if statement.
3288
        *
3289
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
3290
        */
3291
3292
0
        if (rs_meta.end_version() == 1) {
3293
            // Assert that [0-1] has no resource_id to make sure
3294
            // this if statement will not be forgetted to remove
3295
            // when the resource id bug is fixed
3296
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3297
0
            continue;
3298
0
        }
3299
0
        if (!rs_meta.has_resource_id()) {
3300
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3301
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3302
0
                    .tag("instance_id", instance_id_)
3303
0
                    .tag("tablet_id", tablet_id);
3304
0
            continue;
3305
0
        }
3306
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3307
0
        auto it = accessor_map_.find(rs_meta.resource_id());
3308
        // possible if the accessor is not initilized correctly
3309
0
        if (it == accessor_map_.end()) [[unlikely]] {
3310
0
            LOG_WARNING(
3311
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3312
0
                    "recycle process")
3313
0
                    .tag("tablet id", tablet_id)
3314
0
                    .tag("instance_id", instance_id_)
3315
0
                    .tag("resource_id", rs_meta.resource_id())
3316
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
3317
0
            continue;
3318
0
        }
3319
3320
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
3321
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
3322
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
3323
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
3324
0
    }
3325
0
    return ret;
3326
0
}
3327
3328
4.24k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
3329
4.24k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
3330
4.24k
            .tag("instance_id", instance_id_)
3331
4.24k
            .tag("tablet_id", tablet_id);
3332
3333
4.24k
    if (should_recycle_versioned_keys()) {
3334
6
        int ret = recycle_versioned_tablet(tablet_id, metrics_context);
3335
6
        if (ret != 0) {
3336
0
            return ret;
3337
0
        }
3338
        // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED
3339
        // during the recycle_versioned_tablet process.
3340
        //
3341
        // .. And remove restore job rowsets of this tablet too
3342
6
    }
3343
3344
4.24k
    int ret = 0;
3345
4.24k
    auto start_time = steady_clock::now();
3346
3347
4.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
3348
3349
    // collect resource ids
3350
241
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
3351
241
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
3352
241
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
3353
241
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
3354
241
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
3355
241
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
3356
3357
241
    std::set<std::string> resource_ids;
3358
241
    int64_t recycle_rowsets_number = 0;
3359
241
    int64_t recycle_segments_number = 0;
3360
241
    int64_t recycle_rowsets_data_size = 0;
3361
241
    int64_t recycle_rowsets_index_size = 0;
3362
241
    int64_t recycle_restore_job_rowsets_number = 0;
3363
241
    int64_t recycle_restore_job_segments_number = 0;
3364
241
    int64_t recycle_restore_job_rowsets_data_size = 0;
3365
241
    int64_t recycle_restore_job_rowsets_index_size = 0;
3366
241
    int64_t max_rowset_version = 0;
3367
241
    int64_t min_rowset_creation_time = INT64_MAX;
3368
241
    int64_t max_rowset_creation_time = 0;
3369
241
    int64_t min_rowset_expiration_time = INT64_MAX;
3370
241
    int64_t max_rowset_expiration_time = 0;
3371
3372
241
    DORIS_CLOUD_DEFER {
3373
241
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3374
241
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3375
241
                .tag("instance_id", instance_id_)
3376
241
                .tag("tablet_id", tablet_id)
3377
241
                .tag("recycle rowsets number", recycle_rowsets_number)
3378
241
                .tag("recycle segments number", recycle_segments_number)
3379
241
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3380
241
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3381
241
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
3382
241
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
3383
241
                .tag("all restore job rowsets recycle data size",
3384
241
                     recycle_restore_job_rowsets_data_size)
3385
241
                .tag("all restore job rowsets recycle index size",
3386
241
                     recycle_restore_job_rowsets_index_size)
3387
241
                .tag("max rowset version", max_rowset_version)
3388
241
                .tag("min rowset creation time", min_rowset_creation_time)
3389
241
                .tag("max rowset creation time", max_rowset_creation_time)
3390
241
                .tag("min rowset expiration time", min_rowset_expiration_time)
3391
241
                .tag("max rowset expiration time", max_rowset_expiration_time)
3392
241
                .tag("task type", metrics_context.operation_type)
3393
241
                .tag("ret", ret);
3394
241
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3372
241
    DORIS_CLOUD_DEFER {
3373
241
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3374
241
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3375
241
                .tag("instance_id", instance_id_)
3376
241
                .tag("tablet_id", tablet_id)
3377
241
                .tag("recycle rowsets number", recycle_rowsets_number)
3378
241
                .tag("recycle segments number", recycle_segments_number)
3379
241
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3380
241
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3381
241
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
3382
241
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
3383
241
                .tag("all restore job rowsets recycle data size",
3384
241
                     recycle_restore_job_rowsets_data_size)
3385
241
                .tag("all restore job rowsets recycle index size",
3386
241
                     recycle_restore_job_rowsets_index_size)
3387
241
                .tag("max rowset version", max_rowset_version)
3388
241
                .tag("min rowset creation time", min_rowset_creation_time)
3389
241
                .tag("max rowset creation time", max_rowset_creation_time)
3390
241
                .tag("min rowset expiration time", min_rowset_expiration_time)
3391
241
                .tag("max rowset expiration time", max_rowset_expiration_time)
3392
241
                .tag("task type", metrics_context.operation_type)
3393
241
                .tag("ret", ret);
3394
241
    };
3395
3396
241
    std::unique_ptr<Transaction> txn;
3397
241
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3398
0
        LOG_WARNING("failed to recycle tablet ")
3399
0
                .tag("tablet id", tablet_id)
3400
0
                .tag("instance_id", instance_id_)
3401
0
                .tag("reason", "failed to create txn");
3402
0
        ret = -1;
3403
0
    }
3404
241
    GetRowsetResponse resp;
3405
241
    std::string msg;
3406
241
    MetaServiceCode code = MetaServiceCode::OK;
3407
    // get rowsets in tablet
3408
241
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
3409
241
                        tablet_id, code, msg, &resp);
3410
241
    if (code != MetaServiceCode::OK) {
3411
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3412
0
                .tag("tablet id", tablet_id)
3413
0
                .tag("msg", msg)
3414
0
                .tag("code", code)
3415
0
                .tag("instance id", instance_id_);
3416
0
        ret = -1;
3417
0
    }
3418
241
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
3419
3420
2.50k
    for (const auto& rs_meta : resp.rowset_meta()) {
3421
        // The rowset has no resource id and segments when it was generated by compaction
3422
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
3423
2.50k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
3424
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
3425
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3426
0
                    .tag("instance_id", instance_id_)
3427
0
                    .tag("tablet_id", tablet_id);
3428
0
            recycle_rowsets_number += 1;
3429
0
            continue;
3430
0
        }
3431
2.50k
        if (!rs_meta.has_resource_id()) {
3432
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3433
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
3434
1
                    .tag("instance_id", instance_id_)
3435
1
                    .tag("tablet_id", tablet_id);
3436
1
            return -1;
3437
1
        }
3438
2.50k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3439
2.50k
        auto it = accessor_map_.find(rs_meta.resource_id());
3440
        // possible if the accessor is not initilized correctly
3441
2.50k
        if (it == accessor_map_.end()) [[unlikely]] {
3442
1
            LOG_WARNING(
3443
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3444
1
                    "recycle process")
3445
1
                    .tag("tablet id", tablet_id)
3446
1
                    .tag("instance_id", instance_id_)
3447
1
                    .tag("resource_id", rs_meta.resource_id())
3448
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
3449
1
            return -1;
3450
1
        }
3451
2.50k
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
3452
0
            LOG_WARNING("failed to update packed file info when recycling tablet")
3453
0
                    .tag("instance_id", instance_id_)
3454
0
                    .tag("tablet_id", tablet_id)
3455
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
3456
0
            return -1;
3457
0
        }
3458
2.50k
        recycle_rowsets_number += 1;
3459
2.50k
        recycle_segments_number += rs_meta.num_segments();
3460
2.50k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
3461
2.50k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
3462
2.50k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
3463
2.50k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
3464
2.50k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
3465
2.50k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
3466
2.50k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
3467
2.50k
        resource_ids.emplace(rs_meta.resource_id());
3468
2.50k
    }
3469
3470
    // get restore job rowset in tablet
3471
239
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
3472
239
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
3473
239
    if (code != MetaServiceCode::OK) {
3474
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
3475
0
                .tag("tablet id", tablet_id)
3476
0
                .tag("msg", msg)
3477
0
                .tag("code", code)
3478
0
                .tag("instance id", instance_id_);
3479
0
        return -1;
3480
0
    }
3481
3482
239
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
3483
0
        if (!rs_meta.has_resource_id()) {
3484
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3485
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3486
0
                    .tag("instance_id", instance_id_)
3487
0
                    .tag("tablet_id", tablet_id);
3488
0
            return -1;
3489
0
        }
3490
3491
0
        auto it = accessor_map_.find(rs_meta.resource_id());
3492
        // possible if the accessor is not initilized correctly
3493
0
        if (it == accessor_map_.end()) [[unlikely]] {
3494
0
            LOG_WARNING(
3495
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3496
0
                    "recycle process")
3497
0
                    .tag("tablet id", tablet_id)
3498
0
                    .tag("instance_id", instance_id_)
3499
0
                    .tag("resource_id", rs_meta.resource_id())
3500
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
3501
0
            return -1;
3502
0
        }
3503
0
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
3504
0
            LOG_WARNING("failed to update packed file info when recycling restore job rowset")
3505
0
                    .tag("instance_id", instance_id_)
3506
0
                    .tag("tablet_id", tablet_id)
3507
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
3508
0
            return -1;
3509
0
        }
3510
0
        recycle_restore_job_rowsets_number += 1;
3511
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
3512
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
3513
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
3514
0
        resource_ids.emplace(rs_meta.resource_id());
3515
0
    }
3516
3517
239
    LOG_INFO("recycle tablet start to delete object")
3518
239
            .tag("instance id", instance_id_)
3519
239
            .tag("tablet id", tablet_id)
3520
239
            .tag("recycle tablet resource ids are",
3521
239
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
3522
239
                                 [](std::string rs_id, const auto& it) {
3523
204
                                     return rs_id.empty() ? it : rs_id + ", " + it;
3524
204
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
3522
204
                                 [](std::string rs_id, const auto& it) {
3523
204
                                     return rs_id.empty() ? it : rs_id + ", " + it;
3524
204
                                 }));
3525
3526
239
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
3527
239
            _thread_pool_group.s3_producer_pool,
3528
239
            fmt::format("delete tablet {} s3 rowset", tablet_id),
3529
239
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
3529
204
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
3530
3531
    // delete all rowset data in this tablet
3532
    // ATTN: there may be data leak if not all accessor initilized successfully
3533
    //       partial data deleted if the tablet is stored cross-storage vault
3534
    //       vault id is not attached to TabletMeta...
3535
239
    for (const auto& resource_id : resource_ids) {
3536
204
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
3537
204
        concurrent_delete_executor.add(
3538
204
                [&, rs_id = resource_id,
3539
204
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
3540
204
                    std::unique_ptr<int, std::function<void(int*)>> defer(
3541
204
                            (int*)0x01, [&](int*) { metrics_context.report(); });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
Line
Count
Source
3541
204
                            (int*)0x01, [&](int*) { metrics_context.report(); });
3542
204
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
3543
204
                    if (res != 0) {
3544
1
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
3545
1
                                     << " path=" << accessor_ptr->uri()
3546
1
                                     << " task type=" << metrics_context.operation_type;
3547
1
                        return std::make_pair(-1, rs_id);
3548
1
                    }
3549
203
                    return std::make_pair(0, rs_id);
3550
204
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
3539
204
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
3540
204
                    std::unique_ptr<int, std::function<void(int*)>> defer(
3541
204
                            (int*)0x01, [&](int*) { metrics_context.report(); });
3542
204
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
3543
204
                    if (res != 0) {
3544
1
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
3545
1
                                     << " path=" << accessor_ptr->uri()
3546
1
                                     << " task type=" << metrics_context.operation_type;
3547
1
                        return std::make_pair(-1, rs_id);
3548
1
                    }
3549
203
                    return std::make_pair(0, rs_id);
3550
204
                });
3551
204
    }
3552
3553
239
    bool finished = true;
3554
239
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
3555
239
    for (auto& r : rets) {
3556
204
        if (r.first != 0) {
3557
1
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
3558
1
            ret = -1;
3559
1
        }
3560
204
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
3561
204
    }
3562
239
    ret = finished ? ret : -1;
3563
3564
239
    if (ret != 0) { // failed recycle tablet data
3565
1
        LOG_WARNING("ret!=0")
3566
1
                .tag("finished", finished)
3567
1
                .tag("ret", ret)
3568
1
                .tag("instance_id", instance_id_)
3569
1
                .tag("tablet_id", tablet_id);
3570
1
        return ret;
3571
1
    }
3572
3573
238
    tablet_metrics_context_.total_recycled_data_size +=
3574
238
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3575
238
    tablet_metrics_context_.total_recycled_num += 1;
3576
238
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
3577
238
    segment_metrics_context_.total_recycled_data_size +=
3578
238
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3579
238
    metrics_context.total_recycled_data_size +=
3580
238
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3581
238
    tablet_metrics_context_.report();
3582
238
    segment_metrics_context_.report();
3583
238
    metrics_context.report();
3584
3585
238
    txn.reset();
3586
238
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3587
0
        LOG_WARNING("failed to recycle tablet ")
3588
0
                .tag("tablet id", tablet_id)
3589
0
                .tag("instance_id", instance_id_)
3590
0
                .tag("reason", "failed to create txn");
3591
0
        ret = -1;
3592
0
    }
3593
    // delete all rowset kv in this tablet
3594
238
    txn->remove(rs_key0, rs_key1);
3595
238
    txn->remove(recyc_rs_key0, recyc_rs_key1);
3596
238
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
3597
3598
    // remove delete bitmap for MoW table
3599
238
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
3600
238
    txn->remove(pending_key);
3601
238
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
3602
238
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
3603
238
    txn->remove(delete_bitmap_start, delete_bitmap_end);
3604
3605
238
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
3606
238
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
3607
238
    txn->remove(dbm_start_key, dbm_end_key);
3608
238
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
3609
238
              << " end=" << hex(dbm_end_key);
3610
3611
238
    TxnErrorCode err = txn->commit();
3612
238
    if (err != TxnErrorCode::TXN_OK) {
3613
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
3614
0
        ret = -1;
3615
0
    }
3616
3617
238
    if (ret == 0) {
3618
        // All object files under tablet have been deleted
3619
238
        std::lock_guard lock(recycled_tablets_mtx_);
3620
238
        recycled_tablets_.insert(tablet_id);
3621
238
    }
3622
3623
238
    return ret;
3624
239
}
3625
3626
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
3627
6
                                               RecyclerMetricsContext& metrics_context) {
3628
6
    int ret = 0;
3629
6
    auto start_time = steady_clock::now();
3630
3631
6
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
3632
3633
    // collect resource ids
3634
6
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
3635
6
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
3636
6
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
3637
6
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
3638
3639
6
    int64_t recycle_rowsets_number = 0;
3640
6
    int64_t recycle_segments_number = 0;
3641
6
    int64_t recycle_rowsets_data_size = 0;
3642
6
    int64_t recycle_rowsets_index_size = 0;
3643
6
    int64_t max_rowset_version = 0;
3644
6
    int64_t min_rowset_creation_time = INT64_MAX;
3645
6
    int64_t max_rowset_creation_time = 0;
3646
6
    int64_t min_rowset_expiration_time = INT64_MAX;
3647
6
    int64_t max_rowset_expiration_time = 0;
3648
3649
6
    DORIS_CLOUD_DEFER {
3650
6
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3651
6
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3652
6
                .tag("instance_id", instance_id_)
3653
6
                .tag("tablet_id", tablet_id)
3654
6
                .tag("recycle rowsets number", recycle_rowsets_number)
3655
6
                .tag("recycle segments number", recycle_segments_number)
3656
6
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3657
6
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3658
6
                .tag("max rowset version", max_rowset_version)
3659
6
                .tag("min rowset creation time", min_rowset_creation_time)
3660
6
                .tag("max rowset creation time", max_rowset_creation_time)
3661
6
                .tag("min rowset expiration time", min_rowset_expiration_time)
3662
6
                .tag("max rowset expiration time", max_rowset_expiration_time)
3663
6
                .tag("ret", ret);
3664
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3649
6
    DORIS_CLOUD_DEFER {
3650
6
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3651
6
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3652
6
                .tag("instance_id", instance_id_)
3653
6
                .tag("tablet_id", tablet_id)
3654
6
                .tag("recycle rowsets number", recycle_rowsets_number)
3655
6
                .tag("recycle segments number", recycle_segments_number)
3656
6
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3657
6
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3658
6
                .tag("max rowset version", max_rowset_version)
3659
6
                .tag("min rowset creation time", min_rowset_creation_time)
3660
6
                .tag("max rowset creation time", max_rowset_creation_time)
3661
6
                .tag("min rowset expiration time", min_rowset_expiration_time)
3662
6
                .tag("max rowset expiration time", max_rowset_expiration_time)
3663
6
                .tag("ret", ret);
3664
6
    };
3665
3666
6
    std::unique_ptr<Transaction> txn;
3667
6
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3668
0
        LOG_WARNING("failed to recycle tablet ")
3669
0
                .tag("tablet id", tablet_id)
3670
0
                .tag("instance_id", instance_id_)
3671
0
                .tag("reason", "failed to create txn");
3672
0
        ret = -1;
3673
0
    }
3674
3675
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
3676
    // by the related operation logs.
3677
6
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
3678
6
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
3679
6
    MetaReader meta_reader(instance_id_);
3680
6
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
3681
6
    if (err == TxnErrorCode::TXN_OK) {
3682
6
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
3683
6
    }
3684
6
    if (err != TxnErrorCode::TXN_OK) {
3685
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3686
0
                .tag("tablet id", tablet_id)
3687
0
                .tag("err", err)
3688
0
                .tag("instance id", instance_id_);
3689
0
        ret = -1;
3690
0
    }
3691
3692
6
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
3693
6
             load_rowset_metas.size(), compact_rowset_metas.size())
3694
6
            .tag("instance_id", instance_id_)
3695
6
            .tag("tablet_id", tablet_id);
3696
3697
6
    SyncExecutor<int> concurrent_delete_executor(
3698
6
            _thread_pool_group.s3_producer_pool,
3699
6
            fmt::format("delete tablet {} s3 rowset", tablet_id),
3700
30
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3700
30
            [](const int& ret) { return ret != 0; });
3701
3702
30
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
3703
30
        recycle_rowsets_number += 1;
3704
30
        recycle_segments_number += rs_meta.num_segments();
3705
30
        recycle_rowsets_data_size += rs_meta.data_disk_size();
3706
30
        recycle_rowsets_index_size += rs_meta.index_disk_size();
3707
30
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
3708
30
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
3709
30
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
3710
30
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
3711
30
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
3712
30
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
3702
30
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
3703
30
        recycle_rowsets_number += 1;
3704
30
        recycle_segments_number += rs_meta.num_segments();
3705
30
        recycle_rowsets_data_size += rs_meta.data_disk_size();
3706
30
        recycle_rowsets_index_size += rs_meta.index_disk_size();
3707
30
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
3708
30
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
3709
30
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
3710
30
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
3711
30
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
3712
30
    };
3713
3714
29
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
3715
29
        update_rowset_stats(rs_meta);
3716
30
        concurrent_delete_executor.add([tablet_id, versionstamp, rs_meta_pb = rs_meta, this]() {
3717
            // recycle both versioned and non-versioned rowset meta key
3718
30
            std::string rowset_load_key = versioned::meta_rowset_load_key(
3719
30
                    {instance_id_, tablet_id, rs_meta_pb.end_version()});
3720
30
            std::string rowset_key =
3721
30
                    meta_rowset_key({instance_id_, tablet_id, rs_meta_pb.end_version()});
3722
30
            return recycle_rowset_meta_and_data(encode_versioned_key(rowset_load_key, versionstamp),
3723
30
                                                rs_meta_pb, rowset_key);
3724
30
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv
Line
Count
Source
3716
30
        concurrent_delete_executor.add([tablet_id, versionstamp, rs_meta_pb = rs_meta, this]() {
3717
            // recycle both versioned and non-versioned rowset meta key
3718
30
            std::string rowset_load_key = versioned::meta_rowset_load_key(
3719
30
                    {instance_id_, tablet_id, rs_meta_pb.end_version()});
3720
30
            std::string rowset_key =
3721
30
                    meta_rowset_key({instance_id_, tablet_id, rs_meta_pb.end_version()});
3722
30
            return recycle_rowset_meta_and_data(encode_versioned_key(rowset_load_key, versionstamp),
3723
30
                                                rs_meta_pb, rowset_key);
3724
30
        });
3725
29
    }
3726
3727
6
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
3728
0
        update_rowset_stats(rs_meta);
3729
0
        concurrent_delete_executor.add([tablet_id, versionstamp, rs_meta_pb = rs_meta, this]() {
3730
            // recycle both versioned and non-versioned rowset meta key
3731
0
            std::string rowset_load_key = versioned::meta_rowset_compact_key(
3732
0
                    {instance_id_, tablet_id, rs_meta_pb.end_version()});
3733
0
            std::string rowset_key =
3734
0
                    meta_rowset_key({instance_id_, tablet_id, rs_meta_pb.end_version()});
3735
0
            return recycle_rowset_meta_and_data(encode_versioned_key(rowset_load_key, versionstamp),
3736
0
                                                rs_meta_pb, rowset_key);
3737
0
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clEv
3738
0
    }
3739
3740
6
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
3741
0
        RecycleRowsetPB recycle_rowset;
3742
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
3743
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3744
0
            return -1;
3745
0
        }
3746
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
3747
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
3748
                // in old version, keep this key-value pair and it needs to be checked manually
3749
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3750
0
                return -1;
3751
0
            }
3752
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
3753
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3754
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3755
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
3756
0
                return -1;
3757
0
            }
3758
            // decode rowset_id
3759
0
            auto k1 = k;
3760
0
            k1.remove_prefix(1);
3761
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3762
0
            decode_key(&k1, &out);
3763
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3764
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3765
0
            LOG_INFO("delete rowset data")
3766
0
                    .tag("instance_id", instance_id_)
3767
0
                    .tag("tablet_id", tablet_id)
3768
0
                    .tag("rowset_id", rowset_id);
3769
3770
0
            concurrent_delete_executor.add(
3771
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
3772
                        // delete by prefix, the recycle rowset key will be deleted by range later.
3773
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
3774
0
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
3775
0
        } else {
3776
0
            concurrent_delete_executor.add(
3777
0
                    [k = std::string(k), recycle_rowset = std::move(recycle_rowset), this]() {
3778
0
                        return recycle_rowset_meta_and_data(k, recycle_rowset.rowset_meta());
3779
0
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE0_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE0_clEv
3780
0
        }
3781
0
        return 0;
3782
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_
3783
3784
6
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
3785
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
3786
0
                .tag("tablet id", tablet_id)
3787
0
                .tag("instance_id", instance_id_)
3788
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
3789
0
        ret = -1;
3790
0
    }
3791
3792
6
    bool finished = true;
3793
6
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3794
30
    for (int r : rets) {
3795
30
        if (r != 0) {
3796
0
            ret = -1;
3797
0
        }
3798
30
    }
3799
3800
6
    ret = finished ? ret : -1;
3801
3802
6
    if (ret != 0) { // failed recycle tablet data
3803
0
        LOG_WARNING("ret!=0")
3804
0
                .tag("finished", finished)
3805
0
                .tag("ret", ret)
3806
0
                .tag("instance_id", instance_id_)
3807
0
                .tag("tablet_id", tablet_id);
3808
0
        return ret;
3809
0
    }
3810
3811
6
    tablet_metrics_context_.total_recycled_data_size +=
3812
6
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3813
6
    tablet_metrics_context_.total_recycled_num += 1;
3814
6
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
3815
6
    segment_metrics_context_.total_recycled_data_size +=
3816
6
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3817
6
    metrics_context.total_recycled_data_size +=
3818
6
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3819
6
    tablet_metrics_context_.report();
3820
6
    segment_metrics_context_.report();
3821
6
    metrics_context.report();
3822
3823
6
    txn.reset();
3824
6
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3825
0
        LOG_WARNING("failed to recycle tablet ")
3826
0
                .tag("tablet id", tablet_id)
3827
0
                .tag("instance_id", instance_id_)
3828
0
                .tag("reason", "failed to create txn");
3829
0
        ret = -1;
3830
0
    }
3831
    // delete all rowset kv in this tablet
3832
6
    txn->remove(rs_key0, rs_key1);
3833
6
    txn->remove(recyc_rs_key0, recyc_rs_key1);
3834
3835
    // remove delete bitmap for MoW table
3836
6
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
3837
6
    txn->remove(pending_key);
3838
6
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
3839
6
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
3840
6
    txn->remove(delete_bitmap_start, delete_bitmap_end);
3841
3842
6
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
3843
6
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
3844
6
    txn->remove(dbm_start_key, dbm_end_key);
3845
6
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
3846
6
              << " end=" << hex(dbm_end_key);
3847
3848
6
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
3849
6
    std::string tablet_index_val;
3850
6
    err = txn->get(versioned_idx_key, &tablet_index_val);
3851
6
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
3852
0
        LOG_WARNING("failed to get tablet index kv")
3853
0
                .tag("instance_id", instance_id_)
3854
0
                .tag("tablet_id", tablet_id)
3855
0
                .tag("err", err);
3856
0
        ret = -1;
3857
6
    } else if (err == TxnErrorCode::TXN_OK) {
3858
        // If the tablet index kv exists, we need to delete it
3859
5
        TabletIndexPB tablet_index_pb;
3860
5
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
3861
0
            LOG_WARNING("failed to parse tablet index pb")
3862
0
                    .tag("instance_id", instance_id_)
3863
0
                    .tag("tablet_id", tablet_id);
3864
0
            ret = -1;
3865
5
        } else {
3866
5
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
3867
5
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
3868
5
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
3869
5
            txn->remove(versioned_inverted_idx_key);
3870
5
            txn->remove(versioned_idx_key);
3871
5
        }
3872
5
    }
3873
3874
6
    err = txn->commit();
3875
6
    if (err != TxnErrorCode::TXN_OK) {
3876
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
3877
0
        ret = -1;
3878
0
    }
3879
3880
6
    if (ret == 0) {
3881
        // All object files under tablet have been deleted
3882
6
        std::lock_guard lock(recycled_tablets_mtx_);
3883
6
        recycled_tablets_.insert(tablet_id);
3884
6
    }
3885
3886
6
    return ret;
3887
6
}
3888
3889
18
int InstanceRecycler::recycle_rowsets() {
3890
18
    if (should_recycle_versioned_keys()) {
3891
5
        return recycle_versioned_rowsets();
3892
5
    }
3893
3894
13
    const std::string task_name = "recycle_rowsets";
3895
13
    int64_t num_scanned = 0;
3896
13
    int64_t num_expired = 0;
3897
13
    int64_t num_prepare = 0;
3898
13
    int64_t num_compacted = 0;
3899
13
    int64_t num_empty_rowset = 0;
3900
13
    size_t total_rowset_key_size = 0;
3901
13
    size_t total_rowset_value_size = 0;
3902
13
    size_t expired_rowset_size = 0;
3903
13
    std::atomic_long num_recycled = 0;
3904
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3905
3906
13
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
3907
13
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
3908
13
    std::string recyc_rs_key0;
3909
13
    std::string recyc_rs_key1;
3910
13
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
3911
13
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
3912
3913
13
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
3914
3915
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3916
13
    register_recycle_task(task_name, start_time);
3917
3918
13
    DORIS_CLOUD_DEFER {
3919
13
        unregister_recycle_task(task_name);
3920
13
        int64_t cost =
3921
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3922
13
        metrics_context.finish_report();
3923
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3924
13
                .tag("instance_id", instance_id_)
3925
13
                .tag("num_scanned", num_scanned)
3926
13
                .tag("num_expired", num_expired)
3927
13
                .tag("num_recycled", num_recycled)
3928
13
                .tag("num_recycled.prepare", num_prepare)
3929
13
                .tag("num_recycled.compacted", num_compacted)
3930
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3931
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3932
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3933
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
3934
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
3918
13
    DORIS_CLOUD_DEFER {
3919
13
        unregister_recycle_task(task_name);
3920
13
        int64_t cost =
3921
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3922
13
        metrics_context.finish_report();
3923
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3924
13
                .tag("instance_id", instance_id_)
3925
13
                .tag("num_scanned", num_scanned)
3926
13
                .tag("num_expired", num_expired)
3927
13
                .tag("num_recycled", num_recycled)
3928
13
                .tag("num_recycled.prepare", num_prepare)
3929
13
                .tag("num_recycled.compacted", num_compacted)
3930
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3931
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3932
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3933
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
3934
13
    };
3935
3936
13
    std::vector<std::string> rowset_keys;
3937
    // rowset_id -> rowset_meta
3938
    // store rowset id and meta for statistics rs size when delete
3939
13
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
3940
3941
    // Store keys of rowset recycled by background workers
3942
13
    std::mutex async_recycled_rowset_keys_mutex;
3943
13
    std::vector<std::string> async_recycled_rowset_keys;
3944
13
    auto worker_pool = std::make_unique<SimpleThreadPool>(
3945
13
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
3946
13
    worker_pool->start();
3947
    // TODO bacth delete
3948
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
3949
4.00k
        std::string dbm_start_key =
3950
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3951
4.00k
        std::string dbm_end_key = dbm_start_key;
3952
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
3953
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
3954
4.00k
        if (ret != 0) {
3955
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
3956
0
                         << instance_id_;
3957
0
        }
3958
4.00k
        return ret;
3959
4.00k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
3948
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
3949
4.00k
        std::string dbm_start_key =
3950
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3951
4.00k
        std::string dbm_end_key = dbm_start_key;
3952
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
3953
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
3954
4.00k
        if (ret != 0) {
3955
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
3956
0
                         << instance_id_;
3957
0
        }
3958
4.00k
        return ret;
3959
4.00k
    };
3960
13
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
3961
900
                                            int64_t tablet_id, const std::string& rowset_id) {
3962
        // Try to delete rowset data in background thread
3963
900
        int ret = worker_pool->submit_with_timeout(
3964
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3965
803
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3966
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3967
0
                        return;
3968
0
                    }
3969
803
                    std::vector<std::string> keys;
3970
803
                    {
3971
803
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3972
803
                        async_recycled_rowset_keys.push_back(std::move(key));
3973
803
                        if (async_recycled_rowset_keys.size() > 100) {
3974
7
                            keys.swap(async_recycled_rowset_keys);
3975
7
                        }
3976
803
                    }
3977
803
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
3978
803
                    if (keys.empty()) return;
3979
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3980
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3981
0
                                     << instance_id_;
3982
7
                    } else {
3983
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3984
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3985
7
                                           num_recycled, start_time);
3986
7
                    }
3987
7
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
3964
803
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3965
803
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3966
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3967
0
                        return;
3968
0
                    }
3969
803
                    std::vector<std::string> keys;
3970
803
                    {
3971
803
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3972
803
                        async_recycled_rowset_keys.push_back(std::move(key));
3973
803
                        if (async_recycled_rowset_keys.size() > 100) {
3974
7
                            keys.swap(async_recycled_rowset_keys);
3975
7
                        }
3976
803
                    }
3977
803
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
3978
803
                    if (keys.empty()) return;
3979
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3980
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3981
0
                                     << instance_id_;
3982
7
                    } else {
3983
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3984
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3985
7
                                           num_recycled, start_time);
3986
7
                    }
3987
7
                },
3988
900
                0);
3989
900
        if (ret == 0) return 0;
3990
        // Submit task failed, delete rowset data in current thread
3991
97
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3992
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3993
0
            return -1;
3994
0
        }
3995
97
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
3996
0
            return -1;
3997
0
        }
3998
97
        rowset_keys.push_back(std::move(key));
3999
97
        return 0;
4000
97
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
3961
900
                                            int64_t tablet_id, const std::string& rowset_id) {
3962
        // Try to delete rowset data in background thread
3963
900
        int ret = worker_pool->submit_with_timeout(
3964
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3965
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3966
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3967
900
                        return;
3968
900
                    }
3969
900
                    std::vector<std::string> keys;
3970
900
                    {
3971
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3972
900
                        async_recycled_rowset_keys.push_back(std::move(key));
3973
900
                        if (async_recycled_rowset_keys.size() > 100) {
3974
900
                            keys.swap(async_recycled_rowset_keys);
3975
900
                        }
3976
900
                    }
3977
900
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
3978
900
                    if (keys.empty()) return;
3979
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3980
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3981
900
                                     << instance_id_;
3982
900
                    } else {
3983
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3984
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3985
900
                                           num_recycled, start_time);
3986
900
                    }
3987
900
                },
3988
900
                0);
3989
900
        if (ret == 0) return 0;
3990
        // Submit task failed, delete rowset data in current thread
3991
97
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3992
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3993
0
            return -1;
3994
0
        }
3995
97
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
3996
0
            return -1;
3997
0
        }
3998
97
        rowset_keys.push_back(std::move(key));
3999
97
        return 0;
4000
97
    };
4001
4002
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4003
4004
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4005
4.00k
        ++num_scanned;
4006
4.00k
        total_rowset_key_size += k.size();
4007
4.00k
        total_rowset_value_size += v.size();
4008
4.00k
        RecycleRowsetPB rowset;
4009
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4010
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4011
0
            return -1;
4012
0
        }
4013
4014
4.00k
        int64_t current_time = ::time(nullptr);
4015
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4016
4017
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4018
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4019
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4020
4.00k
        if (current_time < expiration) { // not expired
4021
0
            return 0;
4022
0
        }
4023
4.00k
        ++num_expired;
4024
4.00k
        expired_rowset_size += v.size();
4025
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4026
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4027
                // in old version, keep this key-value pair and it needs to be checked manually
4028
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4029
0
                return -1;
4030
0
            }
4031
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4032
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4033
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4034
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4035
0
                rowset_keys.emplace_back(k);
4036
0
                return -1;
4037
0
            }
4038
            // decode rowset_id
4039
250
            auto k1 = k;
4040
250
            k1.remove_prefix(1);
4041
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4042
250
            decode_key(&k1, &out);
4043
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4044
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4045
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4046
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4047
250
                      << " task_type=" << metrics_context.operation_type;
4048
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4049
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4050
0
                return -1;
4051
0
            }
4052
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4053
250
            metrics_context.total_recycled_num++;
4054
250
            segment_metrics_context_.total_recycled_data_size +=
4055
250
                    rowset.rowset_meta().total_disk_size();
4056
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4057
250
            segment_metrics_context_.report();
4058
250
            metrics_context.report();
4059
250
            return 0;
4060
250
        }
4061
        // TODO(plat1ko): check rowset not referenced
4062
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
4063
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4064
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4065
0
                LOG_INFO("recycle rowset that has empty resource id");
4066
0
            } else {
4067
                // other situations, keep this key-value pair and it needs to be checked manually
4068
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4069
0
                return -1;
4070
0
            }
4071
0
        }
4072
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4073
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4074
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4075
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4076
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4077
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4078
3.75k
                  << " rowset_meta_size=" << v.size()
4079
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4080
3.75k
                  << " task_type=" << metrics_context.operation_type;
4081
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4082
            // unable to calculate file path, can only be deleted by rowset id prefix
4083
650
            num_prepare += 1;
4084
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4085
650
                                             rowset_meta->tablet_id(),
4086
650
                                             rowset_meta->rowset_id_v2()) != 0) {
4087
0
                return -1;
4088
0
            }
4089
3.10k
        } else {
4090
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4091
3.10k
            rowset_keys.emplace_back(k);
4092
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4093
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4094
3.10k
                ++num_empty_rowset;
4095
3.10k
            }
4096
3.10k
        }
4097
3.75k
        return 0;
4098
3.75k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4004
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4005
4.00k
        ++num_scanned;
4006
4.00k
        total_rowset_key_size += k.size();
4007
4.00k
        total_rowset_value_size += v.size();
4008
4.00k
        RecycleRowsetPB rowset;
4009
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4010
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4011
0
            return -1;
4012
0
        }
4013
4014
4.00k
        int64_t current_time = ::time(nullptr);
4015
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4016
4017
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4018
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4019
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4020
4.00k
        if (current_time < expiration) { // not expired
4021
0
            return 0;
4022
0
        }
4023
4.00k
        ++num_expired;
4024
4.00k
        expired_rowset_size += v.size();
4025
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4026
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4027
                // in old version, keep this key-value pair and it needs to be checked manually
4028
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4029
0
                return -1;
4030
0
            }
4031
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4032
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4033
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4034
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4035
0
                rowset_keys.emplace_back(k);
4036
0
                return -1;
4037
0
            }
4038
            // decode rowset_id
4039
250
            auto k1 = k;
4040
250
            k1.remove_prefix(1);
4041
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4042
250
            decode_key(&k1, &out);
4043
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4044
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4045
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4046
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4047
250
                      << " task_type=" << metrics_context.operation_type;
4048
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4049
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4050
0
                return -1;
4051
0
            }
4052
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4053
250
            metrics_context.total_recycled_num++;
4054
250
            segment_metrics_context_.total_recycled_data_size +=
4055
250
                    rowset.rowset_meta().total_disk_size();
4056
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4057
250
            segment_metrics_context_.report();
4058
250
            metrics_context.report();
4059
250
            return 0;
4060
250
        }
4061
        // TODO(plat1ko): check rowset not referenced
4062
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
4063
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4064
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4065
0
                LOG_INFO("recycle rowset that has empty resource id");
4066
0
            } else {
4067
                // other situations, keep this key-value pair and it needs to be checked manually
4068
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4069
0
                return -1;
4070
0
            }
4071
0
        }
4072
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4073
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4074
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4075
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4076
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4077
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4078
3.75k
                  << " rowset_meta_size=" << v.size()
4079
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4080
3.75k
                  << " task_type=" << metrics_context.operation_type;
4081
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4082
            // unable to calculate file path, can only be deleted by rowset id prefix
4083
650
            num_prepare += 1;
4084
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4085
650
                                             rowset_meta->tablet_id(),
4086
650
                                             rowset_meta->rowset_id_v2()) != 0) {
4087
0
                return -1;
4088
0
            }
4089
3.10k
        } else {
4090
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4091
3.10k
            rowset_keys.emplace_back(k);
4092
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4093
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4094
3.10k
                ++num_empty_rowset;
4095
3.10k
            }
4096
3.10k
        }
4097
3.75k
        return 0;
4098
3.75k
    };
4099
4100
21
    auto loop_done = [&]() -> int {
4101
21
        std::vector<std::string> rowset_keys_to_delete;
4102
        // rowset_id -> rowset_meta
4103
        // store rowset id and meta for statistics rs size when delete
4104
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4105
21
        rowset_keys_to_delete.swap(rowset_keys);
4106
21
        rowsets_to_delete.swap(rowsets);
4107
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4108
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4109
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4110
21
                                   metrics_context) != 0) {
4111
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4112
0
                return;
4113
0
            }
4114
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4115
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4116
0
                    return;
4117
0
                }
4118
3.10k
            }
4119
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4120
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4121
0
                return;
4122
0
            }
4123
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4124
21
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
4108
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4109
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4110
21
                                   metrics_context) != 0) {
4111
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4112
0
                return;
4113
0
            }
4114
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4115
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4116
0
                    return;
4117
0
                }
4118
3.10k
            }
4119
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4120
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4121
0
                return;
4122
0
            }
4123
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4124
21
        });
4125
21
        return 0;
4126
21
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
4100
21
    auto loop_done = [&]() -> int {
4101
21
        std::vector<std::string> rowset_keys_to_delete;
4102
        // rowset_id -> rowset_meta
4103
        // store rowset id and meta for statistics rs size when delete
4104
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4105
21
        rowset_keys_to_delete.swap(rowset_keys);
4106
21
        rowsets_to_delete.swap(rowsets);
4107
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4108
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4109
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4110
21
                                   metrics_context) != 0) {
4111
21
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4112
21
                return;
4113
21
            }
4114
21
            for (const auto& [_, rs] : rowsets_to_delete) {
4115
21
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4116
21
                    return;
4117
21
                }
4118
21
            }
4119
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4120
21
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4121
21
                return;
4122
21
            }
4123
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4124
21
        });
4125
21
        return 0;
4126
21
    };
4127
4128
13
    if (config::enable_recycler_stats_metrics) {
4129
0
        scan_and_statistics_rowsets();
4130
0
    }
4131
    // recycle_func and loop_done for scan and recycle
4132
13
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
4133
13
                               std::move(loop_done));
4134
4135
13
    worker_pool->stop();
4136
4137
13
    if (!async_recycled_rowset_keys.empty()) {
4138
2
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
4139
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4140
0
            return -1;
4141
2
        } else {
4142
2
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
4143
2
        }
4144
2
    }
4145
13
    return ret;
4146
13
}
4147
4148
13
int InstanceRecycler::recycle_restore_jobs() {
4149
13
    const std::string task_name = "recycle_restore_jobs";
4150
13
    int64_t num_scanned = 0;
4151
13
    int64_t num_expired = 0;
4152
13
    int64_t num_recycled = 0;
4153
13
    int64_t num_aborted = 0;
4154
4155
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4156
4157
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
4158
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
4159
13
    std::string restore_job_key0;
4160
13
    std::string restore_job_key1;
4161
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
4162
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
4163
4164
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
4165
4166
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4167
13
    register_recycle_task(task_name, start_time);
4168
4169
13
    DORIS_CLOUD_DEFER {
4170
13
        unregister_recycle_task(task_name);
4171
13
        int64_t cost =
4172
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4173
13
        metrics_context.finish_report();
4174
4175
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
4176
13
                .tag("instance_id", instance_id_)
4177
13
                .tag("num_scanned", num_scanned)
4178
13
                .tag("num_expired", num_expired)
4179
13
                .tag("num_recycled", num_recycled)
4180
13
                .tag("num_aborted", num_aborted);
4181
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
4169
13
    DORIS_CLOUD_DEFER {
4170
13
        unregister_recycle_task(task_name);
4171
13
        int64_t cost =
4172
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4173
13
        metrics_context.finish_report();
4174
4175
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
4176
13
                .tag("instance_id", instance_id_)
4177
13
                .tag("num_scanned", num_scanned)
4178
13
                .tag("num_expired", num_expired)
4179
13
                .tag("num_recycled", num_recycled)
4180
13
                .tag("num_aborted", num_aborted);
4181
13
    };
4182
4183
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4184
4185
13
    std::vector<std::string_view> restore_job_keys;
4186
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
4187
41
        ++num_scanned;
4188
41
        RestoreJobCloudPB restore_job_pb;
4189
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
4190
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
4191
0
            return -1;
4192
0
        }
4193
41
        int64_t expiration =
4194
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
4195
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
4196
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
4197
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
4198
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
4199
0
                   << " state=" << restore_job_pb.state();
4200
41
        int64_t current_time = ::time(nullptr);
4201
41
        if (current_time < expiration) { // not expired
4202
0
            return 0;
4203
0
        }
4204
41
        ++num_expired;
4205
4206
41
        int64_t tablet_id = restore_job_pb.tablet_id();
4207
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
4208
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
4209
4210
41
        std::unique_ptr<Transaction> txn;
4211
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4212
41
        if (err != TxnErrorCode::TXN_OK) {
4213
0
            LOG_WARNING("failed to recycle restore job")
4214
0
                    .tag("err", err)
4215
0
                    .tag("tablet id", tablet_id)
4216
0
                    .tag("instance_id", instance_id_)
4217
0
                    .tag("reason", "failed to create txn");
4218
0
            return -1;
4219
0
        }
4220
4221
41
        std::string val;
4222
41
        err = txn->get(k, &val);
4223
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
4224
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
4225
0
            return 0;
4226
0
        }
4227
41
        if (err != TxnErrorCode::TXN_OK) {
4228
0
            LOG_WARNING("failed to get kv");
4229
0
            return -1;
4230
0
        }
4231
41
        restore_job_pb.Clear();
4232
41
        if (!restore_job_pb.ParseFromString(val)) {
4233
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
4234
0
            return -1;
4235
0
        }
4236
4237
        // PREPARED or COMMITTED, change state to DROPPED and return
4238
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
4239
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
4240
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
4241
0
            restore_job_pb.set_need_recycle_data(true);
4242
0
            txn->put(k, restore_job_pb.SerializeAsString());
4243
0
            err = txn->commit();
4244
0
            if (err != TxnErrorCode::TXN_OK) {
4245
0
                LOG_WARNING("failed to commit txn: {}", err);
4246
0
                return -1;
4247
0
            }
4248
0
            num_aborted++;
4249
0
            return 0;
4250
0
        }
4251
4252
        // Change state to RECYCLING
4253
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
4254
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
4255
21
            txn->put(k, restore_job_pb.SerializeAsString());
4256
21
            err = txn->commit();
4257
21
            if (err != TxnErrorCode::TXN_OK) {
4258
0
                LOG_WARNING("failed to commit txn: {}", err);
4259
0
                return -1;
4260
0
            }
4261
21
            return 0;
4262
21
        }
4263
4264
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4265
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4266
4267
        // Recycle all data associated with the restore job.
4268
        // This includes rowsets, segments, and related resources.
4269
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
4270
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
4271
0
            LOG_WARNING("failed to recycle tablet")
4272
0
                    .tag("tablet_id", tablet_id)
4273
0
                    .tag("instance_id", instance_id_);
4274
0
            return -1;
4275
0
        }
4276
4277
        // delete all restore job rowset kv
4278
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4279
4280
20
        err = txn->commit();
4281
20
        if (err != TxnErrorCode::TXN_OK) {
4282
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
4283
0
                    .tag("err", err)
4284
0
                    .tag("tablet id", tablet_id)
4285
0
                    .tag("instance_id", instance_id_)
4286
0
                    .tag("reason", "failed to commit txn");
4287
0
            return -1;
4288
0
        }
4289
4290
20
        metrics_context.total_recycled_num = ++num_recycled;
4291
20
        metrics_context.report();
4292
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4293
20
        restore_job_keys.push_back(k);
4294
4295
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
4296
20
                  << " tablet_id=" << tablet_id;
4297
20
        return 0;
4298
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4186
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
4187
41
        ++num_scanned;
4188
41
        RestoreJobCloudPB restore_job_pb;
4189
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
4190
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
4191
0
            return -1;
4192
0
        }
4193
41
        int64_t expiration =
4194
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
4195
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
4196
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
4197
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
4198
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
4199
0
                   << " state=" << restore_job_pb.state();
4200
41
        int64_t current_time = ::time(nullptr);
4201
41
        if (current_time < expiration) { // not expired
4202
0
            return 0;
4203
0
        }
4204
41
        ++num_expired;
4205
4206
41
        int64_t tablet_id = restore_job_pb.tablet_id();
4207
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
4208
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
4209
4210
41
        std::unique_ptr<Transaction> txn;
4211
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4212
41
        if (err != TxnErrorCode::TXN_OK) {
4213
0
            LOG_WARNING("failed to recycle restore job")
4214
0
                    .tag("err", err)
4215
0
                    .tag("tablet id", tablet_id)
4216
0
                    .tag("instance_id", instance_id_)
4217
0
                    .tag("reason", "failed to create txn");
4218
0
            return -1;
4219
0
        }
4220
4221
41
        std::string val;
4222
41
        err = txn->get(k, &val);
4223
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
4224
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
4225
0
            return 0;
4226
0
        }
4227
41
        if (err != TxnErrorCode::TXN_OK) {
4228
0
            LOG_WARNING("failed to get kv");
4229
0
            return -1;
4230
0
        }
4231
41
        restore_job_pb.Clear();
4232
41
        if (!restore_job_pb.ParseFromString(val)) {
4233
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
4234
0
            return -1;
4235
0
        }
4236
4237
        // PREPARED or COMMITTED, change state to DROPPED and return
4238
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
4239
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
4240
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
4241
0
            restore_job_pb.set_need_recycle_data(true);
4242
0
            txn->put(k, restore_job_pb.SerializeAsString());
4243
0
            err = txn->commit();
4244
0
            if (err != TxnErrorCode::TXN_OK) {
4245
0
                LOG_WARNING("failed to commit txn: {}", err);
4246
0
                return -1;
4247
0
            }
4248
0
            num_aborted++;
4249
0
            return 0;
4250
0
        }
4251
4252
        // Change state to RECYCLING
4253
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
4254
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
4255
21
            txn->put(k, restore_job_pb.SerializeAsString());
4256
21
            err = txn->commit();
4257
21
            if (err != TxnErrorCode::TXN_OK) {
4258
0
                LOG_WARNING("failed to commit txn: {}", err);
4259
0
                return -1;
4260
0
            }
4261
21
            return 0;
4262
21
        }
4263
4264
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4265
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4266
4267
        // Recycle all data associated with the restore job.
4268
        // This includes rowsets, segments, and related resources.
4269
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
4270
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
4271
0
            LOG_WARNING("failed to recycle tablet")
4272
0
                    .tag("tablet_id", tablet_id)
4273
0
                    .tag("instance_id", instance_id_);
4274
0
            return -1;
4275
0
        }
4276
4277
        // delete all restore job rowset kv
4278
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4279
4280
20
        err = txn->commit();
4281
20
        if (err != TxnErrorCode::TXN_OK) {
4282
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
4283
0
                    .tag("err", err)
4284
0
                    .tag("tablet id", tablet_id)
4285
0
                    .tag("instance_id", instance_id_)
4286
0
                    .tag("reason", "failed to commit txn");
4287
0
            return -1;
4288
0
        }
4289
4290
20
        metrics_context.total_recycled_num = ++num_recycled;
4291
20
        metrics_context.report();
4292
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4293
20
        restore_job_keys.push_back(k);
4294
4295
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
4296
20
                  << " tablet_id=" << tablet_id;
4297
20
        return 0;
4298
20
    };
4299
4300
13
    auto loop_done = [&restore_job_keys, this]() -> int {
4301
3
        if (restore_job_keys.empty()) return 0;
4302
1
        DORIS_CLOUD_DEFER {
4303
1
            restore_job_keys.clear();
4304
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4302
1
        DORIS_CLOUD_DEFER {
4303
1
            restore_job_keys.clear();
4304
1
        };
4305
4306
1
        std::unique_ptr<Transaction> txn;
4307
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4308
1
        if (err != TxnErrorCode::TXN_OK) {
4309
0
            LOG_WARNING("failed to recycle restore job")
4310
0
                    .tag("err", err)
4311
0
                    .tag("instance_id", instance_id_)
4312
0
                    .tag("reason", "failed to create txn");
4313
0
            return -1;
4314
0
        }
4315
20
        for (auto& k : restore_job_keys) {
4316
20
            txn->remove(k);
4317
20
        }
4318
1
        err = txn->commit();
4319
1
        if (err != TxnErrorCode::TXN_OK) {
4320
0
            LOG_WARNING("failed to recycle restore job")
4321
0
                    .tag("err", err)
4322
0
                    .tag("instance_id", instance_id_)
4323
0
                    .tag("reason", "failed to commit txn");
4324
0
            return -1;
4325
0
        }
4326
1
        return 0;
4327
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
4300
3
    auto loop_done = [&restore_job_keys, this]() -> int {
4301
3
        if (restore_job_keys.empty()) return 0;
4302
1
        DORIS_CLOUD_DEFER {
4303
1
            restore_job_keys.clear();
4304
1
        };
4305
4306
1
        std::unique_ptr<Transaction> txn;
4307
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4308
1
        if (err != TxnErrorCode::TXN_OK) {
4309
0
            LOG_WARNING("failed to recycle restore job")
4310
0
                    .tag("err", err)
4311
0
                    .tag("instance_id", instance_id_)
4312
0
                    .tag("reason", "failed to create txn");
4313
0
            return -1;
4314
0
        }
4315
20
        for (auto& k : restore_job_keys) {
4316
20
            txn->remove(k);
4317
20
        }
4318
1
        err = txn->commit();
4319
1
        if (err != TxnErrorCode::TXN_OK) {
4320
0
            LOG_WARNING("failed to recycle restore job")
4321
0
                    .tag("err", err)
4322
0
                    .tag("instance_id", instance_id_)
4323
0
                    .tag("reason", "failed to commit txn");
4324
0
            return -1;
4325
0
        }
4326
1
        return 0;
4327
1
    };
4328
4329
13
    if (config::enable_recycler_stats_metrics) {
4330
0
        scan_and_statistics_restore_jobs();
4331
0
    }
4332
4333
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
4334
13
                            std::move(loop_done));
4335
13
}
4336
4337
5
int InstanceRecycler::recycle_versioned_rowsets() {
4338
5
    const std::string task_name = "recycle_rowsets";
4339
5
    int64_t num_scanned = 0;
4340
5
    int64_t num_expired = 0;
4341
5
    int64_t num_prepare = 0;
4342
5
    int64_t num_compacted = 0;
4343
5
    int64_t num_empty_rowset = 0;
4344
5
    size_t total_rowset_key_size = 0;
4345
5
    size_t total_rowset_value_size = 0;
4346
5
    size_t expired_rowset_size = 0;
4347
5
    std::atomic_long num_recycled = 0;
4348
5
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4349
4350
5
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4351
5
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4352
5
    std::string recyc_rs_key0;
4353
5
    std::string recyc_rs_key1;
4354
5
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4355
5
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4356
4357
5
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4358
4359
5
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4360
5
    register_recycle_task(task_name, start_time);
4361
4362
5
    DORIS_CLOUD_DEFER {
4363
5
        unregister_recycle_task(task_name);
4364
5
        int64_t cost =
4365
5
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4366
5
        metrics_context.finish_report();
4367
5
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4368
5
                .tag("instance_id", instance_id_)
4369
5
                .tag("num_scanned", num_scanned)
4370
5
                .tag("num_expired", num_expired)
4371
5
                .tag("num_recycled", num_recycled)
4372
5
                .tag("num_recycled.prepare", num_prepare)
4373
5
                .tag("num_recycled.compacted", num_compacted)
4374
5
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4375
5
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4376
5
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4377
5
                .tag("expired_rowset_meta_size", expired_rowset_size);
4378
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
4362
5
    DORIS_CLOUD_DEFER {
4363
5
        unregister_recycle_task(task_name);
4364
5
        int64_t cost =
4365
5
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4366
5
        metrics_context.finish_report();
4367
5
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4368
5
                .tag("instance_id", instance_id_)
4369
5
                .tag("num_scanned", num_scanned)
4370
5
                .tag("num_expired", num_expired)
4371
5
                .tag("num_recycled", num_recycled)
4372
5
                .tag("num_recycled.prepare", num_prepare)
4373
5
                .tag("num_recycled.compacted", num_compacted)
4374
5
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4375
5
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4376
5
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4377
5
                .tag("expired_rowset_meta_size", expired_rowset_size);
4378
5
    };
4379
4380
5
    std::vector<std::string> orphan_rowset_keys;
4381
4382
    // Store keys of rowset recycled by background workers
4383
5
    std::mutex async_recycled_rowset_keys_mutex;
4384
5
    std::vector<std::string> async_recycled_rowset_keys;
4385
5
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4386
5
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
4387
5
    worker_pool->start();
4388
5
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
4389
5
                                            int64_t tablet_id, const std::string& rowset_id) {
4390
        // Try to delete rowset data in background thread
4391
0
        int ret = worker_pool->submit_with_timeout(
4392
0
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4393
0
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4394
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4395
0
                        return;
4396
0
                    }
4397
                    // The async recycled rowsets are staled format or has not been used,
4398
                    // so we don't need to check the rowset ref count key.
4399
0
                    std::vector<std::string> keys;
4400
0
                    {
4401
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4402
0
                        async_recycled_rowset_keys.push_back(std::move(key));
4403
0
                        if (async_recycled_rowset_keys.size() > 100) {
4404
0
                            keys.swap(async_recycled_rowset_keys);
4405
0
                        }
4406
0
                    }
4407
0
                    if (keys.empty()) return;
4408
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4409
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4410
0
                                     << instance_id_;
4411
0
                    } else {
4412
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4413
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4414
0
                                           num_recycled, start_time);
4415
0
                    }
4416
0
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
4417
0
                0);
4418
0
        if (ret == 0) return 0;
4419
        // Submit task failed, delete rowset data in current thread
4420
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4421
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4422
0
            return -1;
4423
0
        }
4424
0
        orphan_rowset_keys.push_back(std::move(key));
4425
0
        return 0;
4426
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
4427
4428
5
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4429
4430
13
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
4431
13
        ++num_scanned;
4432
13
        total_rowset_key_size += k.size();
4433
13
        total_rowset_value_size += v.size();
4434
13
        RecycleRowsetPB rowset;
4435
13
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4436
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4437
0
            return -1;
4438
0
        }
4439
4440
13
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4441
4442
13
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4443
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
4444
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4445
13
        int64_t current_time = ::time(nullptr);
4446
13
        if (current_time < final_expiration) { // not expired
4447
0
            return 0;
4448
0
        }
4449
13
        ++num_expired;
4450
13
        expired_rowset_size += v.size();
4451
13
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4452
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4453
                // in old version, keep this key-value pair and it needs to be checked manually
4454
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4455
0
                return -1;
4456
0
            }
4457
0
            if (rowset.resource_id().empty()) [[unlikely]] {
4458
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4459
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4460
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4461
0
                orphan_rowset_keys.emplace_back(k);
4462
0
                return -1;
4463
0
            }
4464
            // decode rowset_id
4465
0
            auto k1 = k;
4466
0
            k1.remove_prefix(1);
4467
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4468
0
            decode_key(&k1, &out);
4469
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4470
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4471
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4472
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
4473
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4474
0
                                             rowset.tablet_id(), rowset_id) != 0) {
4475
0
                return -1;
4476
0
            }
4477
0
            return 0;
4478
0
        }
4479
        // TODO(plat1ko): check rowset not referenced
4480
13
        auto rowset_meta = rowset.mutable_rowset_meta();
4481
13
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4482
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4483
0
                LOG_INFO("recycle rowset that has empty resource id");
4484
0
            } else {
4485
                // other situations, keep this key-value pair and it needs to be checked manually
4486
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4487
0
                return -1;
4488
0
            }
4489
0
        }
4490
13
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4491
13
                  << " tablet_id=" << rowset_meta->tablet_id()
4492
13
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4493
13
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4494
13
                  << "] txn_id=" << rowset_meta->txn_id()
4495
13
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4496
13
                  << " rowset_meta_size=" << v.size()
4497
13
                  << " creation_time=" << rowset_meta->creation_time();
4498
13
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4499
            // unable to calculate file path, can only be deleted by rowset id prefix
4500
0
            num_prepare += 1;
4501
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4502
0
                                             rowset_meta->tablet_id(),
4503
0
                                             rowset_meta->rowset_id_v2()) != 0) {
4504
0
                return -1;
4505
0
            }
4506
13
        } else {
4507
13
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
4508
13
            worker_pool->submit(
4509
13
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
4510
13
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
4511
0
                            return;
4512
0
                        }
4513
13
                        num_compacted += is_compacted;
4514
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
4515
13
                        if (rowset_meta.num_segments() == 0) {
4516
0
                            ++num_empty_rowset;
4517
0
                        }
4518
13
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
4509
13
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
4510
13
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
4511
0
                            return;
4512
0
                        }
4513
13
                        num_compacted += is_compacted;
4514
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
4515
13
                        if (rowset_meta.num_segments() == 0) {
4516
0
                            ++num_empty_rowset;
4517
0
                        }
4518
13
                    });
4519
13
        }
4520
13
        return 0;
4521
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4430
13
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
4431
13
        ++num_scanned;
4432
13
        total_rowset_key_size += k.size();
4433
13
        total_rowset_value_size += v.size();
4434
13
        RecycleRowsetPB rowset;
4435
13
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4436
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4437
0
            return -1;
4438
0
        }
4439
4440
13
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4441
4442
13
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4443
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
4444
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4445
13
        int64_t current_time = ::time(nullptr);
4446
13
        if (current_time < final_expiration) { // not expired
4447
0
            return 0;
4448
0
        }
4449
13
        ++num_expired;
4450
13
        expired_rowset_size += v.size();
4451
13
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4452
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4453
                // in old version, keep this key-value pair and it needs to be checked manually
4454
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4455
0
                return -1;
4456
0
            }
4457
0
            if (rowset.resource_id().empty()) [[unlikely]] {
4458
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4459
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4460
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4461
0
                orphan_rowset_keys.emplace_back(k);
4462
0
                return -1;
4463
0
            }
4464
            // decode rowset_id
4465
0
            auto k1 = k;
4466
0
            k1.remove_prefix(1);
4467
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4468
0
            decode_key(&k1, &out);
4469
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4470
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4471
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4472
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
4473
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4474
0
                                             rowset.tablet_id(), rowset_id) != 0) {
4475
0
                return -1;
4476
0
            }
4477
0
            return 0;
4478
0
        }
4479
        // TODO(plat1ko): check rowset not referenced
4480
13
        auto rowset_meta = rowset.mutable_rowset_meta();
4481
13
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4482
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4483
0
                LOG_INFO("recycle rowset that has empty resource id");
4484
0
            } else {
4485
                // other situations, keep this key-value pair and it needs to be checked manually
4486
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4487
0
                return -1;
4488
0
            }
4489
0
        }
4490
13
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4491
13
                  << " tablet_id=" << rowset_meta->tablet_id()
4492
13
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4493
13
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4494
13
                  << "] txn_id=" << rowset_meta->txn_id()
4495
13
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4496
13
                  << " rowset_meta_size=" << v.size()
4497
13
                  << " creation_time=" << rowset_meta->creation_time();
4498
13
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4499
            // unable to calculate file path, can only be deleted by rowset id prefix
4500
0
            num_prepare += 1;
4501
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4502
0
                                             rowset_meta->tablet_id(),
4503
0
                                             rowset_meta->rowset_id_v2()) != 0) {
4504
0
                return -1;
4505
0
            }
4506
13
        } else {
4507
13
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
4508
13
            worker_pool->submit(
4509
13
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
4510
13
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
4511
13
                            return;
4512
13
                        }
4513
13
                        num_compacted += is_compacted;
4514
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
4515
13
                        if (rowset_meta.num_segments() == 0) {
4516
13
                            ++num_empty_rowset;
4517
13
                        }
4518
13
                    });
4519
13
        }
4520
13
        return 0;
4521
13
    };
4522
4523
5
    if (config::enable_recycler_stats_metrics) {
4524
0
        scan_and_statistics_rowsets();
4525
0
    }
4526
4527
5
    auto loop_done = [&]() -> int {
4528
4
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
4529
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4530
0
        }
4531
4
        orphan_rowset_keys.clear();
4532
4
        return 0;
4533
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
4527
4
    auto loop_done = [&]() -> int {
4528
4
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
4529
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4530
0
        }
4531
4
        orphan_rowset_keys.clear();
4532
4
        return 0;
4533
4
    };
4534
4535
    // recycle_func and loop_done for scan and recycle
4536
5
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
4537
5
                               std::move(loop_done));
4538
4539
5
    worker_pool->stop();
4540
4541
5
    if (!async_recycled_rowset_keys.empty()) {
4542
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
4543
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4544
0
            return -1;
4545
0
        } else {
4546
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
4547
0
        }
4548
0
    }
4549
5
    return ret;
4550
5
}
4551
4552
int InstanceRecycler::recycle_rowset_meta_and_data(std::string_view recycle_rowset_key,
4553
                                                   const RowsetMetaCloudPB& rowset_meta,
4554
43
                                                   std::string_view secondary_rowset_key) {
4555
43
    constexpr int MAX_RETRY = 10;
4556
43
    int64_t tablet_id = rowset_meta.tablet_id();
4557
43
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
4558
43
    std::string_view reference_instance_id = instance_id_;
4559
43
    if (rowset_meta.has_reference_instance_id()) {
4560
13
        reference_instance_id = rowset_meta.reference_instance_id();
4561
13
    }
4562
4563
43
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
4564
43
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
4565
43
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(recycle_rowset_key));
4566
43
    AnnotateTag instance_id_tag("instance_id", instance_id_);
4567
43
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
4568
48
    for (int i = 0; i < MAX_RETRY; ++i) {
4569
48
        std::unique_ptr<Transaction> txn;
4570
48
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4571
48
        if (err != TxnErrorCode::TXN_OK) {
4572
0
            LOG_WARNING("failed to create txn").tag("err", err);
4573
0
            return -1;
4574
0
        }
4575
4576
48
        std::string rowset_ref_count_key =
4577
48
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
4578
48
        int64_t ref_count = 0;
4579
48
        {
4580
48
            std::string value;
4581
48
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
4582
48
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
4583
                // This is the old version rowset, we could recycle it directly.
4584
2
                ref_count = 1;
4585
46
            } else if (err != TxnErrorCode::TXN_OK) {
4586
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
4587
0
                return -1;
4588
46
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
4589
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
4590
0
                return -1;
4591
0
            }
4592
48
        }
4593
4594
48
        if (ref_count == 1) {
4595
            // It would not be added since it is recycling.
4596
34
            if (delete_rowset_data(rowset_meta) != 0) {
4597
0
                LOG_WARNING("failed to delete rowset data");
4598
0
                return -1;
4599
0
            }
4600
4601
            // Reset the transaction to avoid timeout.
4602
34
            err = txn_kv_->create_txn(&txn);
4603
34
            if (err != TxnErrorCode::TXN_OK) {
4604
0
                LOG_WARNING("failed to create txn").tag("err", err);
4605
0
                return -1;
4606
0
            }
4607
34
            txn->remove(rowset_ref_count_key);
4608
34
            LOG_INFO("delete rowset data ref count key")
4609
34
                    .tag("txn_id", rowset_meta.txn_id())
4610
34
                    .tag("ref_count_key", hex(rowset_ref_count_key));
4611
4612
34
            std::string dbm_start_key =
4613
34
                    meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
4614
34
            std::string dbm_end_key = meta_delete_bitmap_key(
4615
34
                    {reference_instance_id, tablet_id, rowset_id,
4616
34
                     std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
4617
34
            txn->remove(dbm_start_key, dbm_end_key);
4618
34
            LOG_INFO("remove delete bitmap kv")
4619
34
                    .tag("begin", hex(dbm_start_key))
4620
34
                    .tag("end", hex(dbm_end_key));
4621
4622
34
            std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key(
4623
34
                    {reference_instance_id, tablet_id, rowset_id});
4624
34
            std::string versioned_dbm_end_key = versioned_dbm_start_key;
4625
34
            encode_int64(INT64_MAX, &versioned_dbm_end_key);
4626
34
            txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
4627
34
            LOG_INFO("remove versioned delete bitmap kv")
4628
34
                    .tag("begin", hex(versioned_dbm_start_key))
4629
34
                    .tag("end", hex(versioned_dbm_end_key));
4630
34
        } else {
4631
            // Decrease the rowset ref count.
4632
            //
4633
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
4634
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
4635
14
            txn->atomic_add(rowset_ref_count_key, -1);
4636
14
            LOG_INFO("decrease rowset data ref count")
4637
14
                    .tag("txn_id", rowset_meta.txn_id())
4638
14
                    .tag("ref_count", ref_count - 1)
4639
14
                    .tag("ref_count_key", hex(rowset_ref_count_key));
4640
14
        }
4641
4642
48
        txn->remove(recycle_rowset_key);
4643
48
        LOG_INFO("remove recycle rowset key").tag("key", hex(recycle_rowset_key));
4644
48
        if (!secondary_rowset_key.empty()) {
4645
35
            txn->remove(secondary_rowset_key);
4646
35
            LOG_INFO("remove secondary rowset key").tag("key", hex(secondary_rowset_key));
4647
35
        }
4648
4649
48
        err = txn->commit();
4650
48
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
4651
            // The rowset ref count key has been changed, we need to retry.
4652
5
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
4653
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
4654
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
4655
5
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
4656
5
            continue;
4657
43
        } else if (err != TxnErrorCode::TXN_OK) {
4658
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
4659
0
            return -1;
4660
0
        }
4661
43
        LOG_INFO("recycle rowset meta and data success");
4662
43
        return 0;
4663
48
    }
4664
0
    LOG_WARNING("failed to recycle rowset meta and data after retry")
4665
0
            .tag("tablet_id", tablet_id)
4666
0
            .tag("rowset_id", rowset_id)
4667
0
            .tag("retry", MAX_RETRY);
4668
0
    return -1;
4669
43
}
4670
4671
18
int InstanceRecycler::recycle_tmp_rowsets() {
4672
18
    const std::string task_name = "recycle_tmp_rowsets";
4673
18
    int64_t num_scanned = 0;
4674
18
    int64_t num_expired = 0;
4675
18
    std::atomic_long num_recycled = 0;
4676
18
    size_t expired_rowset_size = 0;
4677
18
    size_t total_rowset_key_size = 0;
4678
18
    size_t total_rowset_value_size = 0;
4679
18
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4680
4681
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
4682
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
4683
18
    std::string tmp_rs_key0;
4684
18
    std::string tmp_rs_key1;
4685
18
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
4686
18
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
4687
4688
18
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
4689
4690
18
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4691
18
    register_recycle_task(task_name, start_time);
4692
4693
18
    DORIS_CLOUD_DEFER {
4694
18
        unregister_recycle_task(task_name);
4695
18
        int64_t cost =
4696
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4697
18
        metrics_context.finish_report();
4698
18
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
4699
18
                .tag("instance_id", instance_id_)
4700
18
                .tag("num_scanned", num_scanned)
4701
18
                .tag("num_expired", num_expired)
4702
18
                .tag("num_recycled", num_recycled)
4703
18
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4704
18
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4705
18
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
4706
18
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
4693
4
    DORIS_CLOUD_DEFER {
4694
4
        unregister_recycle_task(task_name);
4695
4
        int64_t cost =
4696
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4697
4
        metrics_context.finish_report();
4698
4
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
4699
4
                .tag("instance_id", instance_id_)
4700
4
                .tag("num_scanned", num_scanned)
4701
4
                .tag("num_expired", num_expired)
4702
4
                .tag("num_recycled", num_recycled)
4703
4
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4704
4
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4705
4
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
4706
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
4693
14
    DORIS_CLOUD_DEFER {
4694
14
        unregister_recycle_task(task_name);
4695
14
        int64_t cost =
4696
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4697
14
        metrics_context.finish_report();
4698
14
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
4699
14
                .tag("instance_id", instance_id_)
4700
14
                .tag("num_scanned", num_scanned)
4701
14
                .tag("num_expired", num_expired)
4702
14
                .tag("num_recycled", num_recycled)
4703
14
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4704
14
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4705
14
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
4706
14
    };
4707
4708
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
4709
4710
18
    std::vector<std::string> tmp_rowset_keys;
4711
18
    std::vector<std::string> tmp_rowset_ref_count_keys;
4712
4713
    // rowset_id -> rowset_meta
4714
    // store tmp_rowset id and meta for statistics rs size when delete
4715
18
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
4716
18
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4717
18
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
4718
18
    worker_pool->start();
4719
4720
18
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4721
4722
18
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
4723
18
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
4724
18
                             &earlest_ts, &tmp_rowset_ref_count_keys, this,
4725
57.0k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
4726
57.0k
        ++num_scanned;
4727
57.0k
        total_rowset_key_size += k.size();
4728
57.0k
        total_rowset_value_size += v.size();
4729
57.0k
        doris::RowsetMetaCloudPB rowset;
4730
57.0k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4731
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
4732
0
            return -1;
4733
0
        }
4734
57.0k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4735
57.0k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4736
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4737
0
                   << " txn_expiration=" << rowset.txn_expiration()
4738
0
                   << " rowset_creation_time=" << rowset.creation_time();
4739
57.0k
        int64_t current_time = ::time(nullptr);
4740
57.0k
        if (current_time < expiration) { // not expired
4741
0
            return 0;
4742
0
        }
4743
4744
57.0k
        DCHECK_GT(rowset.txn_id(), 0)
4745
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
4746
57.0k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
4747
2.00k
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
4748
2.00k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
4749
2.00k
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
4750
2.00k
                      << rowset.start_version() << '-' << rowset.end_version()
4751
2.00k
                      << "] txn_id=" << rowset.txn_id()
4752
2.00k
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
4753
2.00k
                      << " txn_expiration=" << rowset.txn_expiration();
4754
2.00k
            return 0;
4755
2.00k
        }
4756
4757
55.0k
        ++num_expired;
4758
55.0k
        expired_rowset_size += v.size();
4759
55.0k
        if (!rowset.has_resource_id()) {
4760
4.00k
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
4761
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
4762
0
                return -1;
4763
0
            }
4764
            // might be a delete pred rowset
4765
4.00k
            tmp_rowset_keys.emplace_back(k);
4766
4.00k
            return 0;
4767
4.00k
        }
4768
        // TODO(plat1ko): check rowset not referenced
4769
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4770
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
4771
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
4772
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
4773
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
4774
51.0k
                  << " num_expired=" << num_expired
4775
51.0k
                  << " task_type=" << metrics_context.operation_type;
4776
4777
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
4778
        // Remove the rowset ref count key directly since it has not been used.
4779
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
4780
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
4781
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
4782
51.0k
                  << "key=" << hex(rowset_ref_count_key);
4783
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
4784
4785
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
4786
51.0k
        return 0;
4787
55.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4725
6.00k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
4726
6.00k
        ++num_scanned;
4727
6.00k
        total_rowset_key_size += k.size();
4728
6.00k
        total_rowset_value_size += v.size();
4729
6.00k
        doris::RowsetMetaCloudPB rowset;
4730
6.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4731
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
4732
0
            return -1;
4733
0
        }
4734
6.00k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4735
6.00k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4736
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4737
0
                   << " txn_expiration=" << rowset.txn_expiration()
4738
0
                   << " rowset_creation_time=" << rowset.creation_time();
4739
6.00k
        int64_t current_time = ::time(nullptr);
4740
6.00k
        if (current_time < expiration) { // not expired
4741
0
            return 0;
4742
0
        }
4743
4744
6.00k
        DCHECK_GT(rowset.txn_id(), 0)
4745
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
4746
6.00k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
4747
2.00k
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
4748
2.00k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
4749
2.00k
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
4750
2.00k
                      << rowset.start_version() << '-' << rowset.end_version()
4751
2.00k
                      << "] txn_id=" << rowset.txn_id()
4752
2.00k
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
4753
2.00k
                      << " txn_expiration=" << rowset.txn_expiration();
4754
2.00k
            return 0;
4755
2.00k
        }
4756
4757
4.00k
        ++num_expired;
4758
4.00k
        expired_rowset_size += v.size();
4759
4.00k
        if (!rowset.has_resource_id()) {
4760
4.00k
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
4761
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
4762
0
                return -1;
4763
0
            }
4764
            // might be a delete pred rowset
4765
4.00k
            tmp_rowset_keys.emplace_back(k);
4766
4.00k
            return 0;
4767
4.00k
        }
4768
        // TODO(plat1ko): check rowset not referenced
4769
0
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4770
0
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
4771
0
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
4772
0
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
4773
0
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
4774
0
                  << " num_expired=" << num_expired
4775
0
                  << " task_type=" << metrics_context.operation_type;
4776
4777
0
        tmp_rowset_keys.emplace_back(k.data(), k.size());
4778
        // Remove the rowset ref count key directly since it has not been used.
4779
0
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
4780
0
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
4781
0
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
4782
0
                  << "key=" << hex(rowset_ref_count_key);
4783
0
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
4784
4785
0
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
4786
0
        return 0;
4787
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4725
51.0k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
4726
51.0k
        ++num_scanned;
4727
51.0k
        total_rowset_key_size += k.size();
4728
51.0k
        total_rowset_value_size += v.size();
4729
51.0k
        doris::RowsetMetaCloudPB rowset;
4730
51.0k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4731
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
4732
0
            return -1;
4733
0
        }
4734
51.0k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4735
51.0k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4736
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4737
0
                   << " txn_expiration=" << rowset.txn_expiration()
4738
0
                   << " rowset_creation_time=" << rowset.creation_time();
4739
51.0k
        int64_t current_time = ::time(nullptr);
4740
51.0k
        if (current_time < expiration) { // not expired
4741
0
            return 0;
4742
0
        }
4743
4744
51.0k
        DCHECK_GT(rowset.txn_id(), 0)
4745
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
4746
51.0k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
4747
0
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
4748
0
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
4749
0
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
4750
0
                      << rowset.start_version() << '-' << rowset.end_version()
4751
0
                      << "] txn_id=" << rowset.txn_id()
4752
0
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
4753
0
                      << " txn_expiration=" << rowset.txn_expiration();
4754
0
            return 0;
4755
0
        }
4756
4757
51.0k
        ++num_expired;
4758
51.0k
        expired_rowset_size += v.size();
4759
51.0k
        if (!rowset.has_resource_id()) {
4760
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
4761
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
4762
0
                return -1;
4763
0
            }
4764
            // might be a delete pred rowset
4765
0
            tmp_rowset_keys.emplace_back(k);
4766
0
            return 0;
4767
0
        }
4768
        // TODO(plat1ko): check rowset not referenced
4769
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4770
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
4771
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
4772
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
4773
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
4774
51.0k
                  << " num_expired=" << num_expired
4775
51.0k
                  << " task_type=" << metrics_context.operation_type;
4776
4777
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
4778
        // Remove the rowset ref count key directly since it has not been used.
4779
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
4780
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
4781
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
4782
51.0k
                  << "key=" << hex(rowset_ref_count_key);
4783
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
4784
4785
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
4786
51.0k
        return 0;
4787
51.0k
    };
4788
4789
    // TODO bacth delete
4790
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4791
51.0k
        std::string dbm_start_key =
4792
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4793
51.0k
        std::string dbm_end_key = dbm_start_key;
4794
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
4795
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4796
51.0k
        if (ret != 0) {
4797
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4798
0
                         << instance_id_ << ", tablet_id=" << tablet_id
4799
0
                         << ", rowset_id=" << rowset_id;
4800
0
        }
4801
51.0k
        return ret;
4802
51.0k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4790
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4791
51.0k
        std::string dbm_start_key =
4792
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4793
51.0k
        std::string dbm_end_key = dbm_start_key;
4794
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
4795
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4796
51.0k
        if (ret != 0) {
4797
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4798
0
                         << instance_id_ << ", tablet_id=" << tablet_id
4799
0
                         << ", rowset_id=" << rowset_id;
4800
0
        }
4801
51.0k
        return ret;
4802
51.0k
    };
4803
4804
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4805
51.0k
        auto delete_bitmap_start =
4806
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
4807
51.0k
        auto delete_bitmap_end =
4808
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
4809
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
4810
51.0k
        if (ret != 0) {
4811
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
4812
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
4813
0
        }
4814
51.0k
        return ret;
4815
51.0k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4804
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4805
51.0k
        auto delete_bitmap_start =
4806
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
4807
51.0k
        auto delete_bitmap_end =
4808
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
4809
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
4810
51.0k
        if (ret != 0) {
4811
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
4812
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
4813
0
        }
4814
51.0k
        return ret;
4815
51.0k
    };
4816
4817
18
    auto loop_done = [&]() -> int {
4818
10
        DORIS_CLOUD_DEFER {
4819
10
            tmp_rowset_keys.clear();
4820
10
            tmp_rowsets.clear();
4821
10
            tmp_rowset_ref_count_keys.clear();
4822
10
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4818
3
        DORIS_CLOUD_DEFER {
4819
3
            tmp_rowset_keys.clear();
4820
3
            tmp_rowsets.clear();
4821
3
            tmp_rowset_ref_count_keys.clear();
4822
3
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4818
7
        DORIS_CLOUD_DEFER {
4819
7
            tmp_rowset_keys.clear();
4820
7
            tmp_rowsets.clear();
4821
7
            tmp_rowset_ref_count_keys.clear();
4822
7
        };
4823
10
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
4824
10
                             tmp_rowsets_to_delete = tmp_rowsets,
4825
10
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4826
10
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4827
10
                                   metrics_context) != 0) {
4828
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4829
0
                return;
4830
0
            }
4831
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4832
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4833
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4834
0
                                 << rs.ShortDebugString();
4835
0
                    return;
4836
0
                }
4837
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4838
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4839
0
                                 << rs.ShortDebugString();
4840
0
                    return;
4841
0
                }
4842
51.0k
            }
4843
10
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4844
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4845
0
                return;
4846
0
            }
4847
10
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4848
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4849
0
                return;
4850
0
            }
4851
10
            num_recycled += tmp_rowset_keys.size();
4852
10
            return;
4853
10
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
4825
3
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4826
3
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4827
3
                                   metrics_context) != 0) {
4828
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4829
0
                return;
4830
0
            }
4831
3
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4832
0
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4833
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4834
0
                                 << rs.ShortDebugString();
4835
0
                    return;
4836
0
                }
4837
0
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4838
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4839
0
                                 << rs.ShortDebugString();
4840
0
                    return;
4841
0
                }
4842
0
            }
4843
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4844
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4845
0
                return;
4846
0
            }
4847
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4848
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4849
0
                return;
4850
0
            }
4851
3
            num_recycled += tmp_rowset_keys.size();
4852
3
            return;
4853
3
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
4825
7
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4826
7
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4827
7
                                   metrics_context) != 0) {
4828
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4829
0
                return;
4830
0
            }
4831
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4832
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4833
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4834
0
                                 << rs.ShortDebugString();
4835
0
                    return;
4836
0
                }
4837
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4838
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4839
0
                                 << rs.ShortDebugString();
4840
0
                    return;
4841
0
                }
4842
51.0k
            }
4843
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4844
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4845
0
                return;
4846
0
            }
4847
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4848
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4849
0
                return;
4850
0
            }
4851
7
            num_recycled += tmp_rowset_keys.size();
4852
7
            return;
4853
7
        });
4854
10
        return 0;
4855
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
4817
3
    auto loop_done = [&]() -> int {
4818
3
        DORIS_CLOUD_DEFER {
4819
3
            tmp_rowset_keys.clear();
4820
3
            tmp_rowsets.clear();
4821
3
            tmp_rowset_ref_count_keys.clear();
4822
3
        };
4823
3
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
4824
3
                             tmp_rowsets_to_delete = tmp_rowsets,
4825
3
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4826
3
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4827
3
                                   metrics_context) != 0) {
4828
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4829
3
                return;
4830
3
            }
4831
3
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4832
3
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4833
3
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4834
3
                                 << rs.ShortDebugString();
4835
3
                    return;
4836
3
                }
4837
3
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4838
3
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4839
3
                                 << rs.ShortDebugString();
4840
3
                    return;
4841
3
                }
4842
3
            }
4843
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4844
3
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4845
3
                return;
4846
3
            }
4847
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4848
3
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4849
3
                return;
4850
3
            }
4851
3
            num_recycled += tmp_rowset_keys.size();
4852
3
            return;
4853
3
        });
4854
3
        return 0;
4855
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
4817
7
    auto loop_done = [&]() -> int {
4818
7
        DORIS_CLOUD_DEFER {
4819
7
            tmp_rowset_keys.clear();
4820
7
            tmp_rowsets.clear();
4821
7
            tmp_rowset_ref_count_keys.clear();
4822
7
        };
4823
7
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
4824
7
                             tmp_rowsets_to_delete = tmp_rowsets,
4825
7
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4826
7
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4827
7
                                   metrics_context) != 0) {
4828
7
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4829
7
                return;
4830
7
            }
4831
7
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4832
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4833
7
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4834
7
                                 << rs.ShortDebugString();
4835
7
                    return;
4836
7
                }
4837
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4838
7
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4839
7
                                 << rs.ShortDebugString();
4840
7
                    return;
4841
7
                }
4842
7
            }
4843
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4844
7
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4845
7
                return;
4846
7
            }
4847
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4848
7
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4849
7
                return;
4850
7
            }
4851
7
            num_recycled += tmp_rowset_keys.size();
4852
7
            return;
4853
7
        });
4854
7
        return 0;
4855
7
    };
4856
4857
18
    if (config::enable_recycler_stats_metrics) {
4858
0
        scan_and_statistics_tmp_rowsets();
4859
0
    }
4860
    // recycle_func and loop_done for scan and recycle
4861
18
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
4862
18
                               std::move(loop_done));
4863
4864
18
    worker_pool->stop();
4865
18
    return ret;
4866
18
}
4867
4868
int InstanceRecycler::scan_and_recycle(
4869
        std::string begin, std::string_view end,
4870
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
4871
228
        std::function<int()> loop_done) {
4872
228
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
4873
228
    int ret = 0;
4874
228
    int64_t cnt = 0;
4875
228
    int get_range_retried = 0;
4876
228
    std::string err;
4877
228
    DORIS_CLOUD_DEFER_COPY(begin, end) {
4878
228
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
4879
228
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
4880
228
                  << " ret=" << ret << " err=" << err;
4881
228
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
4877
19
    DORIS_CLOUD_DEFER_COPY(begin, end) {
4878
19
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
4879
19
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
4880
19
                  << " ret=" << ret << " err=" << err;
4881
19
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
4877
209
    DORIS_CLOUD_DEFER_COPY(begin, end) {
4878
209
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
4879
209
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
4880
209
                  << " ret=" << ret << " err=" << err;
4881
209
    };
4882
4883
228
    std::unique_ptr<RangeGetIterator> it;
4884
256
    do {
4885
256
        if (get_range_retried > 1000) {
4886
0
            err = "txn_get exceeds max retry, may not scan all keys";
4887
0
            ret = -1;
4888
0
            return -1;
4889
0
        }
4890
256
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
4891
256
        if (get_ret != 0) { // txn kv may complain "Request for future version"
4892
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
4893
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
4894
0
                         << " get_range_retried=" << get_range_retried;
4895
0
            ++get_range_retried;
4896
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
4897
0
            continue; // try again
4898
0
        }
4899
256
        if (!it->has_next()) {
4900
126
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
4901
126
            break; // scan finished
4902
126
        }
4903
99.5k
        while (it->has_next()) {
4904
99.4k
            ++cnt;
4905
            // recycle corresponding resources
4906
99.4k
            auto [k, v] = it->next();
4907
99.4k
            if (!it->has_next()) {
4908
130
                begin = k;
4909
130
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
4910
130
            }
4911
            // if we want to continue scanning, the recycle_func should not return non-zero
4912
99.4k
            if (recycle_func(k, v) != 0) {
4913
4.00k
                err = "recycle_func error";
4914
4.00k
                ret = -1;
4915
4.00k
            }
4916
99.4k
        }
4917
130
        begin.push_back('\x00'); // Update to next smallest key for iteration
4918
        // if we want to continue scanning, the recycle_func should not return non-zero
4919
130
        if (loop_done && loop_done() != 0) {
4920
3
            err = "loop_done error";
4921
3
            ret = -1;
4922
3
        }
4923
130
    } while (it->more() && !stopped());
4924
228
    return ret;
4925
228
}
4926
4927
20
int InstanceRecycler::abort_timeout_txn() {
4928
20
    const std::string task_name = "abort_timeout_txn";
4929
20
    int64_t num_scanned = 0;
4930
20
    int64_t num_timeout = 0;
4931
20
    int64_t num_abort = 0;
4932
20
    int64_t num_advance = 0;
4933
20
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4934
4935
20
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
4936
20
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
4937
20
    std::string begin_txn_running_key;
4938
20
    std::string end_txn_running_key;
4939
20
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
4940
20
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
4941
4942
20
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
4943
4944
20
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4945
20
    register_recycle_task(task_name, start_time);
4946
4947
20
    DORIS_CLOUD_DEFER {
4948
20
        unregister_recycle_task(task_name);
4949
20
        int64_t cost =
4950
20
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4951
20
        metrics_context.finish_report();
4952
20
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
4953
20
                .tag("instance_id", instance_id_)
4954
20
                .tag("num_scanned", num_scanned)
4955
20
                .tag("num_timeout", num_timeout)
4956
20
                .tag("num_abort", num_abort)
4957
20
                .tag("num_advance", num_advance);
4958
20
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
4947
4
    DORIS_CLOUD_DEFER {
4948
4
        unregister_recycle_task(task_name);
4949
4
        int64_t cost =
4950
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4951
4
        metrics_context.finish_report();
4952
4
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
4953
4
                .tag("instance_id", instance_id_)
4954
4
                .tag("num_scanned", num_scanned)
4955
4
                .tag("num_timeout", num_timeout)
4956
4
                .tag("num_abort", num_abort)
4957
4
                .tag("num_advance", num_advance);
4958
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
4947
16
    DORIS_CLOUD_DEFER {
4948
16
        unregister_recycle_task(task_name);
4949
16
        int64_t cost =
4950
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4951
16
        metrics_context.finish_report();
4952
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
4953
16
                .tag("instance_id", instance_id_)
4954
16
                .tag("num_scanned", num_scanned)
4955
16
                .tag("num_timeout", num_timeout)
4956
16
                .tag("num_abort", num_abort)
4957
16
                .tag("num_advance", num_advance);
4958
16
    };
4959
4960
20
    int64_t current_time =
4961
20
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4962
4963
20
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
4964
20
                                  &current_time, &metrics_context,
4965
20
                                  this](std::string_view k, std::string_view v) -> int {
4966
10
        ++num_scanned;
4967
4968
10
        std::unique_ptr<Transaction> txn;
4969
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4970
10
        if (err != TxnErrorCode::TXN_OK) {
4971
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4972
0
            return -1;
4973
0
        }
4974
10
        std::string_view k1 = k;
4975
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
4976
10
        k1.remove_prefix(1); // Remove key space
4977
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4978
10
        if (decode_key(&k1, &out) != 0) {
4979
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
4980
0
            return -1;
4981
0
        }
4982
10
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4983
10
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4984
10
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4985
        // Update txn_info
4986
10
        std::string txn_inf_key, txn_inf_val;
4987
10
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
4988
10
        err = txn->get(txn_inf_key, &txn_inf_val);
4989
10
        if (err != TxnErrorCode::TXN_OK) {
4990
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
4991
0
            return -1;
4992
0
        }
4993
10
        TxnInfoPB txn_info;
4994
10
        if (!txn_info.ParseFromString(txn_inf_val)) {
4995
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
4996
0
            return -1;
4997
0
        }
4998
4999
10
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5000
4
            txn.reset();
5001
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5002
4
            std::shared_ptr<TxnLazyCommitTask> task =
5003
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5004
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5005
4
            if (ret.first != MetaServiceCode::OK) {
5006
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5007
0
                             << "msg=" << ret.second;
5008
0
                return -1;
5009
0
            }
5010
4
            ++num_advance;
5011
4
            return 0;
5012
6
        } else {
5013
6
            TxnRunningPB txn_running_pb;
5014
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5015
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5016
0
                return -1;
5017
0
            }
5018
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5019
4
                return 0;
5020
4
            }
5021
2
            ++num_timeout;
5022
5023
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5024
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5025
2
            txn_info.set_finish_time(current_time);
5026
2
            txn_info.set_reason("timeout");
5027
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5028
2
            txn_inf_val.clear();
5029
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5030
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5031
0
                return -1;
5032
0
            }
5033
2
            txn->put(txn_inf_key, txn_inf_val);
5034
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5035
            // Put recycle txn key
5036
2
            std::string recyc_txn_key, recyc_txn_val;
5037
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5038
2
            RecycleTxnPB recycle_txn_pb;
5039
2
            recycle_txn_pb.set_creation_time(current_time);
5040
2
            recycle_txn_pb.set_label(txn_info.label());
5041
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5042
0
                LOG_WARNING("failed to serialize txn recycle info")
5043
0
                        .tag("key", hex(k))
5044
0
                        .tag("db_id", db_id)
5045
0
                        .tag("txn_id", txn_id);
5046
0
                return -1;
5047
0
            }
5048
2
            txn->put(recyc_txn_key, recyc_txn_val);
5049
            // Remove txn running key
5050
2
            txn->remove(k);
5051
2
            err = txn->commit();
5052
2
            if (err != TxnErrorCode::TXN_OK) {
5053
0
                LOG_WARNING("failed to commit txn err={}", err)
5054
0
                        .tag("key", hex(k))
5055
0
                        .tag("db_id", db_id)
5056
0
                        .tag("txn_id", txn_id);
5057
0
                return -1;
5058
0
            }
5059
2
            metrics_context.total_recycled_num = ++num_abort;
5060
2
            metrics_context.report();
5061
2
        }
5062
5063
2
        return 0;
5064
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4965
4
                                  this](std::string_view k, std::string_view v) -> int {
4966
4
        ++num_scanned;
4967
4968
4
        std::unique_ptr<Transaction> txn;
4969
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4970
4
        if (err != TxnErrorCode::TXN_OK) {
4971
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4972
0
            return -1;
4973
0
        }
4974
4
        std::string_view k1 = k;
4975
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
4976
4
        k1.remove_prefix(1); // Remove key space
4977
4
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4978
4
        if (decode_key(&k1, &out) != 0) {
4979
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
4980
0
            return -1;
4981
0
        }
4982
4
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4983
4
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4984
4
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4985
        // Update txn_info
4986
4
        std::string txn_inf_key, txn_inf_val;
4987
4
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
4988
4
        err = txn->get(txn_inf_key, &txn_inf_val);
4989
4
        if (err != TxnErrorCode::TXN_OK) {
4990
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
4991
0
            return -1;
4992
0
        }
4993
4
        TxnInfoPB txn_info;
4994
4
        if (!txn_info.ParseFromString(txn_inf_val)) {
4995
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
4996
0
            return -1;
4997
0
        }
4998
4999
4
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5000
4
            txn.reset();
5001
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5002
4
            std::shared_ptr<TxnLazyCommitTask> task =
5003
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5004
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5005
4
            if (ret.first != MetaServiceCode::OK) {
5006
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5007
0
                             << "msg=" << ret.second;
5008
0
                return -1;
5009
0
            }
5010
4
            ++num_advance;
5011
4
            return 0;
5012
4
        } else {
5013
0
            TxnRunningPB txn_running_pb;
5014
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5015
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5016
0
                return -1;
5017
0
            }
5018
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5019
0
                return 0;
5020
0
            }
5021
0
            ++num_timeout;
5022
5023
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5024
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5025
0
            txn_info.set_finish_time(current_time);
5026
0
            txn_info.set_reason("timeout");
5027
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5028
0
            txn_inf_val.clear();
5029
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5030
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5031
0
                return -1;
5032
0
            }
5033
0
            txn->put(txn_inf_key, txn_inf_val);
5034
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5035
            // Put recycle txn key
5036
0
            std::string recyc_txn_key, recyc_txn_val;
5037
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5038
0
            RecycleTxnPB recycle_txn_pb;
5039
0
            recycle_txn_pb.set_creation_time(current_time);
5040
0
            recycle_txn_pb.set_label(txn_info.label());
5041
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5042
0
                LOG_WARNING("failed to serialize txn recycle info")
5043
0
                        .tag("key", hex(k))
5044
0
                        .tag("db_id", db_id)
5045
0
                        .tag("txn_id", txn_id);
5046
0
                return -1;
5047
0
            }
5048
0
            txn->put(recyc_txn_key, recyc_txn_val);
5049
            // Remove txn running key
5050
0
            txn->remove(k);
5051
0
            err = txn->commit();
5052
0
            if (err != TxnErrorCode::TXN_OK) {
5053
0
                LOG_WARNING("failed to commit txn err={}", err)
5054
0
                        .tag("key", hex(k))
5055
0
                        .tag("db_id", db_id)
5056
0
                        .tag("txn_id", txn_id);
5057
0
                return -1;
5058
0
            }
5059
0
            metrics_context.total_recycled_num = ++num_abort;
5060
0
            metrics_context.report();
5061
0
        }
5062
5063
0
        return 0;
5064
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4965
6
                                  this](std::string_view k, std::string_view v) -> int {
4966
6
        ++num_scanned;
4967
4968
6
        std::unique_ptr<Transaction> txn;
4969
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4970
6
        if (err != TxnErrorCode::TXN_OK) {
4971
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4972
0
            return -1;
4973
0
        }
4974
6
        std::string_view k1 = k;
4975
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
4976
6
        k1.remove_prefix(1); // Remove key space
4977
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4978
6
        if (decode_key(&k1, &out) != 0) {
4979
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
4980
0
            return -1;
4981
0
        }
4982
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4983
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4984
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4985
        // Update txn_info
4986
6
        std::string txn_inf_key, txn_inf_val;
4987
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
4988
6
        err = txn->get(txn_inf_key, &txn_inf_val);
4989
6
        if (err != TxnErrorCode::TXN_OK) {
4990
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
4991
0
            return -1;
4992
0
        }
4993
6
        TxnInfoPB txn_info;
4994
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
4995
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
4996
0
            return -1;
4997
0
        }
4998
4999
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5000
0
            txn.reset();
5001
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5002
0
            std::shared_ptr<TxnLazyCommitTask> task =
5003
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5004
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5005
0
            if (ret.first != MetaServiceCode::OK) {
5006
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5007
0
                             << "msg=" << ret.second;
5008
0
                return -1;
5009
0
            }
5010
0
            ++num_advance;
5011
0
            return 0;
5012
6
        } else {
5013
6
            TxnRunningPB txn_running_pb;
5014
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5015
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5016
0
                return -1;
5017
0
            }
5018
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5019
4
                return 0;
5020
4
            }
5021
2
            ++num_timeout;
5022
5023
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5024
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5025
2
            txn_info.set_finish_time(current_time);
5026
2
            txn_info.set_reason("timeout");
5027
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5028
2
            txn_inf_val.clear();
5029
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5030
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5031
0
                return -1;
5032
0
            }
5033
2
            txn->put(txn_inf_key, txn_inf_val);
5034
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5035
            // Put recycle txn key
5036
2
            std::string recyc_txn_key, recyc_txn_val;
5037
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5038
2
            RecycleTxnPB recycle_txn_pb;
5039
2
            recycle_txn_pb.set_creation_time(current_time);
5040
2
            recycle_txn_pb.set_label(txn_info.label());
5041
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5042
0
                LOG_WARNING("failed to serialize txn recycle info")
5043
0
                        .tag("key", hex(k))
5044
0
                        .tag("db_id", db_id)
5045
0
                        .tag("txn_id", txn_id);
5046
0
                return -1;
5047
0
            }
5048
2
            txn->put(recyc_txn_key, recyc_txn_val);
5049
            // Remove txn running key
5050
2
            txn->remove(k);
5051
2
            err = txn->commit();
5052
2
            if (err != TxnErrorCode::TXN_OK) {
5053
0
                LOG_WARNING("failed to commit txn err={}", err)
5054
0
                        .tag("key", hex(k))
5055
0
                        .tag("db_id", db_id)
5056
0
                        .tag("txn_id", txn_id);
5057
0
                return -1;
5058
0
            }
5059
2
            metrics_context.total_recycled_num = ++num_abort;
5060
2
            metrics_context.report();
5061
2
        }
5062
5063
2
        return 0;
5064
6
    };
5065
5066
20
    if (config::enable_recycler_stats_metrics) {
5067
0
        scan_and_statistics_abort_timeout_txn();
5068
0
    }
5069
    // recycle_func and loop_done for scan and recycle
5070
20
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
5071
20
                            std::move(handle_txn_running_kv));
5072
20
}
5073
5074
21
int InstanceRecycler::recycle_expired_txn_label() {
5075
21
    const std::string task_name = "recycle_expired_txn_label";
5076
21
    int64_t num_scanned = 0;
5077
21
    int64_t num_expired = 0;
5078
21
    int64_t num_recycled = 0;
5079
21
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5080
21
    int ret = 0;
5081
5082
21
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
5083
21
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5084
21
    std::string begin_recycle_txn_key;
5085
21
    std::string end_recycle_txn_key;
5086
21
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
5087
21
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
5088
21
    std::vector<std::string> recycle_txn_info_keys;
5089
5090
21
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
5091
5092
21
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5093
21
    register_recycle_task(task_name, start_time);
5094
21
    DORIS_CLOUD_DEFER {
5095
21
        unregister_recycle_task(task_name);
5096
21
        int64_t cost =
5097
21
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5098
21
        metrics_context.finish_report();
5099
21
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5100
21
                .tag("instance_id", instance_id_)
5101
21
                .tag("num_scanned", num_scanned)
5102
21
                .tag("num_expired", num_expired)
5103
21
                .tag("num_recycled", num_recycled);
5104
21
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5094
3
    DORIS_CLOUD_DEFER {
5095
3
        unregister_recycle_task(task_name);
5096
3
        int64_t cost =
5097
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5098
3
        metrics_context.finish_report();
5099
3
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5100
3
                .tag("instance_id", instance_id_)
5101
3
                .tag("num_scanned", num_scanned)
5102
3
                .tag("num_expired", num_expired)
5103
3
                .tag("num_recycled", num_recycled);
5104
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5094
18
    DORIS_CLOUD_DEFER {
5095
18
        unregister_recycle_task(task_name);
5096
18
        int64_t cost =
5097
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5098
18
        metrics_context.finish_report();
5099
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5100
18
                .tag("instance_id", instance_id_)
5101
18
                .tag("num_scanned", num_scanned)
5102
18
                .tag("num_expired", num_expired)
5103
18
                .tag("num_recycled", num_recycled);
5104
18
    };
5105
5106
21
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5107
5108
21
    SyncExecutor<int> concurrent_delete_executor(
5109
21
            _thread_pool_group.s3_producer_pool,
5110
21
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
5111
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5111
3
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5111
23.0k
            [](const int& ret) { return ret != 0; });
5112
5113
21
    int64_t current_time_ms =
5114
21
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5115
5116
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5117
30.0k
        ++num_scanned;
5118
30.0k
        RecycleTxnPB recycle_txn_pb;
5119
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5120
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5121
0
            return -1;
5122
0
        }
5123
30.0k
        if ((config::force_immediate_recycle) ||
5124
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5125
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5126
30.0k
             current_time_ms)) {
5127
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5128
23.0k
            num_expired++;
5129
23.0k
            recycle_txn_info_keys.emplace_back(k);
5130
23.0k
        }
5131
30.0k
        return 0;
5132
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5116
3
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5117
3
        ++num_scanned;
5118
3
        RecycleTxnPB recycle_txn_pb;
5119
3
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5120
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5121
0
            return -1;
5122
0
        }
5123
3
        if ((config::force_immediate_recycle) ||
5124
3
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5125
3
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5126
3
             current_time_ms)) {
5127
3
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5128
3
            num_expired++;
5129
3
            recycle_txn_info_keys.emplace_back(k);
5130
3
        }
5131
3
        return 0;
5132
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5116
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5117
30.0k
        ++num_scanned;
5118
30.0k
        RecycleTxnPB recycle_txn_pb;
5119
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5120
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5121
0
            return -1;
5122
0
        }
5123
30.0k
        if ((config::force_immediate_recycle) ||
5124
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5125
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5126
30.0k
             current_time_ms)) {
5127
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5128
23.0k
            num_expired++;
5129
23.0k
            recycle_txn_info_keys.emplace_back(k);
5130
23.0k
        }
5131
30.0k
        return 0;
5132
30.0k
    };
5133
5134
    // int 0 for success, 1 for conflict, -1 for error
5135
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5136
23.0k
        std::string_view k1 = k;
5137
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5138
23.0k
        k1.remove_prefix(1); // Remove key space
5139
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5140
23.0k
        int ret = decode_key(&k1, &out);
5141
23.0k
        if (ret != 0) {
5142
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5143
0
            return -1;
5144
0
        }
5145
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5146
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5147
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5148
23.0k
        std::unique_ptr<Transaction> txn;
5149
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5150
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5151
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5152
0
            return -1;
5153
0
        }
5154
        // Remove txn index kv
5155
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
5156
23.0k
        txn->remove(index_key);
5157
        // Remove txn info kv
5158
23.0k
        std::string info_key, info_val;
5159
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
5160
23.0k
        err = txn->get(info_key, &info_val);
5161
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5162
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
5163
0
            return -1;
5164
0
        }
5165
23.0k
        TxnInfoPB txn_info;
5166
23.0k
        if (!txn_info.ParseFromString(info_val)) {
5167
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
5168
0
            return -1;
5169
0
        }
5170
23.0k
        txn->remove(info_key);
5171
        // Remove sub txn index kvs
5172
23.0k
        std::vector<std::string> sub_txn_index_keys;
5173
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
5174
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
5175
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
5176
22.9k
        }
5177
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
5178
22.9k
            txn->remove(sub_txn_index_key);
5179
22.9k
        }
5180
        // Update txn label
5181
23.0k
        std::string label_key, label_val;
5182
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
5183
23.0k
        err = txn->get(label_key, &label_val);
5184
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5185
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
5186
0
                         << " err=" << err;
5187
0
            return -1;
5188
0
        }
5189
23.0k
        TxnLabelPB txn_label;
5190
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
5191
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
5192
0
            return -1;
5193
0
        }
5194
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
5195
23.0k
        if (it != txn_label.txn_ids().end()) {
5196
23.0k
            txn_label.mutable_txn_ids()->erase(it);
5197
23.0k
        }
5198
23.0k
        if (txn_label.txn_ids().empty()) {
5199
23.0k
            txn->remove(label_key);
5200
23.0k
            TEST_SYNC_POINT_CALLBACK(
5201
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
5202
23.0k
        } else {
5203
73
            if (!txn_label.SerializeToString(&label_val)) {
5204
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
5205
0
                return -1;
5206
0
            }
5207
73
            TEST_SYNC_POINT_CALLBACK(
5208
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
5209
73
            txn->atomic_set_ver_value(label_key, label_val);
5210
73
            TEST_SYNC_POINT_CALLBACK(
5211
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
5212
73
        }
5213
        // Remove recycle txn kv
5214
23.0k
        txn->remove(k);
5215
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
5216
23.0k
        err = txn->commit();
5217
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5218
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
5219
62
                TEST_SYNC_POINT_CALLBACK(
5220
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
5221
                // log the txn_id and label
5222
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
5223
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
5224
62
                             << " txn_label=" << txn_info.label();
5225
62
                return 1;
5226
62
            }
5227
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
5228
0
            return -1;
5229
62
        }
5230
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
5231
23.0k
        metrics_context.report();
5232
5233
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
5234
23.0k
        return 0;
5235
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5135
3
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5136
3
        std::string_view k1 = k;
5137
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5138
3
        k1.remove_prefix(1); // Remove key space
5139
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5140
3
        int ret = decode_key(&k1, &out);
5141
3
        if (ret != 0) {
5142
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5143
0
            return -1;
5144
0
        }
5145
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5146
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5147
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5148
3
        std::unique_ptr<Transaction> txn;
5149
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5150
3
        if (err != TxnErrorCode::TXN_OK) {
5151
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5152
0
            return -1;
5153
0
        }
5154
        // Remove txn index kv
5155
3
        auto index_key = txn_index_key({instance_id_, txn_id});
5156
3
        txn->remove(index_key);
5157
        // Remove txn info kv
5158
3
        std::string info_key, info_val;
5159
3
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
5160
3
        err = txn->get(info_key, &info_val);
5161
3
        if (err != TxnErrorCode::TXN_OK) {
5162
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
5163
0
            return -1;
5164
0
        }
5165
3
        TxnInfoPB txn_info;
5166
3
        if (!txn_info.ParseFromString(info_val)) {
5167
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
5168
0
            return -1;
5169
0
        }
5170
3
        txn->remove(info_key);
5171
        // Remove sub txn index kvs
5172
3
        std::vector<std::string> sub_txn_index_keys;
5173
3
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
5174
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
5175
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
5176
0
        }
5177
3
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
5178
0
            txn->remove(sub_txn_index_key);
5179
0
        }
5180
        // Update txn label
5181
3
        std::string label_key, label_val;
5182
3
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
5183
3
        err = txn->get(label_key, &label_val);
5184
3
        if (err != TxnErrorCode::TXN_OK) {
5185
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
5186
0
                         << " err=" << err;
5187
0
            return -1;
5188
0
        }
5189
3
        TxnLabelPB txn_label;
5190
3
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
5191
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
5192
0
            return -1;
5193
0
        }
5194
3
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
5195
3
        if (it != txn_label.txn_ids().end()) {
5196
3
            txn_label.mutable_txn_ids()->erase(it);
5197
3
        }
5198
3
        if (txn_label.txn_ids().empty()) {
5199
3
            txn->remove(label_key);
5200
3
            TEST_SYNC_POINT_CALLBACK(
5201
3
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
5202
3
        } else {
5203
0
            if (!txn_label.SerializeToString(&label_val)) {
5204
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
5205
0
                return -1;
5206
0
            }
5207
0
            TEST_SYNC_POINT_CALLBACK(
5208
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
5209
0
            txn->atomic_set_ver_value(label_key, label_val);
5210
0
            TEST_SYNC_POINT_CALLBACK(
5211
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
5212
0
        }
5213
        // Remove recycle txn kv
5214
3
        txn->remove(k);
5215
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
5216
3
        err = txn->commit();
5217
3
        if (err != TxnErrorCode::TXN_OK) {
5218
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
5219
0
                TEST_SYNC_POINT_CALLBACK(
5220
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
5221
                // log the txn_id and label
5222
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
5223
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
5224
0
                             << " txn_label=" << txn_info.label();
5225
0
                return 1;
5226
0
            }
5227
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
5228
0
            return -1;
5229
0
        }
5230
3
        metrics_context.total_recycled_num = ++num_recycled;
5231
3
        metrics_context.report();
5232
5233
3
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
5234
3
        return 0;
5235
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5135
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5136
23.0k
        std::string_view k1 = k;
5137
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5138
23.0k
        k1.remove_prefix(1); // Remove key space
5139
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5140
23.0k
        int ret = decode_key(&k1, &out);
5141
23.0k
        if (ret != 0) {
5142
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5143
0
            return -1;
5144
0
        }
5145
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5146
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5147
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5148
23.0k
        std::unique_ptr<Transaction> txn;
5149
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5150
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5151
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5152
0
            return -1;
5153
0
        }
5154
        // Remove txn index kv
5155
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
5156
23.0k
        txn->remove(index_key);
5157
        // Remove txn info kv
5158
23.0k
        std::string info_key, info_val;
5159
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
5160
23.0k
        err = txn->get(info_key, &info_val);
5161
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5162
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
5163
0
            return -1;
5164
0
        }
5165
23.0k
        TxnInfoPB txn_info;
5166
23.0k
        if (!txn_info.ParseFromString(info_val)) {
5167
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
5168
0
            return -1;
5169
0
        }
5170
23.0k
        txn->remove(info_key);
5171
        // Remove sub txn index kvs
5172
23.0k
        std::vector<std::string> sub_txn_index_keys;
5173
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
5174
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
5175
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
5176
22.9k
        }
5177
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
5178
22.9k
            txn->remove(sub_txn_index_key);
5179
22.9k
        }
5180
        // Update txn label
5181
23.0k
        std::string label_key, label_val;
5182
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
5183
23.0k
        err = txn->get(label_key, &label_val);
5184
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5185
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
5186
0
                         << " err=" << err;
5187
0
            return -1;
5188
0
        }
5189
23.0k
        TxnLabelPB txn_label;
5190
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
5191
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
5192
0
            return -1;
5193
0
        }
5194
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
5195
23.0k
        if (it != txn_label.txn_ids().end()) {
5196
23.0k
            txn_label.mutable_txn_ids()->erase(it);
5197
23.0k
        }
5198
23.0k
        if (txn_label.txn_ids().empty()) {
5199
23.0k
            txn->remove(label_key);
5200
23.0k
            TEST_SYNC_POINT_CALLBACK(
5201
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
5202
23.0k
        } else {
5203
73
            if (!txn_label.SerializeToString(&label_val)) {
5204
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
5205
0
                return -1;
5206
0
            }
5207
73
            TEST_SYNC_POINT_CALLBACK(
5208
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
5209
73
            txn->atomic_set_ver_value(label_key, label_val);
5210
73
            TEST_SYNC_POINT_CALLBACK(
5211
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
5212
73
        }
5213
        // Remove recycle txn kv
5214
23.0k
        txn->remove(k);
5215
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
5216
23.0k
        err = txn->commit();
5217
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5218
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
5219
62
                TEST_SYNC_POINT_CALLBACK(
5220
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
5221
                // log the txn_id and label
5222
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
5223
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
5224
62
                             << " txn_label=" << txn_info.label();
5225
62
                return 1;
5226
62
            }
5227
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
5228
0
            return -1;
5229
62
        }
5230
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
5231
23.0k
        metrics_context.report();
5232
5233
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
5234
23.0k
        return 0;
5235
23.0k
    };
5236
5237
21
    auto loop_done = [&]() -> int {
5238
12
        DORIS_CLOUD_DEFER {
5239
12
            recycle_txn_info_keys.clear();
5240
12
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5238
3
        DORIS_CLOUD_DEFER {
5239
3
            recycle_txn_info_keys.clear();
5240
3
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5238
9
        DORIS_CLOUD_DEFER {
5239
9
            recycle_txn_info_keys.clear();
5240
9
        };
5241
12
        TEST_SYNC_POINT_CALLBACK(
5242
12
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
5243
12
                &recycle_txn_info_keys);
5244
23.0k
        for (const auto& k : recycle_txn_info_keys) {
5245
23.0k
            concurrent_delete_executor.add([&]() {
5246
23.0k
                int ret = delete_recycle_txn_kv(k);
5247
23.0k
                if (ret == 1) {
5248
18
                    constexpr int MAX_RETRY = 10;
5249
54
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
5250
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5251
54
                        ret = delete_recycle_txn_kv(k);
5252
                        // clang-format off
5253
54
                        TEST_SYNC_POINT_CALLBACK(
5254
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5255
                        // clang-format on
5256
54
                        if (ret != 1) {
5257
18
                            break;
5258
18
                        }
5259
                        // random sleep 0-100 ms to retry
5260
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5261
36
                    }
5262
18
                }
5263
23.0k
                if (ret != 0) {
5264
9
                    LOG_WARNING("failed to delete recycle txn kv")
5265
9
                            .tag("instance id", instance_id_)
5266
9
                            .tag("key", hex(k));
5267
9
                    return -1;
5268
9
                }
5269
23.0k
                return 0;
5270
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5245
3
            concurrent_delete_executor.add([&]() {
5246
3
                int ret = delete_recycle_txn_kv(k);
5247
3
                if (ret == 1) {
5248
0
                    constexpr int MAX_RETRY = 10;
5249
0
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
5250
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5251
0
                        ret = delete_recycle_txn_kv(k);
5252
                        // clang-format off
5253
0
                        TEST_SYNC_POINT_CALLBACK(
5254
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5255
                        // clang-format on
5256
0
                        if (ret != 1) {
5257
0
                            break;
5258
0
                        }
5259
                        // random sleep 0-100 ms to retry
5260
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5261
0
                    }
5262
0
                }
5263
3
                if (ret != 0) {
5264
0
                    LOG_WARNING("failed to delete recycle txn kv")
5265
0
                            .tag("instance id", instance_id_)
5266
0
                            .tag("key", hex(k));
5267
0
                    return -1;
5268
0
                }
5269
3
                return 0;
5270
3
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5245
23.0k
            concurrent_delete_executor.add([&]() {
5246
23.0k
                int ret = delete_recycle_txn_kv(k);
5247
23.0k
                if (ret == 1) {
5248
18
                    constexpr int MAX_RETRY = 10;
5249
54
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
5250
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5251
54
                        ret = delete_recycle_txn_kv(k);
5252
                        // clang-format off
5253
54
                        TEST_SYNC_POINT_CALLBACK(
5254
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5255
                        // clang-format on
5256
54
                        if (ret != 1) {
5257
18
                            break;
5258
18
                        }
5259
                        // random sleep 0-100 ms to retry
5260
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5261
36
                    }
5262
18
                }
5263
23.0k
                if (ret != 0) {
5264
9
                    LOG_WARNING("failed to delete recycle txn kv")
5265
9
                            .tag("instance id", instance_id_)
5266
9
                            .tag("key", hex(k));
5267
9
                    return -1;
5268
9
                }
5269
23.0k
                return 0;
5270
23.0k
            });
5271
23.0k
        }
5272
12
        bool finished = true;
5273
12
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
5274
23.0k
        for (int r : rets) {
5275
23.0k
            if (r != 0) {
5276
9
                ret = -1;
5277
9
            }
5278
23.0k
        }
5279
5280
12
        ret = finished ? ret : -1;
5281
5282
12
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
5283
5284
12
        if (ret != 0) {
5285
3
            LOG_WARNING("recycle txn kv ret!=0")
5286
3
                    .tag("finished", finished)
5287
3
                    .tag("ret", ret)
5288
3
                    .tag("instance_id", instance_id_);
5289
3
            return ret;
5290
3
        }
5291
9
        return ret;
5292
12
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
5237
3
    auto loop_done = [&]() -> int {
5238
3
        DORIS_CLOUD_DEFER {
5239
3
            recycle_txn_info_keys.clear();
5240
3
        };
5241
3
        TEST_SYNC_POINT_CALLBACK(
5242
3
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
5243
3
                &recycle_txn_info_keys);
5244
3
        for (const auto& k : recycle_txn_info_keys) {
5245
3
            concurrent_delete_executor.add([&]() {
5246
3
                int ret = delete_recycle_txn_kv(k);
5247
3
                if (ret == 1) {
5248
3
                    constexpr int MAX_RETRY = 10;
5249
3
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
5250
3
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5251
3
                        ret = delete_recycle_txn_kv(k);
5252
                        // clang-format off
5253
3
                        TEST_SYNC_POINT_CALLBACK(
5254
3
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5255
                        // clang-format on
5256
3
                        if (ret != 1) {
5257
3
                            break;
5258
3
                        }
5259
                        // random sleep 0-100 ms to retry
5260
3
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5261
3
                    }
5262
3
                }
5263
3
                if (ret != 0) {
5264
3
                    LOG_WARNING("failed to delete recycle txn kv")
5265
3
                            .tag("instance id", instance_id_)
5266
3
                            .tag("key", hex(k));
5267
3
                    return -1;
5268
3
                }
5269
3
                return 0;
5270
3
            });
5271
3
        }
5272
3
        bool finished = true;
5273
3
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
5274
3
        for (int r : rets) {
5275
3
            if (r != 0) {
5276
0
                ret = -1;
5277
0
            }
5278
3
        }
5279
5280
3
        ret = finished ? ret : -1;
5281
5282
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
5283
5284
3
        if (ret != 0) {
5285
0
            LOG_WARNING("recycle txn kv ret!=0")
5286
0
                    .tag("finished", finished)
5287
0
                    .tag("ret", ret)
5288
0
                    .tag("instance_id", instance_id_);
5289
0
            return ret;
5290
0
        }
5291
3
        return ret;
5292
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
5237
9
    auto loop_done = [&]() -> int {
5238
9
        DORIS_CLOUD_DEFER {
5239
9
            recycle_txn_info_keys.clear();
5240
9
        };
5241
9
        TEST_SYNC_POINT_CALLBACK(
5242
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
5243
9
                &recycle_txn_info_keys);
5244
23.0k
        for (const auto& k : recycle_txn_info_keys) {
5245
23.0k
            concurrent_delete_executor.add([&]() {
5246
23.0k
                int ret = delete_recycle_txn_kv(k);
5247
23.0k
                if (ret == 1) {
5248
23.0k
                    constexpr int MAX_RETRY = 10;
5249
23.0k
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
5250
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5251
23.0k
                        ret = delete_recycle_txn_kv(k);
5252
                        // clang-format off
5253
23.0k
                        TEST_SYNC_POINT_CALLBACK(
5254
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5255
                        // clang-format on
5256
23.0k
                        if (ret != 1) {
5257
23.0k
                            break;
5258
23.0k
                        }
5259
                        // random sleep 0-100 ms to retry
5260
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5261
23.0k
                    }
5262
23.0k
                }
5263
23.0k
                if (ret != 0) {
5264
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
5265
23.0k
                            .tag("instance id", instance_id_)
5266
23.0k
                            .tag("key", hex(k));
5267
23.0k
                    return -1;
5268
23.0k
                }
5269
23.0k
                return 0;
5270
23.0k
            });
5271
23.0k
        }
5272
9
        bool finished = true;
5273
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
5274
23.0k
        for (int r : rets) {
5275
23.0k
            if (r != 0) {
5276
9
                ret = -1;
5277
9
            }
5278
23.0k
        }
5279
5280
9
        ret = finished ? ret : -1;
5281
5282
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
5283
5284
9
        if (ret != 0) {
5285
3
            LOG_WARNING("recycle txn kv ret!=0")
5286
3
                    .tag("finished", finished)
5287
3
                    .tag("ret", ret)
5288
3
                    .tag("instance_id", instance_id_);
5289
3
            return ret;
5290
3
        }
5291
6
        return ret;
5292
9
    };
5293
5294
21
    if (config::enable_recycler_stats_metrics) {
5295
0
        scan_and_statistics_expired_txn_label();
5296
0
    }
5297
    // recycle_func and loop_done for scan and recycle
5298
21
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
5299
21
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
5300
21
}
5301
5302
// Identifying fields of one copy job, used to build the log-trace prefix and the copy-file keys
// in BatchObjStoreAccessor::add(). That function destructures this struct positionally, so the
// member order must not change.
struct CopyJobIdTuple {
    std::string instance_id;
    std::string stage_id;
    // Fixed-width and zero-initialized so that a default-constructed tuple never carries an
    // indeterminate id and the width does not depend on the platform's `long`.
    int64_t table_id = 0;
    std::string copy_id;
    std::string stage_path;
};
5309
struct BatchObjStoreAccessor {
5310
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
5311
                          TxnKv* txn_kv)
5312
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
5313
3
    ~BatchObjStoreAccessor() {
5314
3
        if (!paths_.empty()) {
5315
3
            consume();
5316
3
        }
5317
3
    }
5318
5319
    /**
5320
    * To implicitely do batch work and submit the batch delete task to s3
5321
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
5322
    *
5323
    * @param copy_job The protubuf struct consists of the copy job files.
5324
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
5325
    *            it would last until we finish the delete task, here we need pass one string value
5326
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
5327
    */
5328
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
5329
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
5330
5
        auto& file_keys = copy_file_keys_[key];
5331
5
        file_keys.log_trace =
5332
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
5333
5
                            instance_id, stage_id, table_id, copy_id, path);
5334
5
        std::string_view log_trace = file_keys.log_trace;
5335
2.03k
        for (const auto& file : copy_job.object_files()) {
5336
2.03k
            auto relative_path = file.relative_path();
5337
2.03k
            paths_.push_back(relative_path);
5338
2.03k
            file_keys.keys.push_back(copy_file_key(
5339
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
5340
2.03k
            LOG_INFO(log_trace)
5341
2.03k
                    .tag("relative_path", relative_path)
5342
2.03k
                    .tag("batch_count", batch_count_);
5343
2.03k
        }
5344
5
        LOG_INFO(log_trace)
5345
5
                .tag("objects_num", copy_job.object_files().size())
5346
5
                .tag("batch_count", batch_count_);
5347
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
5348
        // recommend using delete objects when objects num is less than 10)
5349
5
        if (paths_.size() < 1000) {
5350
3
            return;
5351
3
        }
5352
2
        consume();
5353
2
    }
5354
5355
private:
5356
5
    void consume() {
5357
5
        DORIS_CLOUD_DEFER {
5358
5
            paths_.clear();
5359
5
            copy_file_keys_.clear();
5360
5
            batch_count_++;
5361
5362
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
5363
5
                        batch_count_);
5364
5
        };
5365
5366
5
        StopWatch sw;
5367
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
5368
5
        if (0 != accessor_->delete_files(paths_)) {
5369
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
5370
2
                        paths_.size(), batch_count_, sw.elapsed_us());
5371
2
            return;
5372
2
        }
5373
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
5374
3
                    paths_.size(), batch_count_, sw.elapsed_us());
5375
        // delete fdb's keys
5376
3
        for (auto& file_keys : copy_file_keys_) {
5377
3
            auto& [log_trace, keys] = file_keys.second;
5378
3
            std::unique_ptr<Transaction> txn;
5379
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
5380
0
                LOG(WARNING) << "failed to create txn";
5381
0
                continue;
5382
0
            }
5383
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
5384
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
5385
            // limited, should not cause the txn commit failed.
5386
1.02k
            for (const auto& key : keys) {
5387
1.02k
                txn->remove(key);
5388
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
5389
1.02k
            }
5390
3
            txn->remove(file_keys.first);
5391
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
5392
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
5393
0
                continue;
5394
0
            }
5395
3
        }
5396
3
    }
5397
    std::shared_ptr<StorageVaultAccessor> accessor_;
5398
    // the path of the s3 files to be deleted
5399
    std::vector<std::string> paths_;
5400
    struct CopyFiles {
5401
        std::string log_trace;
5402
        std::vector<std::string> keys;
5403
    };
5404
    // pair<std::string, std::vector<std::string>>
5405
    // first: instance_id_ stage_id table_id query_id
5406
    // second: keys to be deleted
5407
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
5408
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
5409
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
5410
    // which can together uniquely identifies different tasks for tracing log
5411
    uint64_t& batch_count_;
5412
    TxnKv* txn_kv_;
5413
};
5414
5415
13
int InstanceRecycler::recycle_copy_jobs() {
5416
13
    int64_t num_scanned = 0;
5417
13
    int64_t num_finished = 0;
5418
13
    int64_t num_expired = 0;
5419
13
    int64_t num_recycled = 0;
5420
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
5421
13
    uint64_t batch_count = 0;
5422
13
    const std::string task_name = "recycle_copy_jobs";
5423
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5424
5425
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
5426
5427
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5428
13
    register_recycle_task(task_name, start_time);
5429
5430
13
    DORIS_CLOUD_DEFER {
5431
13
        unregister_recycle_task(task_name);
5432
13
        int64_t cost =
5433
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5434
13
        metrics_context.finish_report();
5435
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
5436
13
                .tag("instance_id", instance_id_)
5437
13
                .tag("num_scanned", num_scanned)
5438
13
                .tag("num_finished", num_finished)
5439
13
                .tag("num_expired", num_expired)
5440
13
                .tag("num_recycled", num_recycled);
5441
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
5430
13
    DORIS_CLOUD_DEFER {
5431
13
        unregister_recycle_task(task_name);
5432
13
        int64_t cost =
5433
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5434
13
        metrics_context.finish_report();
5435
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
5436
13
                .tag("instance_id", instance_id_)
5437
13
                .tag("num_scanned", num_scanned)
5438
13
                .tag("num_finished", num_finished)
5439
13
                .tag("num_expired", num_expired)
5440
13
                .tag("num_recycled", num_recycled);
5441
13
    };
5442
5443
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
5444
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
5445
13
    std::string key0;
5446
13
    std::string key1;
5447
13
    copy_job_key(key_info0, &key0);
5448
13
    copy_job_key(key_info1, &key1);
5449
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
5450
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
5451
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
5452
16
                         this](std::string_view k, std::string_view v) -> int {
5453
16
        ++num_scanned;
5454
16
        CopyJobPB copy_job;
5455
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
5456
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
5457
0
            return -1;
5458
0
        }
5459
5460
        // decode copy job key
5461
16
        auto k1 = k;
5462
16
        k1.remove_prefix(1);
5463
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5464
16
        decode_key(&k1, &out);
5465
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
5466
        // -> CopyJobPB
5467
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
5468
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
5469
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
5470
5471
16
        bool check_storage = true;
5472
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
5473
12
            ++num_finished;
5474
5475
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
5476
7
                auto it = stage_accessor_map.find(stage_id);
5477
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
5478
7
                std::string_view path;
5479
7
                if (it != stage_accessor_map.end()) {
5480
2
                    accessor = it->second;
5481
5
                } else {
5482
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
5483
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
5484
5
                                                      &inner_accessor);
5485
5
                    if (ret < 0) { // error
5486
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
5487
0
                        return -1;
5488
5
                    } else if (ret == 0) {
5489
3
                        path = inner_accessor->uri();
5490
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
5491
3
                                inner_accessor, batch_count, txn_kv_.get());
5492
3
                        stage_accessor_map.emplace(stage_id, accessor);
5493
3
                    } else { // stage not found, skip check storage
5494
2
                        check_storage = false;
5495
2
                    }
5496
5
                }
5497
7
                if (check_storage) {
5498
                    // TODO delete objects with key and etag is not supported
5499
5
                    accessor->add(std::move(copy_job), std::string(k),
5500
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
5501
5
                    return 0;
5502
5
                }
5503
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
5504
5
                int64_t current_time =
5505
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5506
5
                if (copy_job.finish_time_ms() > 0) {
5507
2
                    if (!config::force_immediate_recycle &&
5508
2
                        current_time < copy_job.finish_time_ms() +
5509
2
                                               config::copy_job_max_retention_second * 1000) {
5510
1
                        return 0;
5511
1
                    }
5512
3
                } else {
5513
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
5514
3
                    if (!config::force_immediate_recycle &&
5515
3
                        current_time < copy_job.start_time_ms() +
5516
3
                                               config::copy_job_max_retention_second * 1000) {
5517
1
                        return 0;
5518
1
                    }
5519
3
                }
5520
5
            }
5521
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
5522
4
            int64_t current_time =
5523
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5524
            // if copy job is timeout: delete all copy file kvs and copy job kv
5525
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
5526
2
                return 0;
5527
2
            }
5528
2
            ++num_expired;
5529
2
        }
5530
5531
        // delete all copy files
5532
7
        std::vector<std::string> copy_file_keys;
5533
70
        for (auto& file : copy_job.object_files()) {
5534
70
            copy_file_keys.push_back(copy_file_key(
5535
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
5536
70
        }
5537
7
        std::unique_ptr<Transaction> txn;
5538
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
5539
0
            LOG(WARNING) << "failed to create txn";
5540
0
            return -1;
5541
0
        }
5542
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
5543
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
5544
        // limited, should not cause the txn commit failed.
5545
70
        for (const auto& key : copy_file_keys) {
5546
70
            txn->remove(key);
5547
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
5548
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
5549
70
                      << ", query_id=" << copy_id;
5550
70
        }
5551
7
        txn->remove(k);
5552
7
        TxnErrorCode err = txn->commit();
5553
7
        if (err != TxnErrorCode::TXN_OK) {
5554
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
5555
0
            return -1;
5556
0
        }
5557
5558
7
        metrics_context.total_recycled_num = ++num_recycled;
5559
7
        metrics_context.report();
5560
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5561
7
        return 0;
5562
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5452
16
                         this](std::string_view k, std::string_view v) -> int {
5453
16
        ++num_scanned;
5454
16
        CopyJobPB copy_job;
5455
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
5456
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
5457
0
            return -1;
5458
0
        }
5459
5460
        // decode copy job key
5461
16
        auto k1 = k;
5462
16
        k1.remove_prefix(1);
5463
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5464
16
        decode_key(&k1, &out);
5465
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
5466
        // -> CopyJobPB
5467
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
5468
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
5469
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
5470
5471
16
        bool check_storage = true;
5472
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
5473
12
            ++num_finished;
5474
5475
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
5476
7
                auto it = stage_accessor_map.find(stage_id);
5477
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
5478
7
                std::string_view path;
5479
7
                if (it != stage_accessor_map.end()) {
5480
2
                    accessor = it->second;
5481
5
                } else {
5482
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
5483
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
5484
5
                                                      &inner_accessor);
5485
5
                    if (ret < 0) { // error
5486
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
5487
0
                        return -1;
5488
5
                    } else if (ret == 0) {
5489
3
                        path = inner_accessor->uri();
5490
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
5491
3
                                inner_accessor, batch_count, txn_kv_.get());
5492
3
                        stage_accessor_map.emplace(stage_id, accessor);
5493
3
                    } else { // stage not found, skip check storage
5494
2
                        check_storage = false;
5495
2
                    }
5496
5
                }
5497
7
                if (check_storage) {
5498
                    // TODO delete objects with key and etag is not supported
5499
5
                    accessor->add(std::move(copy_job), std::string(k),
5500
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
5501
5
                    return 0;
5502
5
                }
5503
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
5504
5
                int64_t current_time =
5505
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5506
5
                if (copy_job.finish_time_ms() > 0) {
5507
2
                    if (!config::force_immediate_recycle &&
5508
2
                        current_time < copy_job.finish_time_ms() +
5509
2
                                               config::copy_job_max_retention_second * 1000) {
5510
1
                        return 0;
5511
1
                    }
5512
3
                } else {
5513
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
5514
3
                    if (!config::force_immediate_recycle &&
5515
3
                        current_time < copy_job.start_time_ms() +
5516
3
                                               config::copy_job_max_retention_second * 1000) {
5517
1
                        return 0;
5518
1
                    }
5519
3
                }
5520
5
            }
5521
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
5522
4
            int64_t current_time =
5523
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5524
            // if copy job is timeout: delete all copy file kvs and copy job kv
5525
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
5526
2
                return 0;
5527
2
            }
5528
2
            ++num_expired;
5529
2
        }
5530
5531
        // delete all copy files
5532
7
        std::vector<std::string> copy_file_keys;
5533
70
        for (auto& file : copy_job.object_files()) {
5534
70
            copy_file_keys.push_back(copy_file_key(
5535
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
5536
70
        }
5537
7
        std::unique_ptr<Transaction> txn;
5538
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
5539
0
            LOG(WARNING) << "failed to create txn";
5540
0
            return -1;
5541
0
        }
5542
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
5543
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
5544
        // limited, should not cause the txn commit failed.
5545
70
        for (const auto& key : copy_file_keys) {
5546
70
            txn->remove(key);
5547
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
5548
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
5549
70
                      << ", query_id=" << copy_id;
5550
70
        }
5551
7
        txn->remove(k);
5552
7
        TxnErrorCode err = txn->commit();
5553
7
        if (err != TxnErrorCode::TXN_OK) {
5554
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
5555
0
            return -1;
5556
0
        }
5557
5558
7
        metrics_context.total_recycled_num = ++num_recycled;
5559
7
        metrics_context.report();
5560
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5561
7
        return 0;
5562
7
    };
5563
5564
13
    if (config::enable_recycler_stats_metrics) {
5565
0
        scan_and_statistics_copy_jobs();
5566
0
    }
5567
    // recycle_func and loop_done for scan and recycle
5568
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
5569
13
}
5570
5571
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
5572
                                             const StagePB::StageType& stage_type,
5573
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
5574
5
#ifdef UNIT_TEST
5575
    // In unit test, external use the same accessor as the internal stage
5576
5
    auto it = accessor_map_.find(stage_id);
5577
5
    if (it != accessor_map_.end()) {
5578
3
        *accessor = it->second;
5579
3
    } else {
5580
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
5581
2
        return 1;
5582
2
    }
5583
#else
5584
    // init s3 accessor and add to accessor map
5585
    auto stage_it =
5586
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
5587
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
5588
5589
    if (stage_it == instance_info_.stages().end()) {
5590
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
5591
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
5592
        return 1;
5593
    }
5594
5595
    const auto& object_store_info = stage_it->obj_info();
5596
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
5597
5598
    S3Conf s3_conf;
5599
    if (stage_type == StagePB::EXTERNAL) {
5600
        if (stage_access_type == StagePB::AKSK) {
5601
            auto conf = S3Conf::from_obj_store_info(object_store_info);
5602
            if (!conf) {
5603
                return -1;
5604
            }
5605
5606
            s3_conf = std::move(*conf);
5607
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
5608
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
5609
            if (!conf) {
5610
                return -1;
5611
            }
5612
5613
            s3_conf = std::move(*conf);
5614
            if (instance_info_.ram_user().has_encryption_info()) {
5615
                AkSkPair plain_ak_sk_pair;
5616
                int ret = decrypt_ak_sk_helper(
5617
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
5618
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
5619
                if (ret != 0) {
5620
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
5621
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
5622
                    return -1;
5623
                }
5624
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
5625
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
5626
            } else {
5627
                s3_conf.ak = instance_info_.ram_user().ak();
5628
                s3_conf.sk = instance_info_.ram_user().sk();
5629
            }
5630
        } else {
5631
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
5632
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
5633
            return -1;
5634
        }
5635
    } else if (stage_type == StagePB::INTERNAL) {
5636
        int idx = stoi(object_store_info.id());
5637
        if (idx > instance_info_.obj_info().size() || idx < 1) {
5638
            LOG(WARNING) << "invalid idx: " << idx;
5639
            return -1;
5640
        }
5641
5642
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
5643
        auto conf = S3Conf::from_obj_store_info(old_obj);
5644
        if (!conf) {
5645
            return -1;
5646
        }
5647
5648
        s3_conf = std::move(*conf);
5649
        s3_conf.prefix = object_store_info.prefix();
5650
    } else {
5651
        LOG(WARNING) << "unknown stage type " << stage_type;
5652
        return -1;
5653
    }
5654
5655
    std::shared_ptr<S3Accessor> s3_accessor;
5656
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
5657
    if (ret != 0) {
5658
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
5659
        return -1;
5660
    }
5661
5662
    *accessor = std::move(s3_accessor);
5663
#endif
5664
3
    return 0;
5665
5
}
5666
5667
11
int InstanceRecycler::recycle_stage() {
5668
11
    int64_t num_scanned = 0;
5669
11
    int64_t num_recycled = 0;
5670
11
    const std::string task_name = "recycle_stage";
5671
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5672
5673
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
5674
5675
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5676
11
    register_recycle_task(task_name, start_time);
5677
5678
11
    DORIS_CLOUD_DEFER {
5679
11
        unregister_recycle_task(task_name);
5680
11
        int64_t cost =
5681
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5682
11
        metrics_context.finish_report();
5683
11
        LOG_WARNING("recycle stage, cost={}s", cost)
5684
11
                .tag("instance_id", instance_id_)
5685
11
                .tag("num_scanned", num_scanned)
5686
11
                .tag("num_recycled", num_recycled);
5687
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
5678
11
    DORIS_CLOUD_DEFER {
5679
11
        unregister_recycle_task(task_name);
5680
11
        int64_t cost =
5681
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5682
11
        metrics_context.finish_report();
5683
11
        LOG_WARNING("recycle stage, cost={}s", cost)
5684
11
                .tag("instance_id", instance_id_)
5685
11
                .tag("num_scanned", num_scanned)
5686
11
                .tag("num_recycled", num_recycled);
5687
11
    };
5688
5689
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
5690
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
5691
11
    std::string key0 = recycle_stage_key(key_info0);
5692
11
    std::string key1 = recycle_stage_key(key_info1);
5693
5694
11
    std::vector<std::string_view> stage_keys;
5695
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
5696
11
                         this](std::string_view k, std::string_view v) -> int {
5697
1
        ++num_scanned;
5698
1
        RecycleStagePB recycle_stage;
5699
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
5700
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
5701
0
            return -1;
5702
0
        }
5703
5704
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
5705
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
5706
0
            LOG(WARNING) << "invalid idx: " << idx;
5707
0
            return -1;
5708
0
        }
5709
5710
1
        std::shared_ptr<StorageVaultAccessor> accessor;
5711
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
5712
1
                [&] {
5713
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
5714
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5715
1
                    if (!s3_conf) {
5716
1
                        return -1;
5717
1
                    }
5718
5719
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
5720
1
                    std::shared_ptr<S3Accessor> s3_accessor;
5721
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
5722
1
                    if (ret != 0) {
5723
1
                        return -1;
5724
1
                    }
5725
5726
1
                    accessor = std::move(s3_accessor);
5727
1
                    return 0;
5728
1
                }(),
5729
1
                "recycle_stage:get_accessor", &accessor);
5730
5731
1
        if (ret != 0) {
5732
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
5733
0
            return ret;
5734
0
        }
5735
5736
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
5737
1
                .tag("instance_id", instance_id_)
5738
1
                .tag("stage_id", recycle_stage.stage().stage_id())
5739
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
5740
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
5741
1
                .tag("obj_info_id", idx)
5742
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
5743
1
        ret = accessor->delete_all();
5744
1
        if (ret != 0) {
5745
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
5746
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
5747
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
5748
0
                         << ", ret=" << ret;
5749
0
            return -1;
5750
0
        }
5751
1
        metrics_context.total_recycled_num = ++num_recycled;
5752
1
        metrics_context.report();
5753
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
5754
1
        stage_keys.push_back(k);
5755
1
        return 0;
5756
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5696
1
                         this](std::string_view k, std::string_view v) -> int {
5697
1
        ++num_scanned;
5698
1
        RecycleStagePB recycle_stage;
5699
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
5700
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
5701
0
            return -1;
5702
0
        }
5703
5704
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
5705
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
5706
0
            LOG(WARNING) << "invalid idx: " << idx;
5707
0
            return -1;
5708
0
        }
5709
5710
1
        std::shared_ptr<StorageVaultAccessor> accessor;
5711
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
5712
1
                [&] {
5713
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
5714
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5715
1
                    if (!s3_conf) {
5716
1
                        return -1;
5717
1
                    }
5718
5719
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
5720
1
                    std::shared_ptr<S3Accessor> s3_accessor;
5721
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
5722
1
                    if (ret != 0) {
5723
1
                        return -1;
5724
1
                    }
5725
5726
1
                    accessor = std::move(s3_accessor);
5727
1
                    return 0;
5728
1
                }(),
5729
1
                "recycle_stage:get_accessor", &accessor);
5730
5731
1
        if (ret != 0) {
5732
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
5733
0
            return ret;
5734
0
        }
5735
5736
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
5737
1
                .tag("instance_id", instance_id_)
5738
1
                .tag("stage_id", recycle_stage.stage().stage_id())
5739
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
5740
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
5741
1
                .tag("obj_info_id", idx)
5742
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
5743
1
        ret = accessor->delete_all();
5744
1
        if (ret != 0) {
5745
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
5746
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
5747
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
5748
0
                         << ", ret=" << ret;
5749
0
            return -1;
5750
0
        }
5751
1
        metrics_context.total_recycled_num = ++num_recycled;
5752
1
        metrics_context.report();
5753
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
5754
1
        stage_keys.push_back(k);
5755
1
        return 0;
5756
1
    };
5757
5758
11
    auto loop_done = [&stage_keys, this]() -> int {
5759
1
        if (stage_keys.empty()) return 0;
5760
1
        DORIS_CLOUD_DEFER {
5761
1
            stage_keys.clear();
5762
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5760
1
        DORIS_CLOUD_DEFER {
5761
1
            stage_keys.clear();
5762
1
        };
5763
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
5764
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
5765
0
            return -1;
5766
0
        }
5767
1
        return 0;
5768
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
5758
1
    auto loop_done = [&stage_keys, this]() -> int {
5759
1
        if (stage_keys.empty()) return 0;
5760
1
        DORIS_CLOUD_DEFER {
5761
1
            stage_keys.clear();
5762
1
        };
5763
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
5764
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
5765
0
            return -1;
5766
0
        }
5767
1
        return 0;
5768
1
    };
5769
11
    if (config::enable_recycler_stats_metrics) {
5770
0
        scan_and_statistics_stage();
5771
0
    }
5772
    // recycle_func and loop_done for scan and recycle
5773
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
5774
11
}
5775
5776
10
int InstanceRecycler::recycle_expired_stage_objects() {
5777
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
5778
5779
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5780
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
5781
5782
10
    DORIS_CLOUD_DEFER {
5783
10
        int64_t cost =
5784
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5785
10
        metrics_context.finish_report();
5786
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
5787
10
                .tag("instance_id", instance_id_);
5788
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
5782
10
    DORIS_CLOUD_DEFER {
5783
10
        int64_t cost =
5784
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5785
10
        metrics_context.finish_report();
5786
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
5787
10
                .tag("instance_id", instance_id_);
5788
10
    };
5789
5790
10
    int ret = 0;
5791
5792
10
    if (config::enable_recycler_stats_metrics) {
5793
0
        scan_and_statistics_expired_stage_objects();
5794
0
    }
5795
5796
10
    for (const auto& stage : instance_info_.stages()) {
5797
0
        std::stringstream ss;
5798
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
5799
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
5800
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
5801
0
           << ", prefix=" << stage.obj_info().prefix();
5802
5803
0
        if (stopped()) {
5804
0
            break;
5805
0
        }
5806
0
        if (stage.type() == StagePB::EXTERNAL) {
5807
0
            continue;
5808
0
        }
5809
0
        int idx = stoi(stage.obj_info().id());
5810
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
5811
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
5812
0
            continue;
5813
0
        }
5814
5815
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
5816
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5817
0
        if (!s3_conf) {
5818
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
5819
0
            continue;
5820
0
        }
5821
5822
0
        s3_conf->prefix = stage.obj_info().prefix();
5823
0
        std::shared_ptr<S3Accessor> accessor;
5824
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
5825
0
        if (ret1 != 0) {
5826
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
5827
0
            ret = -1;
5828
0
            continue;
5829
0
        }
5830
5831
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
5832
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
5833
0
            ret = -1;
5834
0
            continue;
5835
0
        }
5836
5837
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
5838
0
        int64_t expiration_time =
5839
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
5840
0
                config::internal_stage_objects_expire_time_second;
5841
0
        if (config::force_immediate_recycle) {
5842
0
            expiration_time = INT64_MAX;
5843
0
        }
5844
0
        ret1 = accessor->delete_all(expiration_time);
5845
0
        if (ret1 != 0) {
5846
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
5847
0
                         << ss.str();
5848
0
            ret = -1;
5849
0
            continue;
5850
0
        }
5851
0
        metrics_context.total_recycled_num++;
5852
0
        metrics_context.report();
5853
0
    }
5854
10
    return ret;
5855
10
}
5856
5857
160
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
5858
160
    std::lock_guard lock(recycle_tasks_mutex);
5859
160
    running_recycle_tasks[task_name] = start_time;
5860
160
}
5861
5862
160
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
5863
160
    std::lock_guard lock(recycle_tasks_mutex);
5864
160
    DCHECK(running_recycle_tasks[task_name] > 0);
5865
160
    running_recycle_tasks.erase(task_name);
5866
160
}
5867
5868
21
bool InstanceRecycler::check_recycle_tasks() {
5869
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
5870
21
    {
5871
21
        std::lock_guard lock(recycle_tasks_mutex);
5872
21
        tmp_running_recycle_tasks = running_recycle_tasks;
5873
21
    }
5874
5875
21
    bool found = false;
5876
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5877
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
5878
20
        int64_t cost = now - start_time;
5879
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
5880
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
5881
20
                    .tag("instance_id", instance_id_)
5882
20
                    .tag("task", task_name);
5883
20
            found = true;
5884
20
        }
5885
20
    }
5886
5887
21
    return found;
5888
21
}
5889
5890
// Scan and statistics indexes that need to be recycled
5891
0
int InstanceRecycler::scan_and_statistics_indexes() {
5892
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
5893
5894
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
5895
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
5896
0
    std::string index_key0;
5897
0
    std::string index_key1;
5898
0
    recycle_index_key(index_key_info0, &index_key0);
5899
0
    recycle_index_key(index_key_info1, &index_key1);
5900
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5901
5902
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
5903
0
        RecycleIndexPB index_pb;
5904
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
5905
0
            return 0;
5906
0
        }
5907
0
        int64_t current_time = ::time(nullptr);
5908
0
        if (current_time <
5909
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
5910
0
            return 0;
5911
0
        }
5912
        // decode index_id
5913
0
        auto k1 = k;
5914
0
        k1.remove_prefix(1);
5915
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5916
0
        decode_key(&k1, &out);
5917
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
5918
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
5919
0
        std::unique_ptr<Transaction> txn;
5920
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5921
0
        if (err != TxnErrorCode::TXN_OK) {
5922
0
            return 0;
5923
0
        }
5924
0
        std::string val;
5925
0
        err = txn->get(k, &val);
5926
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5927
0
            return 0;
5928
0
        }
5929
0
        if (err != TxnErrorCode::TXN_OK) {
5930
0
            return 0;
5931
0
        }
5932
0
        index_pb.Clear();
5933
0
        if (!index_pb.ParseFromString(val)) {
5934
0
            return 0;
5935
0
        }
5936
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
5937
0
            return 0;
5938
0
        }
5939
0
        metrics_context.total_need_recycle_num++;
5940
0
        return 0;
5941
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5942
5943
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
5944
0
    metrics_context.report(true);
5945
0
    segment_metrics_context_.report(true);
5946
0
    tablet_metrics_context_.report(true);
5947
0
    return ret;
5948
0
}
5949
5950
// Scan and statistics partitions that need to be recycled
5951
0
int InstanceRecycler::scan_and_statistics_partitions() {
5952
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
5953
5954
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
5955
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
5956
0
    std::string part_key0;
5957
0
    std::string part_key1;
5958
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5959
5960
0
    recycle_partition_key(part_key_info0, &part_key0);
5961
0
    recycle_partition_key(part_key_info1, &part_key1);
5962
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
5963
0
        RecyclePartitionPB part_pb;
5964
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
5965
0
            return 0;
5966
0
        }
5967
0
        int64_t current_time = ::time(nullptr);
5968
0
        if (current_time <
5969
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
5970
0
            return 0;
5971
0
        }
5972
        // decode partition_id
5973
0
        auto k1 = k;
5974
0
        k1.remove_prefix(1);
5975
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5976
0
        decode_key(&k1, &out);
5977
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
5978
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
5979
        // Change state to RECYCLING
5980
0
        std::unique_ptr<Transaction> txn;
5981
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5982
0
        if (err != TxnErrorCode::TXN_OK) {
5983
0
            return 0;
5984
0
        }
5985
0
        std::string val;
5986
0
        err = txn->get(k, &val);
5987
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5988
0
            return 0;
5989
0
        }
5990
0
        if (err != TxnErrorCode::TXN_OK) {
5991
0
            return 0;
5992
0
        }
5993
0
        part_pb.Clear();
5994
0
        if (!part_pb.ParseFromString(val)) {
5995
0
            return 0;
5996
0
        }
5997
        // Partitions with PREPARED state MUST have no data
5998
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
5999
0
        int ret = 0;
6000
0
        for (int64_t index_id : part_pb.index_id()) {
6001
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
6002
0
                                            partition_id, is_empty_tablet) != 0) {
6003
0
                ret = 0;
6004
0
            }
6005
0
        }
6006
0
        metrics_context.total_need_recycle_num++;
6007
0
        return ret;
6008
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6009
6010
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
6011
0
    metrics_context.report(true);
6012
0
    segment_metrics_context_.report(true);
6013
0
    tablet_metrics_context_.report(true);
6014
0
    return ret;
6015
0
}
6016
6017
// Scan and statistics rowsets that need to be recycled
6018
0
int InstanceRecycler::scan_and_statistics_rowsets() {
6019
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
6020
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
6021
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
6022
0
    std::string recyc_rs_key0;
6023
0
    std::string recyc_rs_key1;
6024
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
6025
0
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
6026
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6027
6028
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
6029
0
        RecycleRowsetPB rowset;
6030
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6031
0
            return 0;
6032
0
        }
6033
0
        int64_t current_time = ::time(nullptr);
6034
0
        if (current_time <
6035
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
6036
0
            return 0;
6037
0
        }
6038
0
        if (!rowset.has_type()) {
6039
0
            if (!rowset.has_resource_id()) [[unlikely]] {
6040
0
                return 0;
6041
0
            }
6042
0
            if (rowset.resource_id().empty()) [[unlikely]] {
6043
0
                return 0;
6044
0
            }
6045
0
            metrics_context.total_need_recycle_num++;
6046
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6047
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
6048
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6049
0
            return 0;
6050
0
        }
6051
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
6052
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
6053
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
6054
0
                return 0;
6055
0
            }
6056
0
        }
6057
0
        metrics_context.total_need_recycle_num++;
6058
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
6059
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
6060
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
6061
0
        return 0;
6062
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6063
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
6064
0
    metrics_context.report(true);
6065
0
    segment_metrics_context_.report(true);
6066
0
    return ret;
6067
0
}
6068
6069
// Scan and statistics tmp_rowsets that need to be recycled
6070
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
6071
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
6072
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
6073
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
6074
0
    std::string tmp_rs_key0;
6075
0
    std::string tmp_rs_key1;
6076
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
6077
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
6078
6079
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6080
6081
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
6082
0
        doris::RowsetMetaCloudPB rowset;
6083
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6084
0
            return 0;
6085
0
        }
6086
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
6087
0
        int64_t current_time = ::time(nullptr);
6088
0
        if (current_time < expiration) {
6089
0
            return 0;
6090
0
        }
6091
6092
0
        DCHECK_GT(rowset.txn_id(), 0)
6093
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
6094
0
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
6095
0
            return 0;
6096
0
        }
6097
6098
0
        if (!rowset.has_resource_id()) {
6099
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
6100
0
                return 0;
6101
0
            }
6102
0
            return 0;
6103
0
        }
6104
6105
0
        metrics_context.total_need_recycle_num++;
6106
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
6107
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
6108
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
6109
0
        return 0;
6110
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6111
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
6112
0
    metrics_context.report(true);
6113
0
    segment_metrics_context_.report(true);
6114
0
    return ret;
6115
0
}
6116
6117
// Scan and statistics abort_timeout_txn that need to be recycled
6118
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
6119
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
6120
6121
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
6122
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6123
0
    std::string begin_txn_running_key;
6124
0
    std::string end_txn_running_key;
6125
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
6126
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
6127
6128
0
    int64_t current_time =
6129
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6130
6131
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
6132
0
                                               std::string_view k, std::string_view v) -> int {
6133
0
        std::unique_ptr<Transaction> txn;
6134
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6135
0
        if (err != TxnErrorCode::TXN_OK) {
6136
0
            return 0;
6137
0
        }
6138
0
        std::string_view k1 = k;
6139
0
        k1.remove_prefix(1);
6140
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6141
0
        if (decode_key(&k1, &out) != 0) {
6142
0
            return 0;
6143
0
        }
6144
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6145
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6146
        // Update txn_info
6147
0
        std::string txn_inf_key, txn_inf_val;
6148
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6149
0
        err = txn->get(txn_inf_key, &txn_inf_val);
6150
0
        if (err != TxnErrorCode::TXN_OK) {
6151
0
            return 0;
6152
0
        }
6153
0
        TxnInfoPB txn_info;
6154
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
6155
0
            return 0;
6156
0
        }
6157
6158
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
6159
0
            TxnRunningPB txn_running_pb;
6160
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6161
0
                return 0;
6162
0
            }
6163
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6164
0
                return 0;
6165
0
            }
6166
0
            metrics_context.total_need_recycle_num++;
6167
0
        }
6168
0
        return 0;
6169
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6170
6171
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
6172
0
    metrics_context.report(true);
6173
0
    return ret;
6174
0
}
6175
6176
// Scan and statistics expired_txn_label that need to be recycled
6177
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
6178
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
6179
6180
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
6181
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6182
0
    std::string begin_recycle_txn_key;
6183
0
    std::string end_recycle_txn_key;
6184
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
6185
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
6186
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6187
0
    int64_t current_time_ms =
6188
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6189
6190
    // for calculate the total num or bytes of recyled objects
6191
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
6192
0
        RecycleTxnPB recycle_txn_pb;
6193
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6194
0
            return 0;
6195
0
        }
6196
0
        if ((config::force_immediate_recycle) ||
6197
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6198
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6199
0
             current_time_ms)) {
6200
0
            metrics_context.total_need_recycle_num++;
6201
0
        }
6202
0
        return 0;
6203
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6204
6205
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
6206
0
    metrics_context.report(true);
6207
0
    return ret;
6208
0
}
6209
6210
// Scan and statistics copy_jobs that need to be recycled
6211
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
6212
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
6213
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
6214
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
6215
0
    std::string key0;
6216
0
    std::string key1;
6217
0
    copy_job_key(key_info0, &key0);
6218
0
    copy_job_key(key_info1, &key1);
6219
6220
    // for calculate the total num or bytes of recyled objects
6221
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
6222
0
        CopyJobPB copy_job;
6223
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6224
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6225
0
            return 0;
6226
0
        }
6227
6228
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6229
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
6230
0
                int64_t current_time =
6231
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6232
0
                if (copy_job.finish_time_ms() > 0) {
6233
0
                    if (!config::force_immediate_recycle &&
6234
0
                        current_time < copy_job.finish_time_ms() +
6235
0
                                               config::copy_job_max_retention_second * 1000) {
6236
0
                        return 0;
6237
0
                    }
6238
0
                } else {
6239
0
                    if (!config::force_immediate_recycle &&
6240
0
                        current_time < copy_job.start_time_ms() +
6241
0
                                               config::copy_job_max_retention_second * 1000) {
6242
0
                        return 0;
6243
0
                    }
6244
0
                }
6245
0
            }
6246
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6247
0
            int64_t current_time =
6248
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6249
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6250
0
                return 0;
6251
0
            }
6252
0
        }
6253
0
        metrics_context.total_need_recycle_num++;
6254
0
        return 0;
6255
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6256
6257
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
6258
0
    metrics_context.report(true);
6259
0
    return ret;
6260
0
}
6261
6262
// Scan and statistics stage that need to be recycled
6263
0
int InstanceRecycler::scan_and_statistics_stage() {
6264
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
6265
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6266
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6267
0
    std::string key0 = recycle_stage_key(key_info0);
6268
0
    std::string key1 = recycle_stage_key(key_info1);
6269
6270
    // for calculate the total num or bytes of recyled objects
6271
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
6272
0
                                                        std::string_view v) -> int {
6273
0
        RecycleStagePB recycle_stage;
6274
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6275
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6276
0
            return 0;
6277
0
        }
6278
6279
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
6280
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6281
0
            LOG(WARNING) << "invalid idx: " << idx;
6282
0
            return 0;
6283
0
        }
6284
6285
0
        std::shared_ptr<StorageVaultAccessor> accessor;
6286
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6287
0
                [&] {
6288
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6289
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6290
0
                    if (!s3_conf) {
6291
0
                        return 0;
6292
0
                    }
6293
6294
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6295
0
                    std::shared_ptr<S3Accessor> s3_accessor;
6296
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6297
0
                    if (ret != 0) {
6298
0
                        return 0;
6299
0
                    }
6300
6301
0
                    accessor = std::move(s3_accessor);
6302
0
                    return 0;
6303
0
                }(),
6304
0
                "recycle_stage:get_accessor", &accessor);
6305
6306
0
        if (ret != 0) {
6307
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6308
0
            return 0;
6309
0
        }
6310
6311
0
        metrics_context.total_need_recycle_num++;
6312
0
        return 0;
6313
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6314
6315
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
6316
0
    metrics_context.report(true);
6317
0
    return ret;
6318
0
}
6319
6320
// Scan and statistics expired_stage_objects that need to be recycled
6321
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
6322
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
6323
6324
    // for calculate the total num or bytes of recyled objects
6325
0
    auto scan_and_statistics = [&metrics_context, this]() {
6326
0
        for (const auto& stage : instance_info_.stages()) {
6327
0
            if (stopped()) {
6328
0
                break;
6329
0
            }
6330
0
            if (stage.type() == StagePB::EXTERNAL) {
6331
0
                continue;
6332
0
            }
6333
0
            int idx = stoi(stage.obj_info().id());
6334
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
6335
0
                continue;
6336
0
            }
6337
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
6338
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6339
0
            if (!s3_conf) {
6340
0
                continue;
6341
0
            }
6342
0
            s3_conf->prefix = stage.obj_info().prefix();
6343
0
            std::shared_ptr<S3Accessor> accessor;
6344
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
6345
0
            if (ret1 != 0) {
6346
0
                continue;
6347
0
            }
6348
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
6349
0
                continue;
6350
0
            }
6351
0
            metrics_context.total_need_recycle_num++;
6352
0
        }
6353
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
6354
6355
0
    scan_and_statistics();
6356
0
    metrics_context.report(true);
6357
0
    return 0;
6358
0
}
6359
6360
// Scan and statistics versions that need to be recycled
6361
0
int InstanceRecycler::scan_and_statistics_versions() {
6362
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
6363
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
6364
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
6365
6366
0
    int64_t last_scanned_table_id = 0;
6367
0
    bool is_recycled = false; // Is last scanned kv recycled
6368
    // for calculate the total num or bytes of recyled objects
6369
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
6370
0
                                       std::string_view k, std::string_view) {
6371
0
        auto k1 = k;
6372
0
        k1.remove_prefix(1);
6373
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
6374
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6375
0
        decode_key(&k1, &out);
6376
0
        DCHECK_EQ(out.size(), 6) << k;
6377
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
6378
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
6379
0
            metrics_context.total_need_recycle_num +=
6380
0
                    is_recycled; // Version kv of this table has been recycled
6381
0
            return 0;
6382
0
        }
6383
0
        last_scanned_table_id = table_id;
6384
0
        is_recycled = false;
6385
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
6386
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
6387
0
        std::unique_ptr<Transaction> txn;
6388
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6389
0
        if (err != TxnErrorCode::TXN_OK) {
6390
0
            return 0;
6391
0
        }
6392
0
        std::unique_ptr<RangeGetIterator> iter;
6393
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
6394
0
        if (err != TxnErrorCode::TXN_OK) {
6395
0
            return 0;
6396
0
        }
6397
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
6398
0
            return 0;
6399
0
        }
6400
0
        metrics_context.total_need_recycle_num++;
6401
0
        is_recycled = true;
6402
0
        return 0;
6403
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6404
6405
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
6406
0
    metrics_context.report(true);
6407
0
    return ret;
6408
0
}
6409
6410
// Scan and statistics restore jobs that need to be recycled
6411
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
6412
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
6413
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
6414
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
6415
0
    std::string restore_job_key0;
6416
0
    std::string restore_job_key1;
6417
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
6418
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
6419
6420
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6421
6422
    // for calculate the total num or bytes of recyled objects
6423
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
6424
0
        RestoreJobCloudPB restore_job_pb;
6425
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
6426
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
6427
0
            return 0;
6428
0
        }
6429
0
        int64_t expiration =
6430
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
6431
0
        int64_t current_time = ::time(nullptr);
6432
0
        if (current_time < expiration) { // not expired
6433
0
            return 0;
6434
0
        }
6435
0
        metrics_context.total_need_recycle_num++;
6436
0
        if(restore_job_pb.need_recycle_data()) {
6437
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
6438
0
        }
6439
0
        return 0;
6440
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6441
6442
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
6443
0
    metrics_context.report(true);
6444
0
    return ret;
6445
0
}
6446
6447
} // namespace doris::cloud