Coverage Report

Created: 2026-01-09 15:09

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <initializer_list>
36
#include <memory>
37
#include <numeric>
38
#include <string>
39
#include <string_view>
40
#include <utility>
41
42
#include "common/defer.h"
43
#include "common/stopwatch.h"
44
#include "meta-service/meta_service.h"
45
#include "meta-service/meta_service_helper.h"
46
#include "meta-service/meta_service_schema.h"
47
#include "meta-store/blob_message.h"
48
#include "meta-store/meta_reader.h"
49
#include "meta-store/txn_kv.h"
50
#include "meta-store/txn_kv_error.h"
51
#include "meta-store/versioned_value.h"
52
#include "recycler/checker.h"
53
#ifdef ENABLE_HDFS_STORAGE_VAULT
54
#include "recycler/hdfs_accessor.h"
55
#endif
56
#include "recycler/s3_accessor.h"
57
#include "recycler/storage_vault_accessor.h"
58
#ifdef UNIT_TEST
59
#include "../test/mock_accessor.h"
60
#endif
61
#include "common/bvars.h"
62
#include "common/config.h"
63
#include "common/encryption_util.h"
64
#include "common/logging.h"
65
#include "common/simple_thread_pool.h"
66
#include "common/util.h"
67
#include "cpp/sync_point.h"
68
#include "meta-store/codec.h"
69
#include "meta-store/document_message.h"
70
#include "meta-store/keys.h"
71
#include "recycler/recycler_service.h"
72
#include "recycler/sync_executor.h"
73
#include "recycler/util.h"
74
75
namespace doris::cloud {
76
77
using namespace std::chrono;
78
79
// return 0 for success get a key, 1 for key not found, negative for error
80
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
81
0
    std::unique_ptr<Transaction> txn;
82
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
83
0
    if (err != TxnErrorCode::TXN_OK) {
84
0
        return -1;
85
0
    }
86
0
    switch (txn->get(key, &val, true)) {
87
0
    case TxnErrorCode::TXN_OK:
88
0
        return 0;
89
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
90
0
        return 1;
91
0
    default:
92
0
        return -1;
93
0
    };
94
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
95
96
// 0 for success, negative for error
97
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
98
256
                   std::unique_ptr<RangeGetIterator>& it) {
99
256
    std::unique_ptr<Transaction> txn;
100
256
    TxnErrorCode err = txn_kv->create_txn(&txn);
101
256
    if (err != TxnErrorCode::TXN_OK) {
102
0
        return -1;
103
0
    }
104
256
    switch (txn->get(begin, end, &it, true)) {
105
256
    case TxnErrorCode::TXN_OK:
106
256
        return 0;
107
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
108
0
        return 1;
109
0
    default:
110
0
        return -1;
111
256
    };
112
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
98
237
                   std::unique_ptr<RangeGetIterator>& it) {
99
237
    std::unique_ptr<Transaction> txn;
100
237
    TxnErrorCode err = txn_kv->create_txn(&txn);
101
237
    if (err != TxnErrorCode::TXN_OK) {
102
0
        return -1;
103
0
    }
104
237
    switch (txn->get(begin, end, &it, true)) {
105
237
    case TxnErrorCode::TXN_OK:
106
237
        return 0;
107
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
108
0
        return 1;
109
0
    default:
110
0
        return -1;
111
237
    };
112
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
98
19
                   std::unique_ptr<RangeGetIterator>& it) {
99
19
    std::unique_ptr<Transaction> txn;
100
19
    TxnErrorCode err = txn_kv->create_txn(&txn);
101
19
    if (err != TxnErrorCode::TXN_OK) {
102
0
        return -1;
103
0
    }
104
19
    switch (txn->get(begin, end, &it, true)) {
105
19
    case TxnErrorCode::TXN_OK:
106
19
        return 0;
107
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
108
0
        return 1;
109
0
    default:
110
0
        return -1;
111
19
    };
112
0
}
113
114
// return 0 for success otherwise error
115
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
116
6
    std::unique_ptr<Transaction> txn;
117
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
118
6
    if (err != TxnErrorCode::TXN_OK) {
119
0
        return -1;
120
0
    }
121
10
    for (auto k : keys) {
122
10
        txn->remove(k);
123
10
    }
124
6
    switch (txn->commit()) {
125
6
    case TxnErrorCode::TXN_OK:
126
6
        return 0;
127
0
    case TxnErrorCode::TXN_CONFLICT:
128
0
        return -1;
129
0
    default:
130
0
        return -1;
131
6
    }
132
6
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
115
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
116
5
    std::unique_ptr<Transaction> txn;
117
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
118
5
    if (err != TxnErrorCode::TXN_OK) {
119
0
        return -1;
120
0
    }
121
9
    for (auto k : keys) {
122
9
        txn->remove(k);
123
9
    }
124
5
    switch (txn->commit()) {
125
5
    case TxnErrorCode::TXN_OK:
126
5
        return 0;
127
0
    case TxnErrorCode::TXN_CONFLICT:
128
0
        return -1;
129
0
    default:
130
0
        return -1;
131
5
    }
132
5
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
115
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
116
1
    std::unique_ptr<Transaction> txn;
117
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
118
1
    if (err != TxnErrorCode::TXN_OK) {
119
0
        return -1;
120
0
    }
121
1
    for (auto k : keys) {
122
1
        txn->remove(k);
123
1
    }
124
1
    switch (txn->commit()) {
125
1
    case TxnErrorCode::TXN_OK:
126
1
        return 0;
127
0
    case TxnErrorCode::TXN_CONFLICT:
128
0
        return -1;
129
0
    default:
130
0
        return -1;
131
1
    }
132
1
}
133
134
// return 0 for success otherwise error
135
55
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
136
55
    std::unique_ptr<Transaction> txn;
137
55
    TxnErrorCode err = txn_kv->create_txn(&txn);
138
55
    if (err != TxnErrorCode::TXN_OK) {
139
0
        return -1;
140
0
    }
141
109k
    for (auto& k : keys) {
142
109k
        txn->remove(k);
143
109k
    }
144
55
    switch (txn->commit()) {
145
55
    case TxnErrorCode::TXN_OK:
146
55
        return 0;
147
0
    case TxnErrorCode::TXN_CONFLICT:
148
0
        return -1;
149
0
    default:
150
0
        return -1;
151
55
    }
152
55
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
135
49
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
136
49
    std::unique_ptr<Transaction> txn;
137
49
    TxnErrorCode err = txn_kv->create_txn(&txn);
138
49
    if (err != TxnErrorCode::TXN_OK) {
139
0
        return -1;
140
0
    }
141
105k
    for (auto& k : keys) {
142
105k
        txn->remove(k);
143
105k
    }
144
49
    switch (txn->commit()) {
145
49
    case TxnErrorCode::TXN_OK:
146
49
        return 0;
147
0
    case TxnErrorCode::TXN_CONFLICT:
148
0
        return -1;
149
0
    default:
150
0
        return -1;
151
49
    }
152
49
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
135
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
136
6
    std::unique_ptr<Transaction> txn;
137
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
138
6
    if (err != TxnErrorCode::TXN_OK) {
139
0
        return -1;
140
0
    }
141
4.00k
    for (auto& k : keys) {
142
4.00k
        txn->remove(k);
143
4.00k
    }
144
6
    switch (txn->commit()) {
145
6
    case TxnErrorCode::TXN_OK:
146
6
        return 0;
147
0
    case TxnErrorCode::TXN_CONFLICT:
148
0
        return -1;
149
0
    default:
150
0
        return -1;
151
6
    }
152
6
}
153
154
// return 0 for success otherwise error
155
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
156
106k
                                       std::string_view end) {
157
106k
    std::unique_ptr<Transaction> txn;
158
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
106k
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
106k
    txn->remove(begin, end);
163
106k
    switch (txn->commit()) {
164
106k
    case TxnErrorCode::TXN_OK:
165
106k
        return 0;
166
0
    case TxnErrorCode::TXN_CONFLICT:
167
0
        return -1;
168
0
    default:
169
0
        return -1;
170
106k
    }
171
106k
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
156
106k
                                       std::string_view end) {
157
106k
    std::unique_ptr<Transaction> txn;
158
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
106k
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
106k
    txn->remove(begin, end);
163
106k
    switch (txn->commit()) {
164
106k
    case TxnErrorCode::TXN_OK:
165
106k
        return 0;
166
0
    case TxnErrorCode::TXN_CONFLICT:
167
0
        return -1;
168
0
    default:
169
0
        return -1;
170
106k
    }
171
106k
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
172
173
// Forward declaration only; the definition is not part of this section of the file.
// NOTE(review): judging by the signature, status is presumably reported through
// `code`/`msg` and results appended to `restore_job_rs_metas` -- confirm against
// the definition.
void scan_restore_job_rowset(
174
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
175
        std::string& msg,
176
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
177
178
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
179
                                      int64_t num_scanned, int64_t num_recycled,
180
52
                                      int64_t start_time) {
181
52
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
182
0
        int64_t cost =
183
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
184
0
        if (cost > config::recycle_task_threshold_seconds) {
185
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
186
0
                    .tag("instance_id", instance_id)
187
0
                    .tag("task", task_name)
188
0
                    .tag("num_scanned", num_scanned)
189
0
                    .tag("num_recycled", num_recycled);
190
0
        }
191
0
    }
192
52
    return;
193
52
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
180
50
                                      int64_t start_time) {
181
50
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
182
0
        int64_t cost =
183
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
184
0
        if (cost > config::recycle_task_threshold_seconds) {
185
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
186
0
                    .tag("instance_id", instance_id)
187
0
                    .tag("task", task_name)
188
0
                    .tag("num_scanned", num_scanned)
189
0
                    .tag("num_recycled", num_recycled);
190
0
        }
191
0
    }
192
50
    return;
193
50
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
180
2
                                      int64_t start_time) {
181
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
182
0
        int64_t cost =
183
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
184
0
        if (cost > config::recycle_task_threshold_seconds) {
185
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
186
0
                    .tag("instance_id", instance_id)
187
0
                    .tag("task", task_name)
188
0
                    .tag("num_scanned", num_scanned)
189
0
                    .tag("num_recycled", num_recycled);
190
0
        }
191
0
    }
192
2
    return;
193
2
}
194
195
4
// Builds the recycler: records the local "ip:port" identity, creates and starts
// the three shared worker pools, and constructs the lazy txn committer and the
// snapshot manager on top of `txn_kv`.
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);

    // Each pool is sized by config::recycle_pool_parallelism and started right
    // after construction.
    auto make_started_pool = [](const char* pool_name) {
        auto pool =
                std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, pool_name);
        pool->start();
        return pool;
    };
    auto s3_producer_pool = make_started_pool("s3_producer_pool");
    auto recycle_tablet_pool = make_started_pool("recycle_tablet_pool");
    auto group_recycle_function_pool = make_started_pool("group_recycle_function_pool");
    _thread_pool_group =
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
                                    std::move(group_recycle_function_pool));

    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(
            txn_kv_, std::make_shared<ResourceManager>(txn_kv_));
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);
}
215
216
4
// Makes sure the recycler is stopped before it is destroyed.
Recycler::~Recycler() {
    if (stopped()) {
        return;
    }
    stop();
}
221
222
4
void Recycler::instance_scanner_callback() {
223
    // sleep 60 seconds before scheduling for the launch procedure to complete:
224
    // some bad hdfs connection may cause some log to stdout stderr
225
    // which may pollute .out file and affect the script to check success
226
4
    std::this_thread::sleep_for(
227
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
228
8
    while (!stopped()) {
229
4
        std::vector<InstanceInfoPB> instances;
230
4
        get_all_instances(txn_kv_.get(), instances);
231
        // TODO(plat1ko): delete job recycle kv of non-existent instances
232
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
233
4
            std::stringstream ss;
234
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
235
4
            return ss.str();
236
4
        }();
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
232
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
233
4
            std::stringstream ss;
234
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
235
4
            return ss.str();
236
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
237
4
        if (!instances.empty()) {
238
            // enqueue instances
239
3
            std::lock_guard lock(mtx_);
240
30
            for (auto& instance : instances) {
241
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
242
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
243
                // skip instance already in pending queue
244
30
                if (success) {
245
30
                    pending_instance_queue_.push_back(std::move(instance));
246
30
                }
247
30
            }
248
3
            pending_instance_cond_.notify_all();
249
3
        }
250
4
        {
251
4
            std::unique_lock lock(mtx_);
252
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
253
8
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
253
8
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
254
4
        }
255
4
    }
256
4
}
257
258
8
void Recycler::recycle_callback() {
259
38
    while (!stopped()) {
260
38
        InstanceInfoPB instance;
261
38
        {
262
38
            std::unique_lock lock(mtx_);
263
38
            pending_instance_cond_.wait(
264
52
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
264
52
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
265
38
            if (stopped()) {
266
8
                return;
267
8
            }
268
30
            instance = std::move(pending_instance_queue_.front());
269
30
            pending_instance_queue_.pop_front();
270
30
            pending_instance_set_.erase(instance.instance_id());
271
30
        }
272
0
        auto& instance_id = instance.instance_id();
273
30
        {
274
30
            std::lock_guard lock(mtx_);
275
            // skip instance in recycling
276
30
            if (recycling_instance_map_.count(instance_id)) continue;
277
30
        }
278
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
279
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
280
281
30
        if (int r = instance_recycler->init(); r != 0) {
282
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
283
0
                         << " ret=" << r;
284
0
            continue;
285
0
        }
286
30
        std::string recycle_job_key;
287
30
        job_recycle_key({instance_id}, &recycle_job_key);
288
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
289
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
290
30
        if (ret != 0) { // Prepare failed
291
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
292
20
                         << " ret=" << ret;
293
20
            continue;
294
20
        } else {
295
10
            std::lock_guard lock(mtx_);
296
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
297
10
        }
298
10
        if (stopped()) return;
299
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
300
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
301
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
302
10
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
303
10
        ret = instance_recycler->do_recycle();
304
        // If instance recycler has been aborted, don't finish this job
305
306
10
        if (!instance_recycler->stopped()) {
307
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
308
10
                                        ret == 0, ctime_ms);
309
10
        }
310
10
        if (instance_recycler->stopped() || ret != 0) {
311
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
312
0
        }
313
10
        {
314
10
            std::lock_guard lock(mtx_);
315
10
            recycling_instance_map_.erase(instance_id);
316
10
        }
317
318
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
319
10
        auto elpased_ms = now - ctime_ms;
320
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
321
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
322
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
323
10
                                             now + config::recycle_interval_seconds * 1000);
324
10
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
325
10
        LOG(INFO) << "recycle instance done, "
326
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
327
10
                  << " now: " << now;
328
329
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
330
331
10
        LOG_WARNING("finish recycle instance")
332
10
                .tag("instance_id", instance_id)
333
10
                .tag("cost_ms", elpased_ms);
334
10
    }
335
8
}
336
337
4
void Recycler::lease_recycle_jobs() {
338
54
    while (!stopped()) {
339
50
        std::vector<std::string> instances;
340
50
        instances.reserve(recycling_instance_map_.size());
341
50
        {
342
50
            std::lock_guard lock(mtx_);
343
50
            for (auto& [id, _] : recycling_instance_map_) {
344
30
                instances.push_back(id);
345
30
            }
346
50
        }
347
50
        for (auto& i : instances) {
348
30
            std::string recycle_job_key;
349
30
            job_recycle_key({i}, &recycle_job_key);
350
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
351
30
            if (ret == 1) {
352
0
                std::lock_guard lock(mtx_);
353
0
                if (auto it = recycling_instance_map_.find(i);
354
0
                    it != recycling_instance_map_.end()) {
355
0
                    it->second->stop();
356
0
                }
357
0
            }
358
30
        }
359
50
        {
360
50
            std::unique_lock lock(mtx_);
361
50
            notifier_.wait_for(lock,
362
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
363
100
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
363
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
364
50
        }
365
50
    }
366
4
}
367
368
4
void Recycler::check_recycle_tasks() {
369
7
    while (!stopped()) {
370
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
371
3
        {
372
3
            std::lock_guard lock(mtx_);
373
3
            recycling_instance_map = recycling_instance_map_;
374
3
        }
375
3
        for (auto& entry : recycling_instance_map) {
376
0
            entry.second->check_recycle_tasks();
377
0
        }
378
379
3
        std::unique_lock lock(mtx_);
380
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
381
6
                           [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
381
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
382
3
    }
383
4
}
384
385
4
// Starts the recycler: optionally starts the checker, registers the recycler
// service on `server` (when non-null), spawns the worker threads, and
// optionally starts the snapshot data migrator and snapshot chain compactor.
// Returns 0 on success, or the non-zero result of the component that failed
// to start.
int Recycler::start(brpc::Server* server) {
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
    S3Environment::getInstance();

    if (config::enable_checker) {
        checker_ = std::make_unique<Checker>(txn_kv_);
        if (const int rc = checker_->start(); rc != 0) {
            const std::string failure = "failed to start checker";
            LOG(ERROR) << failure;
            std::cerr << failure << std::endl;
            return rc;
        }
        const std::string started = "checker started";
        LOG(INFO) << started;
        std::cout << started << std::endl;
    }

    if (server != nullptr) {
        // Add service; the server takes ownership of the service object.
        server->AddService(
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_),
                brpc::SERVER_OWNS_SERVICE);
    }

    // One scanner, `recycle_concurrency` recycle workers, one lease renewer
    // and one task watchdog.
    workers_.emplace_back([this] { instance_scanner_callback(); });
    for (int i = 0; i < config::recycle_concurrency; ++i) {
        workers_.emplace_back([this] { recycle_callback(); });
    }
    workers_.emplace_back([this] { lease_recycle_jobs(); });
    workers_.emplace_back([this] { check_recycle_tasks(); });

    if (config::enable_snapshot_data_migrator) {
        snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_);
        if (const int rc = snapshot_data_migrator_->start(); rc != 0) {
            LOG(ERROR) << "failed to start snapshot data migrator";
            return rc;
        }
        LOG(INFO) << "snapshot data migrator started";
    }

    if (config::enable_snapshot_chain_compactor) {
        snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_);
        if (const int rc = snapshot_chain_compactor_->start(); rc != 0) {
            LOG(ERROR) << "failed to start snapshot chain compactor";
            return rc;
        }
        LOG(INFO) << "snapshot chain compactor started";
    }

    return 0;
}
442
443
4
void Recycler::stop() {
444
4
    stopped_ = true;
445
4
    notifier_.notify_all();
446
4
    pending_instance_cond_.notify_all();
447
4
    {
448
4
        std::lock_guard lock(mtx_);
449
4
        for (auto& [_, recycler] : recycling_instance_map_) {
450
0
            recycler->stop();
451
0
        }
452
4
    }
453
20
    for (auto& w : workers_) {
454
20
        if (w.joinable()) w.join();
455
20
    }
456
4
    if (checker_) {
457
0
        checker_->stop();
458
0
    }
459
4
    if (snapshot_data_migrator_) {
460
0
        snapshot_data_migrator_->stop();
461
0
    }
462
4
    if (snapshot_chain_compactor_) {
463
0
        snapshot_chain_compactor_->stop();
464
0
    }
465
4
}
466
467
// Per-instance cache of inverted index information, keyed by
// <index_id, schema_version>. Schemas are loaded from the KV store on a cache
// miss and remembered afterwards; schemas without any inverted index are
// tracked in a separate set.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Return 0 if success, 1 if schema kv not found, negative for error
    // (on a failed schema read the TxnErrorCode is returned as an int).
    // `get` is not atomic as a whole: for the same <index_id, schema_version>
    // one thread may still be inserting while another has already missed the
    // cache, so the schema can be fetched and inserted more than once.
    // This keeps the locked region small and trades a few repeated meta reads
    // for better concurrency; repeated insertion does not affect correctness.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            // Known to have no inverted index: `res` is left untouched.
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // V1 is used when the schema does not record a storage format.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            for (auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // An `index_info` with an empty id list means this schema has no inverted index
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        // Mixes both components of the key. Hashing only `schema_version`
        // would place every index with the same schema version in one bucket.
        size_t operator()(const Key& key) const {
            size_t seed = std::hash<int64_t> {}(key.first);
            seed ^= std::hash<int32_t> {}(key.second) +
                    static_cast<size_t>(0x9e3779b97f4a7c15ULL) + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
559
560
// Creates the recycler for one instance. Keeps a copy of the instance info,
// builds the inverted index cache and the snapshot manager on top of the KV
// store, and pushes the instance info into the lazy committer's resource
// manager.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          // Built from the members initialized above, not from the parameters.
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)) {
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);

    // The recycler's resource manager is not notified when instance info
    // changes, so refresh it here with the info this recycler was created with.
    auto resource_manager = txn_lazy_committer_->resource_manager();
    resource_manager->refresh_instance(instance_id_, instance);
}
575
576
106
// Defaulted out-of-line in this file, where InvertedIndexIdCache is defined.
// NOTE(review): presumably required so the unique_ptr member holding the cache
// is destroyed where the type is complete -- confirm against the header.
InstanceRecycler::~InstanceRecycler() = default;
577
578
90
int InstanceRecycler::init_obj_store_accessors() {
579
90
    for (const auto& obj_info : instance_info_.obj_info()) {
580
59
#ifdef UNIT_TEST
581
59
        auto accessor = std::make_shared<MockAccessor>();
582
#else
583
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
584
        if (!s3_conf) {
585
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
586
            return -1;
587
        }
588
589
        std::shared_ptr<S3Accessor> accessor;
590
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
591
        if (ret != 0) {
592
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
593
                         << " resource_id=" << obj_info.id();
594
            return ret;
595
        }
596
#endif
597
59
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
598
59
    }
599
600
90
    return 0;
601
90
}
602
603
90
int InstanceRecycler::init_storage_vault_accessors() {
604
90
    if (instance_info_.resource_ids().empty()) {
605
83
        return 0;
606
83
    }
607
608
7
    FullRangeGetOptions opts(txn_kv_);
609
7
    opts.prefetch = true;
610
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
611
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
612
613
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
614
18
        auto [k, v] = *kv;
615
18
        StorageVaultPB vault;
616
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
617
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
618
0
            return -1;
619
0
        }
620
18
        std::string recycler_storage_vault_white_list = accumulate(
621
18
                config::recycler_storage_vault_white_list.begin(),
622
18
                config::recycler_storage_vault_white_list.end(), std::string(),
623
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
623
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
624
18
        LOG_INFO("config::recycler_storage_vault_white_list")
625
18
                .tag("", recycler_storage_vault_white_list);
626
18
        if (!config::recycler_storage_vault_white_list.empty()) {
627
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
628
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
629
8
                it == config::recycler_storage_vault_white_list.end()) {
630
2
                LOG_WARNING(
631
2
                        "failed to init accessor for vault because this vault is not in "
632
2
                        "config::recycler_storage_vault_white_list. ")
633
2
                        .tag(" vault name:", vault.name())
634
2
                        .tag(" config::recycler_storage_vault_white_list:",
635
2
                             recycler_storage_vault_white_list);
636
2
                continue;
637
2
            }
638
8
        }
639
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
640
16
                                 &accessor_map_, &vault);
641
16
        if (vault.has_hdfs_info()) {
642
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
643
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
644
9
            int ret = accessor->init();
645
9
            if (ret != 0) {
646
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
647
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
648
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
649
4
                continue;
650
4
            }
651
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
652
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
653
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
654
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
655
#else
656
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
657
                       << "but HDFS storage vaults were detected";
658
#endif
659
7
        } else if (vault.has_obj_info()) {
660
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
661
7
            if (!s3_conf) {
662
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
663
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
664
1
                continue;
665
1
            }
666
667
6
            std::shared_ptr<S3Accessor> accessor;
668
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
669
6
            if (ret != 0) {
670
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
671
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
672
0
                             << " ret=" << ret
673
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
674
0
                continue;
675
0
            }
676
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
677
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
678
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
679
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
680
6
        }
681
16
    }
682
683
7
    if (!it->is_valid()) {
684
0
        LOG_WARNING("failed to get storage vault kv");
685
0
        return -1;
686
0
    }
687
688
7
    if (accessor_map_.empty()) {
689
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
690
1
        return -2;
691
1
    }
692
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
693
6
             instance_id_);
694
695
6
    return 0;
696
7
}
697
698
90
int InstanceRecycler::init() {
699
90
    int ret = init_obj_store_accessors();
700
90
    if (ret != 0) {
701
0
        return ret;
702
0
    }
703
704
90
    return init_storage_vault_accessors();
705
90
}
706
707
// Packs several int-returning callables into a single std::function<int()>.
//
// Invoking the returned task runs every callable, in argument order, even when an
// earlier one reports a failure. The task returns 0 if all callables returned 0;
// otherwise it returns the non-zero value produced by the last failing callable.
// The callables are copied into the returned task.
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() {
        // The elements of a braced-init-list are evaluated left to right, which fixes
        // the order in which the callables run.
        const std::initializer_list<int> codes {funcs()...};
        int last_failure = 0;
        for (const int code : codes) {
            last_failure = (code != 0) ? code : last_failure;
        }
        return last_failure;
    };
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
708
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
709
10
    return [funcs...]() {
710
10
        return [](std::initializer_list<int> ret_vals) {
711
10
            int i = 0;
712
10
            for (int ret : ret_vals) {
713
10
                if (ret != 0) {
714
10
                    i = ret;
715
10
                }
716
10
            }
717
10
            return i;
718
10
        }({funcs()...});
719
10
    };
720
10
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
721
722
10
int InstanceRecycler::do_recycle() {
723
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
724
10
    tablet_metrics_context_.reset();
725
10
    segment_metrics_context_.reset();
726
10
    DORIS_CLOUD_DEFER {
727
10
        tablet_metrics_context_.finish_report();
728
10
        segment_metrics_context_.finish_report();
729
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
726
10
    DORIS_CLOUD_DEFER {
727
10
        tablet_metrics_context_.finish_report();
728
10
        segment_metrics_context_.finish_report();
729
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
730
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
731
0
        int res = recycle_cluster_snapshots();
732
0
        if (res != 0) {
733
0
            return -1;
734
0
        }
735
0
        return recycle_deleted_instance();
736
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
737
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
738
10
                                        fmt::format("instance id {}", instance_id_),
739
110
                                        [](int r) { return r != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
739
110
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
740
10
        sync_executor
741
10
                .add(task_wrapper(
742
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
742
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
743
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
743
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
744
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
745
                                   // becase they may both recycle the same set of tablets
746
                        // recycle dropped table or idexes(mv, rollup)
747
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
747
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
748
                        // recycle dropped partitions
749
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
749
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
750
10
                .add(task_wrapper(
751
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
751
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
752
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
752
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
753
10
                .add(task_wrapper(
754
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
754
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
755
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
755
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
756
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
756
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
757
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
757
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
758
10
                .add(task_wrapper(
759
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
759
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
760
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
760
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
761
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
761
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
762
10
        bool finished = true;
763
10
        std::vector<int> rets = sync_executor.when_all(&finished);
764
110
        for (int ret : rets) {
765
110
            if (ret != 0) {
766
0
                return ret;
767
0
            }
768
110
        }
769
10
        return finished ? 0 : -1;
770
10
    } else {
771
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
772
0
                     << " instance_id=" << instance_id_;
773
0
        return -1;
774
0
    }
775
10
}
776
777
/**
778
* 1. delete all remote data
779
* 2. delete all kv
780
* 3. remove instance kv
781
*/
782
4
int InstanceRecycler::recycle_deleted_instance() {
783
4
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
784
785
4
    int ret = 0;
786
4
    auto start_time = steady_clock::now();
787
788
4
    DORIS_CLOUD_DEFER {
789
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
790
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
791
4
                     << " recycle deleted instance, cost=" << cost
792
4
                     << "s, instance_id=" << instance_id_;
793
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
788
4
    DORIS_CLOUD_DEFER {
789
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
790
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
791
4
                     << " recycle deleted instance, cost=" << cost
792
4
                     << "s, instance_id=" << instance_id_;
793
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
794
795
4
    bool has_snapshots = false;
796
4
    if (has_cluster_snapshots(&has_snapshots) != 0) {
797
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
798
0
        return -1;
799
4
    } else if (has_snapshots) {
800
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
801
1
        return 0;
802
1
    }
803
804
3
    if (recycle_operation_logs() != 0) {
805
0
        LOG_WARNING("failed to recycle operation logs").tag("instance_id", instance_id_);
806
0
        return -1;
807
0
    }
808
809
3
    if (recycle_versioned_rowsets() != 0) {
810
0
        LOG_WARNING("failed to recycle versioned rowsets").tag("instance_id", instance_id_);
811
0
        return -1;
812
0
    }
813
814
3
    bool snapshot_enabled = instance_info().has_snapshot_switch_status() &&
815
3
                            instance_info().snapshot_switch_status() !=
816
0
                                    SnapshotSwitchStatus::SNAPSHOT_SWITCH_DISABLED;
817
3
    if (snapshot_enabled) {
818
0
        bool has_unrecycled_rowsets = false;
819
0
        if (recycle_ref_rowsets(&has_unrecycled_rowsets) != 0) {
820
0
            LOG_WARNING("failed to recycle ref rowsets").tag("instance_id", instance_id_);
821
0
            return -1;
822
0
        } else if (has_unrecycled_rowsets) {
823
0
            LOG_INFO("instance has referenced rowsets, skip recycling")
824
0
                    .tag("instance_id", instance_id_);
825
0
            return ret;
826
0
        }
827
3
    } else { // delete all remote data if snapshot is disabled
828
3
        for (auto& [_, accessor] : accessor_map_) {
829
3
            if (stopped()) {
830
0
                return ret;
831
0
            }
832
833
3
            LOG(INFO) << "begin to delete all objects in " << accessor->uri();
834
3
            int del_ret = accessor->delete_all();
835
3
            if (del_ret == 0) {
836
3
                LOG(INFO) << "successfully delete all objects in " << accessor->uri();
837
3
            } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
838
                // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
839
                // so the recycling has been successful.
840
0
                ret = -1;
841
0
            }
842
3
        }
843
844
3
        if (ret != 0) {
845
0
            LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
846
0
            return ret;
847
0
        }
848
3
    }
849
850
    // delete all kv
851
3
    std::unique_ptr<Transaction> txn;
852
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
853
3
    if (err != TxnErrorCode::TXN_OK) {
854
0
        LOG(WARNING) << "failed to create txn";
855
0
        ret = -1;
856
0
        return -1;
857
0
    }
858
3
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
859
    // delete kv before deleting objects to prevent the checker from misjudging data loss
860
3
    std::string start_txn_key = txn_key_prefix(instance_id_);
861
3
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
862
3
    txn->remove(start_txn_key, end_txn_key);
863
3
    std::string start_version_key = version_key_prefix(instance_id_);
864
3
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
865
3
    txn->remove(start_version_key, end_version_key);
866
3
    std::string start_meta_key = meta_key_prefix(instance_id_);
867
3
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
868
3
    txn->remove(start_meta_key, end_meta_key);
869
3
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
870
3
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
871
3
    txn->remove(start_recycle_key, end_recycle_key);
872
3
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
873
3
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
874
3
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
875
3
    std::string start_copy_key = copy_key_prefix(instance_id_);
876
3
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
877
3
    txn->remove(start_copy_key, end_copy_key);
878
    // should not remove job key range, because we need to reserve job recycle kv
879
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
880
3
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
881
3
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
882
3
    txn->remove(start_job_tablet_key, end_job_tablet_key);
883
3
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
884
3
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
885
3
    std::string start_vault_key = storage_vault_key(key_info0);
886
3
    std::string end_vault_key = storage_vault_key(key_info1);
887
3
    txn->remove(start_vault_key, end_vault_key);
888
3
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
889
3
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
890
3
    txn->remove(versioned_version_key_start, versioned_version_key_end);
891
3
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
892
3
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
893
3
    txn->remove(versioned_index_key_start, versioned_index_key_end);
894
3
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
895
3
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
896
3
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
897
3
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
898
3
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
899
3
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
900
3
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
901
3
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
902
3
    txn->remove(versioned_data_key_start, versioned_data_key_end);
903
3
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
904
3
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
905
3
    txn->remove(versioned_log_key_start, versioned_log_key_end);
906
3
    err = txn->commit();
907
3
    if (err != TxnErrorCode::TXN_OK) {
908
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
909
0
        ret = -1;
910
0
    }
911
912
3
    if (ret == 0) {
913
        // remove instance kv
914
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
915
3
        err = txn_kv_->create_txn(&txn);
916
3
        if (err != TxnErrorCode::TXN_OK) {
917
0
            LOG(WARNING) << "failed to create txn";
918
0
            ret = -1;
919
0
            return ret;
920
0
        }
921
3
        std::string key;
922
3
        instance_key({instance_id_}, &key);
923
3
        txn->atomic_add(system_meta_service_instance_update_key(), 1);
924
3
        txn->remove(key);
925
3
        err = txn->commit();
926
3
        if (err != TxnErrorCode::TXN_OK) {
927
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
928
0
                         << " err=" << err;
929
0
            ret = -1;
930
0
        }
931
3
    }
932
3
    return ret;
933
3
}
934
935
bool is_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id,
936
57.0k
                     int64_t txn_id) {
937
57.0k
    std::unique_ptr<Transaction> txn;
938
57.0k
    TxnErrorCode err = txn_kv->create_txn(&txn);
939
57.0k
    if (err != TxnErrorCode::TXN_OK) {
940
0
        LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id;
941
0
        return false;
942
0
    }
943
944
57.0k
    std::string index_val;
945
57.0k
    const std::string index_key = txn_index_key({instance_id, txn_id});
946
57.0k
    err = txn->get(index_key, &index_val);
947
57.0k
    if (err != TxnErrorCode::TXN_OK) {
948
53.0k
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
949
53.0k
            TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_recycled");
950
            // txn has been recycled;
951
53.0k
            LOG(INFO) << "txn index key has been recycled, txn_id=" << txn_id
952
53.0k
                      << " instance_id=" << instance_id;
953
53.0k
            return true;
954
53.0k
        }
955
0
        LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id
956
0
                     << " instance_id=" << instance_id << " key=" << hex(index_key)
957
0
                     << " err=" << err;
958
0
        return false;
959
53.0k
    }
960
961
4.00k
    TxnIndexPB index_pb;
962
4.00k
    if (!index_pb.ParseFromString(index_val)) {
963
0
        LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id
964
0
                     << " instance_id=" << instance_id;
965
0
        return false;
966
0
    }
967
968
4.00k
    DCHECK(index_pb.has_tablet_index() == true);
969
4.00k
    if (!index_pb.tablet_index().has_db_id()) {
970
        // In the previous version, the db_id was not set in the index_pb.
971
        // If updating to the version which enable txn lazy commit, the db_id will be set.
972
0
        LOG(INFO) << "txn index has no db_id, txn_id=" << txn_id << " instance_id=" << instance_id
973
0
                  << " index=" << index_pb.ShortDebugString();
974
0
        return true;
975
0
    }
976
977
4.00k
    int64_t db_id = index_pb.tablet_index().db_id();
978
4.00k
    DCHECK_GT(db_id, 0) << "db_id=" << db_id << " txn_id=" << txn_id
979
0
                        << " instance_id=" << instance_id;
980
981
4.00k
    std::string info_val;
982
4.00k
    const std::string info_key = txn_info_key({instance_id, db_id, txn_id});
983
4.00k
    err = txn->get(info_key, &info_val);
984
4.00k
    if (err != TxnErrorCode::TXN_OK) {
985
0
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
986
            // txn info has been recycled;
987
0
            LOG(INFO) << "txn info key has been recycled, db_id=" << db_id << " txn_id=" << txn_id
988
0
                      << " instance_id=" << instance_id;
989
0
            return true;
990
0
        }
991
992
0
        DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND);
993
0
        LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id
994
0
                     << " instance_id=" << instance_id << " key=" << hex(info_key)
995
0
                     << " err=" << err;
996
0
        return false;
997
0
    }
998
999
4.00k
    TxnInfoPB txn_info;
1000
4.00k
    if (!txn_info.ParseFromString(info_val)) {
1001
0
        LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id
1002
0
                     << " instance_id=" << instance_id;
1003
0
        return false;
1004
0
    }
1005
1006
4.00k
    DCHECK(txn_info.txn_id() == txn_id) << "txn_id=" << txn_id << " instance_id=" << instance_id
1007
0
                                        << " txn_info=" << txn_info.ShortDebugString();
1008
1009
4.00k
    if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status() ||
1010
4.00k
        TxnStatusPB::TXN_STATUS_VISIBLE == txn_info.status()) {
1011
2.00k
        TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_aborted", &txn_info);
1012
2.00k
        return true;
1013
2.00k
    }
1014
1015
2.00k
    TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_not_finished", &txn_info);
1016
2.00k
    return false;
1017
4.00k
}
1018
1019
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
1020
5.01k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
1021
5.01k
    if (config::force_immediate_recycle) {
1022
8
        return 0L;
1023
8
    }
1024
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1025
5.00k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1026
5.00k
    int64_t retention_seconds = config::retention_seconds;
1027
5.00k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1028
3.90k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1029
3.90k
    }
1030
5.00k
    int64_t final_expiration = expiration + retention_seconds;
1031
5.00k
    if (*earlest_ts > final_expiration) {
1032
4
        *earlest_ts = final_expiration;
1033
4
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1034
4
    }
1035
5.00k
    return final_expiration;
1036
5.01k
}
1037
1038
int64_t calculate_partition_expired_time(
1039
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
1040
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
1041
9
    if (config::force_immediate_recycle) {
1042
3
        return 0L;
1043
3
    }
1044
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
1045
6
                                                            : partition_meta_pb.creation_time();
1046
6
    int64_t retention_seconds = config::retention_seconds;
1047
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
1048
6
        retention_seconds =
1049
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1050
6
    }
1051
6
    int64_t final_expiration = expiration + retention_seconds;
1052
6
    if (*earlest_ts > final_expiration) {
1053
2
        *earlest_ts = final_expiration;
1054
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1055
2
    }
1056
6
    return final_expiration;
1057
9
}
1058
1059
int64_t calculate_index_expired_time(const std::string& instance_id_,
1060
                                     const RecycleIndexPB& index_meta_pb,
1061
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1062
10
    if (config::force_immediate_recycle) {
1063
4
        return 0L;
1064
4
    }
1065
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1066
6
                                                        : index_meta_pb.creation_time();
1067
6
    int64_t retention_seconds = config::retention_seconds;
1068
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1069
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1070
6
    }
1071
6
    int64_t final_expiration = expiration + retention_seconds;
1072
6
    if (*earlest_ts > final_expiration) {
1073
2
        *earlest_ts = final_expiration;
1074
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1075
2
    }
1076
6
    return final_expiration;
1077
10
}
1078
1079
int64_t calculate_tmp_rowset_expired_time(
1080
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1081
57.0k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1082
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1083
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1084
    //  duration or timeout always < `retention_time` in practice.
1085
57.0k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1086
57.0k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1087
57.0k
                                 : tmp_rowset_meta_pb.creation_time();
1088
57.0k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1089
57.0k
    int64_t final_expiration = expiration + config::retention_seconds;
1090
57.0k
    if (*earlest_ts > final_expiration) {
1091
6
        *earlest_ts = final_expiration;
1092
6
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1093
6
    }
1094
57.0k
    return final_expiration;
1095
57.0k
}
1096
1097
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1098
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1099
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1100
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1101
8
        *earlest_ts = final_expiration / 1000;
1102
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1103
8
    }
1104
30.0k
    return final_expiration;
1105
30.0k
}
1106
1107
int64_t calculate_restore_job_expired_time(
1108
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1109
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1110
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1111
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1112
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1113
        // final state, recycle immediately
1114
41
        return 0L;
1115
41
    }
1116
    // not final state, wait much longer than the FE's timeout(1 day)
1117
0
    int64_t last_modified_s =
1118
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1119
0
    int64_t expiration = restore_job.expired_at_s() > 0
1120
0
                                 ? last_modified_s + restore_job.expired_at_s()
1121
0
                                 : last_modified_s;
1122
0
    int64_t final_expiration = expiration + config::retention_seconds;
1123
0
    if (*earlest_ts > final_expiration) {
1124
0
        *earlest_ts = final_expiration;
1125
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1126
0
    }
1127
0
    return final_expiration;
1128
41
}
1129
1130
// Looks up the versioned meta rowset stored for (`tablet_id`, `end_version`) and
// checks whether it is the rowset `rowset_id`.
//
// `load_key` selects the load key (true) or the compact key (false).
// `start_version` is used for logging only.
//
// `*exist` is set to true only when the stored rowset id matches; it is left
// untouched otherwise, so the caller must initialise it.
//
// Returns 0 when the lookup succeeded (found, not found or id mismatch), -1 when
// reading the key failed.
int get_meta_rowset_key(Transaction* txn, const std::string& instance_id, int64_t tablet_id,
                        const std::string& rowset_id, int64_t start_version, int64_t end_version,
                        bool load_key, bool* exist) {
    std::string key;
    if (load_key) {
        key = versioned::meta_rowset_load_key({instance_id, tablet_id, end_version});
    } else {
        key = versioned::meta_rowset_compact_key({instance_id, tablet_id, end_version});
    }

    RowsetMetaCloudPB rowset_meta;
    Versionstamp version;
    TxnErrorCode err = versioned::document_get(txn, key, &rowset_meta, &version);

    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
        VLOG_DEBUG << "not found load or compact meta_rowset_key."
                   << " rowset_id=" << rowset_id << " start_version=" << start_version
                   << " end_version=" << end_version << " key=" << hex(key);
        return 0;
    }

    if (err != TxnErrorCode::TXN_OK) {
        LOG_INFO("failed to get load or compact meta_rowset_key.")
                .tag("rowset_id", rowset_id)
                .tag("start_version", start_version)
                .tag("end_version", end_version)
                .tag("key", hex(key))
                .tag("error_code", err);
        return -1;
    }

    if (rowset_meta.rowset_id_v2() != rowset_id) {
        VLOG_DEBUG << "rowset_id does not match when find load or compact meta_rowset_key."
                   << " rowset_id=" << rowset_id << " start_version=" << start_version
                   << " end_version=" << end_version << " key=" << hex(key)
                   << " found_rowset_id=" << rowset_meta.rowset_id_v2();
        return 0;
    }

    *exist = true;
    VLOG_DEBUG << "found load or compact meta_rowset_key."
               << " rowset_id=" << rowset_id << " start_version=" << start_version
               << " end_version=" << end_version << " key=" << hex(key);
    return 0;
}
1164
1165
0
int InstanceRecycler::recycle_ref_rowsets(bool* has_unrecycled_rowsets) {
1166
0
    const std::string task_name = "recycle_ref_rowsets";
1167
0
    int64_t num_scanned = 0;
1168
0
    int64_t num_recycled = 0;
1169
0
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1170
1171
0
    std::string data_rowset_ref_count_key_start =
1172
0
            versioned::data_rowset_ref_count_key({instance_id_, 0, ""});
1173
0
    std::string data_rowset_ref_count_key_end =
1174
0
            versioned::data_rowset_ref_count_key({instance_id_, INT64_MAX, ""});
1175
1176
0
    LOG_WARNING("begin to recycle ref rowsets").tag("instance_id", instance_id_);
1177
1178
0
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1179
0
    register_recycle_task(task_name, start_time);
1180
1181
0
    DORIS_CLOUD_DEFER {
1182
0
        unregister_recycle_task(task_name);
1183
0
        int64_t cost =
1184
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1185
0
        metrics_context.finish_report();
1186
0
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
1187
0
                .tag("instance_id", instance_id_)
1188
0
                .tag("num_scanned", num_scanned)
1189
0
                .tag("num_recycled", num_recycled);
1190
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
1191
1192
0
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1193
0
        ++num_scanned;
1194
1195
0
        int64_t tablet_id;
1196
0
        std::string rowset_id;
1197
0
        std::string_view key(k);
1198
0
        if (!versioned::decode_data_rowset_ref_count_key(&key, &tablet_id, &rowset_id)) {
1199
0
            LOG_WARNING("failed to decode data rowset ref count key").tag("key", hex(k));
1200
0
            return -1;
1201
0
        }
1202
1203
0
        std::unique_ptr<Transaction> txn;
1204
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1205
0
        if (err != TxnErrorCode::TXN_OK) {
1206
0
            return -1;
1207
0
        }
1208
1209
0
        int64_t ref_count;
1210
0
        if (!txn->decode_atomic_int(v, &ref_count)) {
1211
0
            LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(v));
1212
0
            return -1;
1213
0
        }
1214
0
        if (ref_count > 1) {
1215
0
            *has_unrecycled_rowsets = true;
1216
0
            LOG_INFO("skip recycle ref_count > 1 rowset")
1217
0
                    .tag("instance_id", instance_id_)
1218
0
                    .tag("tablet_id", tablet_id)
1219
0
                    .tag("rowset_id", rowset_id)
1220
0
                    .tag("ref_count", ref_count);
1221
0
            return 0;
1222
0
        }
1223
1224
0
        std::string meta_rowset_key =
1225
0
                versioned::meta_rowset_key({instance_id_, tablet_id, rowset_id});
1226
0
        ValueBuf val_buf;
1227
0
        err = blob_get(txn.get(), meta_rowset_key, &val_buf);
1228
0
        if (err != TxnErrorCode::TXN_OK) {
1229
0
            LOG_WARNING("failed to get meta_rowset_key")
1230
0
                    .tag("instance_id", instance_id_)
1231
0
                    .tag("tablet_id", tablet_id)
1232
0
                    .tag("rowset_id", rowset_id)
1233
0
                    .tag("key", hex(meta_rowset_key))
1234
0
                    .tag("err", err);
1235
0
            return -1;
1236
0
        }
1237
0
        doris::RowsetMetaCloudPB rowset_meta;
1238
0
        if (!val_buf.to_pb(&rowset_meta)) {
1239
0
            LOG_WARNING("failed to parse RowsetMetaCloudPB")
1240
0
                    .tag("instance_id", instance_id_)
1241
0
                    .tag("tablet_id", tablet_id)
1242
0
                    .tag("rowset_id", rowset_id)
1243
0
                    .tag("key", hex(meta_rowset_key));
1244
0
            return -1;
1245
0
        }
1246
0
        int64_t start_version = rowset_meta.start_version();
1247
0
        int64_t end_version = rowset_meta.end_version();
1248
1249
        // Check if the meta_rowset_compact_key or meta_rowset_load_key exists:
1250
        // exists: means it's referenced by current instance, can recycle rowset;
1251
        // not exists: means it's referenced by other instances, cannot recycle;
1252
        //
1253
        // end_version = 1: the first rowset;
1254
        // end_version = 0: the rowset is committed by load, but not commit_txn;
1255
        // can recycle in these 2 situations
1256
0
        bool exist = false;
1257
0
        if (end_version > 1) {
1258
0
            if (start_version != end_version) {
1259
0
                if (get_meta_rowset_key(txn.get(), instance_id_, tablet_id, rowset_id,
1260
0
                                        start_version, end_version, false, &exist) != 0) {
1261
0
                    return -1;
1262
0
                }
1263
0
            } else {
1264
0
                if (get_meta_rowset_key(txn.get(), instance_id_, tablet_id, rowset_id,
1265
0
                                        start_version, end_version, true, &exist) != 0) {
1266
0
                    return -1;
1267
0
                }
1268
0
                if (!exist && get_meta_rowset_key(txn.get(), instance_id_, tablet_id, rowset_id,
1269
0
                                                  start_version, end_version, false, &exist) != 0) {
1270
0
                    return -1;
1271
0
                }
1272
0
            }
1273
0
        }
1274
1275
0
        if (end_version > 1 && !exist) {
1276
0
            *has_unrecycled_rowsets = true;
1277
0
            LOG_INFO("skip recycle ref_count = 1 rowset")
1278
0
                    .tag("instance_id", instance_id_)
1279
0
                    .tag("tablet_id", tablet_id)
1280
0
                    .tag("rowset_id", rowset_id)
1281
0
                    .tag("start_version", start_version)
1282
0
                    .tag("end_version", end_version)
1283
0
                    .tag("ref_count", ref_count);
1284
0
            return 0;
1285
0
        }
1286
1287
0
        if (recycle_rowset_meta_and_data("", rowset_meta) != 0) {
1288
0
            LOG_WARNING("failed to recycle_rowset_meta_and_data")
1289
0
                    .tag("instance_id", instance_id_)
1290
0
                    .tag("tablet_id", tablet_id)
1291
0
                    .tag("rowset_id", rowset_id);
1292
0
            return -1;
1293
0
        }
1294
1295
0
        ++num_recycled;
1296
0
        return 0;
1297
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
1298
1299
    // recycle_func and loop_done for scan and recycle
1300
0
    return scan_and_recycle(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end,
1301
0
                            std::move(recycle_func));
1302
0
}
1303
1304
17
int InstanceRecycler::recycle_indexes() {
1305
17
    const std::string task_name = "recycle_indexes";
1306
17
    int64_t num_scanned = 0;
1307
17
    int64_t num_expired = 0;
1308
17
    int64_t num_recycled = 0;
1309
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1310
1311
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
1312
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
1313
17
    std::string index_key0;
1314
17
    std::string index_key1;
1315
17
    recycle_index_key(index_key_info0, &index_key0);
1316
17
    recycle_index_key(index_key_info1, &index_key1);
1317
1318
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
1319
1320
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1321
17
    register_recycle_task(task_name, start_time);
1322
1323
17
    DORIS_CLOUD_DEFER {
1324
17
        unregister_recycle_task(task_name);
1325
17
        int64_t cost =
1326
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1327
17
        metrics_context.finish_report();
1328
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1329
17
                .tag("instance_id", instance_id_)
1330
17
                .tag("num_scanned", num_scanned)
1331
17
                .tag("num_expired", num_expired)
1332
17
                .tag("num_recycled", num_recycled);
1333
17
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1323
15
    DORIS_CLOUD_DEFER {
1324
15
        unregister_recycle_task(task_name);
1325
15
        int64_t cost =
1326
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1327
15
        metrics_context.finish_report();
1328
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1329
15
                .tag("instance_id", instance_id_)
1330
15
                .tag("num_scanned", num_scanned)
1331
15
                .tag("num_expired", num_expired)
1332
15
                .tag("num_recycled", num_recycled);
1333
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1323
2
    DORIS_CLOUD_DEFER {
1324
2
        unregister_recycle_task(task_name);
1325
2
        int64_t cost =
1326
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1327
2
        metrics_context.finish_report();
1328
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1329
2
                .tag("instance_id", instance_id_)
1330
2
                .tag("num_scanned", num_scanned)
1331
2
                .tag("num_expired", num_expired)
1332
2
                .tag("num_recycled", num_recycled);
1333
2
    };
1334
1335
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1336
1337
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
1338
17
    std::vector<std::string_view> index_keys;
1339
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1340
10
        ++num_scanned;
1341
10
        RecycleIndexPB index_pb;
1342
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1343
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1344
0
            return -1;
1345
0
        }
1346
10
        int64_t current_time = ::time(nullptr);
1347
10
        if (current_time <
1348
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1349
0
            return 0;
1350
0
        }
1351
10
        ++num_expired;
1352
        // decode index_id
1353
10
        auto k1 = k;
1354
10
        k1.remove_prefix(1);
1355
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1356
10
        decode_key(&k1, &out);
1357
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1358
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1359
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1360
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1361
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1362
        // Change state to RECYCLING
1363
10
        std::unique_ptr<Transaction> txn;
1364
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1365
10
        if (err != TxnErrorCode::TXN_OK) {
1366
0
            LOG_WARNING("failed to create txn").tag("err", err);
1367
0
            return -1;
1368
0
        }
1369
10
        std::string val;
1370
10
        err = txn->get(k, &val);
1371
10
        if (err ==
1372
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1373
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1374
0
            return 0;
1375
0
        }
1376
10
        if (err != TxnErrorCode::TXN_OK) {
1377
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1378
0
            return -1;
1379
0
        }
1380
10
        index_pb.Clear();
1381
10
        if (!index_pb.ParseFromString(val)) {
1382
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1383
0
            return -1;
1384
0
        }
1385
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1386
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1387
9
            txn->put(k, index_pb.SerializeAsString());
1388
9
            err = txn->commit();
1389
9
            if (err != TxnErrorCode::TXN_OK) {
1390
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1391
0
                return -1;
1392
0
            }
1393
9
        }
1394
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1395
1
            LOG_WARNING("failed to recycle tablets under index")
1396
1
                    .tag("table_id", index_pb.table_id())
1397
1
                    .tag("instance_id", instance_id_)
1398
1
                    .tag("index_id", index_id);
1399
1
            return -1;
1400
1
        }
1401
1402
9
        if (index_pb.has_db_id()) {
1403
            // Recycle the versioned keys
1404
3
            std::unique_ptr<Transaction> txn;
1405
3
            err = txn_kv_->create_txn(&txn);
1406
3
            if (err != TxnErrorCode::TXN_OK) {
1407
0
                LOG_WARNING("failed to create txn").tag("err", err);
1408
0
                return -1;
1409
0
            }
1410
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1411
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1412
3
            std::string index_inverted_key = versioned::index_inverted_key(
1413
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1414
3
            versioned_remove_all(txn.get(), meta_key);
1415
3
            txn->remove(index_key);
1416
3
            txn->remove(index_inverted_key);
1417
3
            err = txn->commit();
1418
3
            if (err != TxnErrorCode::TXN_OK) {
1419
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1420
0
                return -1;
1421
0
            }
1422
3
        }
1423
1424
9
        metrics_context.total_recycled_num = ++num_recycled;
1425
9
        metrics_context.report();
1426
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1427
9
        index_keys.push_back(k);
1428
9
        return 0;
1429
9
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1339
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1340
8
        ++num_scanned;
1341
8
        RecycleIndexPB index_pb;
1342
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1343
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1344
0
            return -1;
1345
0
        }
1346
8
        int64_t current_time = ::time(nullptr);
1347
8
        if (current_time <
1348
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1349
0
            return 0;
1350
0
        }
1351
8
        ++num_expired;
1352
        // decode index_id
1353
8
        auto k1 = k;
1354
8
        k1.remove_prefix(1);
1355
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1356
8
        decode_key(&k1, &out);
1357
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1358
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1359
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1360
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1361
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1362
        // Change state to RECYCLING
1363
8
        std::unique_ptr<Transaction> txn;
1364
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1365
8
        if (err != TxnErrorCode::TXN_OK) {
1366
0
            LOG_WARNING("failed to create txn").tag("err", err);
1367
0
            return -1;
1368
0
        }
1369
8
        std::string val;
1370
8
        err = txn->get(k, &val);
1371
8
        if (err ==
1372
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1373
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1374
0
            return 0;
1375
0
        }
1376
8
        if (err != TxnErrorCode::TXN_OK) {
1377
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1378
0
            return -1;
1379
0
        }
1380
8
        index_pb.Clear();
1381
8
        if (!index_pb.ParseFromString(val)) {
1382
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1383
0
            return -1;
1384
0
        }
1385
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1386
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1387
8
            txn->put(k, index_pb.SerializeAsString());
1388
8
            err = txn->commit();
1389
8
            if (err != TxnErrorCode::TXN_OK) {
1390
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1391
0
                return -1;
1392
0
            }
1393
8
        }
1394
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1395
0
            LOG_WARNING("failed to recycle tablets under index")
1396
0
                    .tag("table_id", index_pb.table_id())
1397
0
                    .tag("instance_id", instance_id_)
1398
0
                    .tag("index_id", index_id);
1399
0
            return -1;
1400
0
        }
1401
1402
8
        if (index_pb.has_db_id()) {
1403
            // Recycle the versioned keys
1404
2
            std::unique_ptr<Transaction> txn;
1405
2
            err = txn_kv_->create_txn(&txn);
1406
2
            if (err != TxnErrorCode::TXN_OK) {
1407
0
                LOG_WARNING("failed to create txn").tag("err", err);
1408
0
                return -1;
1409
0
            }
1410
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1411
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1412
2
            std::string index_inverted_key = versioned::index_inverted_key(
1413
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1414
2
            versioned_remove_all(txn.get(), meta_key);
1415
2
            txn->remove(index_key);
1416
2
            txn->remove(index_inverted_key);
1417
2
            err = txn->commit();
1418
2
            if (err != TxnErrorCode::TXN_OK) {
1419
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1420
0
                return -1;
1421
0
            }
1422
2
        }
1423
1424
8
        metrics_context.total_recycled_num = ++num_recycled;
1425
8
        metrics_context.report();
1426
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1427
8
        index_keys.push_back(k);
1428
8
        return 0;
1429
8
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1339
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1340
2
        ++num_scanned;
1341
2
        RecycleIndexPB index_pb;
1342
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1343
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1344
0
            return -1;
1345
0
        }
1346
2
        int64_t current_time = ::time(nullptr);
1347
2
        if (current_time <
1348
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1349
0
            return 0;
1350
0
        }
1351
2
        ++num_expired;
1352
        // decode index_id
1353
2
        auto k1 = k;
1354
2
        k1.remove_prefix(1);
1355
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1356
2
        decode_key(&k1, &out);
1357
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1358
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1359
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1360
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1361
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1362
        // Change state to RECYCLING
1363
2
        std::unique_ptr<Transaction> txn;
1364
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1365
2
        if (err != TxnErrorCode::TXN_OK) {
1366
0
            LOG_WARNING("failed to create txn").tag("err", err);
1367
0
            return -1;
1368
0
        }
1369
2
        std::string val;
1370
2
        err = txn->get(k, &val);
1371
2
        if (err ==
1372
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1373
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1374
0
            return 0;
1375
0
        }
1376
2
        if (err != TxnErrorCode::TXN_OK) {
1377
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1378
0
            return -1;
1379
0
        }
1380
2
        index_pb.Clear();
1381
2
        if (!index_pb.ParseFromString(val)) {
1382
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1383
0
            return -1;
1384
0
        }
1385
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1386
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1387
1
            txn->put(k, index_pb.SerializeAsString());
1388
1
            err = txn->commit();
1389
1
            if (err != TxnErrorCode::TXN_OK) {
1390
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1391
0
                return -1;
1392
0
            }
1393
1
        }
1394
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1395
1
            LOG_WARNING("failed to recycle tablets under index")
1396
1
                    .tag("table_id", index_pb.table_id())
1397
1
                    .tag("instance_id", instance_id_)
1398
1
                    .tag("index_id", index_id);
1399
1
            return -1;
1400
1
        }
1401
1402
1
        if (index_pb.has_db_id()) {
1403
            // Recycle the versioned keys
1404
1
            std::unique_ptr<Transaction> txn;
1405
1
            err = txn_kv_->create_txn(&txn);
1406
1
            if (err != TxnErrorCode::TXN_OK) {
1407
0
                LOG_WARNING("failed to create txn").tag("err", err);
1408
0
                return -1;
1409
0
            }
1410
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1411
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1412
1
            std::string index_inverted_key = versioned::index_inverted_key(
1413
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1414
1
            versioned_remove_all(txn.get(), meta_key);
1415
1
            txn->remove(index_key);
1416
1
            txn->remove(index_inverted_key);
1417
1
            err = txn->commit();
1418
1
            if (err != TxnErrorCode::TXN_OK) {
1419
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1420
0
                return -1;
1421
0
            }
1422
1
        }
1423
1424
1
        metrics_context.total_recycled_num = ++num_recycled;
1425
1
        metrics_context.report();
1426
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1427
1
        index_keys.push_back(k);
1428
1
        return 0;
1429
1
    };
1430
1431
17
    auto loop_done = [&index_keys, this]() -> int {
1432
6
        if (index_keys.empty()) return 0;
1433
5
        DORIS_CLOUD_DEFER {
1434
5
            index_keys.clear();
1435
5
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1433
4
        DORIS_CLOUD_DEFER {
1434
4
            index_keys.clear();
1435
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1433
1
        DORIS_CLOUD_DEFER {
1434
1
            index_keys.clear();
1435
1
        };
1436
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1437
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1438
0
            return -1;
1439
0
        }
1440
5
        return 0;
1441
5
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1431
4
    auto loop_done = [&index_keys, this]() -> int {
1432
4
        if (index_keys.empty()) return 0;
1433
4
        DORIS_CLOUD_DEFER {
1434
4
            index_keys.clear();
1435
4
        };
1436
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1437
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1438
0
            return -1;
1439
0
        }
1440
4
        return 0;
1441
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1431
2
    auto loop_done = [&index_keys, this]() -> int {
1432
2
        if (index_keys.empty()) return 0;
1433
1
        DORIS_CLOUD_DEFER {
1434
1
            index_keys.clear();
1435
1
        };
1436
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1437
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1438
0
            return -1;
1439
0
        }
1440
1
        return 0;
1441
1
    };
1442
1443
17
    if (config::enable_recycler_stats_metrics) {
1444
0
        scan_and_statistics_indexes();
1445
0
    }
1446
    // recycle_func and loop_done for scan and recycle
1447
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
1448
17
}
1449
1450
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
1451
8.24k
                             int64_t tablet_id) {
1452
8.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
1453
1454
8.23k
    std::unique_ptr<Transaction> txn;
1455
8.23k
    TxnErrorCode err = txn_kv->create_txn(&txn);
1456
8.23k
    if (err != TxnErrorCode::TXN_OK) {
1457
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
1458
0
                     << " tablet_id=" << tablet_id << " err=" << err;
1459
0
        return false;
1460
0
    }
1461
1462
8.23k
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
1463
8.23k
    std::string tablet_idx_val;
1464
8.23k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
1465
8.23k
    if (TxnErrorCode::TXN_OK != err) {
1466
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
1467
0
                     << " tablet_id=" << tablet_id << " err=" << err
1468
0
                     << " key=" << hex(tablet_idx_key);
1469
0
        return false;
1470
0
    }
1471
1472
8.23k
    TabletIndexPB tablet_idx_pb;
1473
8.23k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
1474
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
1475
0
                     << " tablet_id=" << tablet_id;
1476
0
        return false;
1477
0
    }
1478
1479
8.23k
    if (!tablet_idx_pb.has_db_id()) {
1480
        // In the previous version, the db_id was not set in the index_pb.
1481
        // If updating to the version which enable txn lazy commit, the db_id will be set.
1482
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
1483
0
                  << " instance_id=" << instance_id
1484
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
1485
0
        return true;
1486
0
    }
1487
1488
8.23k
    std::string ver_val;
1489
8.23k
    std::string ver_key =
1490
8.23k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
1491
8.23k
                                   tablet_idx_pb.partition_id()});
1492
8.23k
    err = txn->get(ver_key, &ver_val);
1493
1494
8.23k
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
1495
201
        LOG(INFO) << ""
1496
201
                     "partition version not found, instance_id="
1497
201
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
1498
201
                  << " table_id=" << tablet_idx_pb.table_id()
1499
201
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
1500
201
                  << " key=" << hex(ver_key);
1501
201
        return true;
1502
201
    }
1503
1504
8.03k
    if (TxnErrorCode::TXN_OK != err) {
1505
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
1506
0
                     << " db_id=" << tablet_idx_pb.db_id()
1507
0
                     << " table_id=" << tablet_idx_pb.table_id()
1508
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1509
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
1510
0
        return false;
1511
0
    }
1512
1513
8.03k
    VersionPB version_pb;
1514
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
1515
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
1516
0
                     << " db_id=" << tablet_idx_pb.db_id()
1517
0
                     << " table_id=" << tablet_idx_pb.table_id()
1518
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1519
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
1520
0
        return false;
1521
0
    }
1522
1523
8.03k
    if (version_pb.pending_txn_ids_size() > 0) {
1524
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
1525
4.00k
        DCHECK(version_pb.pending_txn_ids_size() == 1);
1526
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
1527
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
1528
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
1529
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
1530
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
1531
4.00k
                     << " key=" << hex(ver_key);
1532
4.00k
        return false;
1533
4.00k
    }
1534
4.03k
    return true;
1535
8.03k
}
1536
1537
15
int InstanceRecycler::recycle_partitions() {
1538
15
    const std::string task_name = "recycle_partitions";
1539
15
    int64_t num_scanned = 0;
1540
15
    int64_t num_expired = 0;
1541
15
    int64_t num_recycled = 0;
1542
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1543
1544
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
1545
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
1546
15
    std::string part_key0;
1547
15
    std::string part_key1;
1548
15
    recycle_partition_key(part_key_info0, &part_key0);
1549
15
    recycle_partition_key(part_key_info1, &part_key1);
1550
1551
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
1552
1553
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1554
15
    register_recycle_task(task_name, start_time);
1555
1556
15
    DORIS_CLOUD_DEFER {
1557
15
        unregister_recycle_task(task_name);
1558
15
        int64_t cost =
1559
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1560
15
        metrics_context.finish_report();
1561
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1562
15
                .tag("instance_id", instance_id_)
1563
15
                .tag("num_scanned", num_scanned)
1564
15
                .tag("num_expired", num_expired)
1565
15
                .tag("num_recycled", num_recycled);
1566
15
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1556
13
    DORIS_CLOUD_DEFER {
1557
13
        unregister_recycle_task(task_name);
1558
13
        int64_t cost =
1559
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1560
13
        metrics_context.finish_report();
1561
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1562
13
                .tag("instance_id", instance_id_)
1563
13
                .tag("num_scanned", num_scanned)
1564
13
                .tag("num_expired", num_expired)
1565
13
                .tag("num_recycled", num_recycled);
1566
13
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1556
2
    DORIS_CLOUD_DEFER {
1557
2
        unregister_recycle_task(task_name);
1558
2
        int64_t cost =
1559
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1560
2
        metrics_context.finish_report();
1561
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1562
2
                .tag("instance_id", instance_id_)
1563
2
                .tag("num_scanned", num_scanned)
1564
2
                .tag("num_expired", num_expired)
1565
2
                .tag("num_recycled", num_recycled);
1566
2
    };
1567
1568
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1569
1570
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
1571
15
    std::vector<std::string_view> partition_keys;
1572
15
    std::vector<std::string> partition_version_keys;
1573
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1574
9
        ++num_scanned;
1575
9
        RecyclePartitionPB part_pb;
1576
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1577
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1578
0
            return -1;
1579
0
        }
1580
9
        int64_t current_time = ::time(nullptr);
1581
9
        if (current_time <
1582
9
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1583
0
            return 0;
1584
0
        }
1585
9
        ++num_expired;
1586
        // decode partition_id
1587
9
        auto k1 = k;
1588
9
        k1.remove_prefix(1);
1589
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1590
9
        decode_key(&k1, &out);
1591
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1592
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1593
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1594
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1595
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1596
        // Change state to RECYCLING
1597
9
        std::unique_ptr<Transaction> txn;
1598
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1599
9
        if (err != TxnErrorCode::TXN_OK) {
1600
0
            LOG_WARNING("failed to create txn").tag("err", err);
1601
0
            return -1;
1602
0
        }
1603
9
        std::string val;
1604
9
        err = txn->get(k, &val);
1605
9
        if (err ==
1606
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1607
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1608
0
            return 0;
1609
0
        }
1610
9
        if (err != TxnErrorCode::TXN_OK) {
1611
0
            LOG_WARNING("failed to get kv");
1612
0
            return -1;
1613
0
        }
1614
9
        part_pb.Clear();
1615
9
        if (!part_pb.ParseFromString(val)) {
1616
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1617
0
            return -1;
1618
0
        }
1619
        // Partitions with PREPARED state MUST have no data
1620
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1621
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1622
8
            txn->put(k, part_pb.SerializeAsString());
1623
8
            err = txn->commit();
1624
8
            if (err != TxnErrorCode::TXN_OK) {
1625
0
                LOG_WARNING("failed to commit txn: {}", err);
1626
0
                return -1;
1627
0
            }
1628
8
        }
1629
1630
9
        int ret = 0;
1631
33
        for (int64_t index_id : part_pb.index_id()) {
1632
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1633
1
                LOG_WARNING("failed to recycle tablets under partition")
1634
1
                        .tag("table_id", part_pb.table_id())
1635
1
                        .tag("instance_id", instance_id_)
1636
1
                        .tag("index_id", index_id)
1637
1
                        .tag("partition_id", partition_id);
1638
1
                ret = -1;
1639
1
            }
1640
33
        }
1641
9
        if (ret == 0 && part_pb.has_db_id()) {
1642
            // Recycle the versioned keys
1643
8
            std::unique_ptr<Transaction> txn;
1644
8
            err = txn_kv_->create_txn(&txn);
1645
8
            if (err != TxnErrorCode::TXN_OK) {
1646
0
                LOG_WARNING("failed to create txn").tag("err", err);
1647
0
                return -1;
1648
0
            }
1649
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1650
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1651
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1652
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1653
8
            std::string partition_version_key =
1654
8
                    versioned::partition_version_key({instance_id_, partition_id});
1655
8
            versioned_remove_all(txn.get(), meta_key);
1656
8
            txn->remove(index_key);
1657
8
            txn->remove(inverted_index_key);
1658
8
            versioned_remove_all(txn.get(), partition_version_key);
1659
8
            err = txn->commit();
1660
8
            if (err != TxnErrorCode::TXN_OK) {
1661
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1662
0
                return -1;
1663
0
            }
1664
8
        }
1665
1666
9
        if (ret == 0) {
1667
8
            ++num_recycled;
1668
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1669
8
            partition_keys.push_back(k);
1670
8
            if (part_pb.db_id() > 0) {
1671
8
                partition_version_keys.push_back(partition_version_key(
1672
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1673
8
            }
1674
8
            metrics_context.total_recycled_num = num_recycled;
1675
8
            metrics_context.report();
1676
8
        }
1677
9
        return ret;
1678
9
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1573
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1574
7
        ++num_scanned;
1575
7
        RecyclePartitionPB part_pb;
1576
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1577
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1578
0
            return -1;
1579
0
        }
1580
7
        int64_t current_time = ::time(nullptr);
1581
7
        if (current_time <
1582
7
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1583
0
            return 0;
1584
0
        }
1585
7
        ++num_expired;
1586
        // decode partition_id
1587
7
        auto k1 = k;
1588
7
        k1.remove_prefix(1);
1589
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1590
7
        decode_key(&k1, &out);
1591
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1592
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1593
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1594
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1595
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1596
        // Change state to RECYCLING
1597
7
        std::unique_ptr<Transaction> txn;
1598
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1599
7
        if (err != TxnErrorCode::TXN_OK) {
1600
0
            LOG_WARNING("failed to create txn").tag("err", err);
1601
0
            return -1;
1602
0
        }
1603
7
        std::string val;
1604
7
        err = txn->get(k, &val);
1605
7
        if (err ==
1606
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1607
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1608
0
            return 0;
1609
0
        }
1610
7
        if (err != TxnErrorCode::TXN_OK) {
1611
0
            LOG_WARNING("failed to get kv");
1612
0
            return -1;
1613
0
        }
1614
7
        part_pb.Clear();
1615
7
        if (!part_pb.ParseFromString(val)) {
1616
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1617
0
            return -1;
1618
0
        }
1619
        // Partitions with PREPARED state MUST have no data
1620
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1621
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1622
7
            txn->put(k, part_pb.SerializeAsString());
1623
7
            err = txn->commit();
1624
7
            if (err != TxnErrorCode::TXN_OK) {
1625
0
                LOG_WARNING("failed to commit txn: {}", err);
1626
0
                return -1;
1627
0
            }
1628
7
        }
1629
1630
7
        int ret = 0;
1631
31
        for (int64_t index_id : part_pb.index_id()) {
1632
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1633
0
                LOG_WARNING("failed to recycle tablets under partition")
1634
0
                        .tag("table_id", part_pb.table_id())
1635
0
                        .tag("instance_id", instance_id_)
1636
0
                        .tag("index_id", index_id)
1637
0
                        .tag("partition_id", partition_id);
1638
0
                ret = -1;
1639
0
            }
1640
31
        }
1641
7
        if (ret == 0 && part_pb.has_db_id()) {
1642
            // Recycle the versioned keys
1643
7
            std::unique_ptr<Transaction> txn;
1644
7
            err = txn_kv_->create_txn(&txn);
1645
7
            if (err != TxnErrorCode::TXN_OK) {
1646
0
                LOG_WARNING("failed to create txn").tag("err", err);
1647
0
                return -1;
1648
0
            }
1649
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1650
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1651
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1652
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1653
7
            std::string partition_version_key =
1654
7
                    versioned::partition_version_key({instance_id_, partition_id});
1655
7
            versioned_remove_all(txn.get(), meta_key);
1656
7
            txn->remove(index_key);
1657
7
            txn->remove(inverted_index_key);
1658
7
            versioned_remove_all(txn.get(), partition_version_key);
1659
7
            err = txn->commit();
1660
7
            if (err != TxnErrorCode::TXN_OK) {
1661
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1662
0
                return -1;
1663
0
            }
1664
7
        }
1665
1666
7
        if (ret == 0) {
1667
7
            ++num_recycled;
1668
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1669
7
            partition_keys.push_back(k);
1670
7
            if (part_pb.db_id() > 0) {
1671
7
                partition_version_keys.push_back(partition_version_key(
1672
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1673
7
            }
1674
7
            metrics_context.total_recycled_num = num_recycled;
1675
7
            metrics_context.report();
1676
7
        }
1677
7
        return ret;
1678
7
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1573
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1574
2
        ++num_scanned;
1575
2
        RecyclePartitionPB part_pb;
1576
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1577
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1578
0
            return -1;
1579
0
        }
1580
2
        int64_t current_time = ::time(nullptr);
1581
2
        if (current_time <
1582
2
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1583
0
            return 0;
1584
0
        }
1585
2
        ++num_expired;
1586
        // decode partition_id
1587
2
        auto k1 = k;
1588
2
        k1.remove_prefix(1);
1589
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1590
2
        decode_key(&k1, &out);
1591
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1592
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1593
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1594
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1595
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1596
        // Change state to RECYCLING
1597
2
        std::unique_ptr<Transaction> txn;
1598
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1599
2
        if (err != TxnErrorCode::TXN_OK) {
1600
0
            LOG_WARNING("failed to create txn").tag("err", err);
1601
0
            return -1;
1602
0
        }
1603
2
        std::string val;
1604
2
        err = txn->get(k, &val);
1605
2
        if (err ==
1606
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1607
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1608
0
            return 0;
1609
0
        }
1610
2
        if (err != TxnErrorCode::TXN_OK) {
1611
0
            LOG_WARNING("failed to get kv");
1612
0
            return -1;
1613
0
        }
1614
2
        part_pb.Clear();
1615
2
        if (!part_pb.ParseFromString(val)) {
1616
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1617
0
            return -1;
1618
0
        }
1619
        // Partitions with PREPARED state MUST have no data
1620
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1621
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1622
1
            txn->put(k, part_pb.SerializeAsString());
1623
1
            err = txn->commit();
1624
1
            if (err != TxnErrorCode::TXN_OK) {
1625
0
                LOG_WARNING("failed to commit txn: {}", err);
1626
0
                return -1;
1627
0
            }
1628
1
        }
1629
1630
2
        int ret = 0;
1631
2
        for (int64_t index_id : part_pb.index_id()) {
1632
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1633
1
                LOG_WARNING("failed to recycle tablets under partition")
1634
1
                        .tag("table_id", part_pb.table_id())
1635
1
                        .tag("instance_id", instance_id_)
1636
1
                        .tag("index_id", index_id)
1637
1
                        .tag("partition_id", partition_id);
1638
1
                ret = -1;
1639
1
            }
1640
2
        }
1641
2
        if (ret == 0 && part_pb.has_db_id()) {
1642
            // Recycle the versioned keys
1643
1
            std::unique_ptr<Transaction> txn;
1644
1
            err = txn_kv_->create_txn(&txn);
1645
1
            if (err != TxnErrorCode::TXN_OK) {
1646
0
                LOG_WARNING("failed to create txn").tag("err", err);
1647
0
                return -1;
1648
0
            }
1649
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1650
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1651
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1652
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1653
1
            std::string partition_version_key =
1654
1
                    versioned::partition_version_key({instance_id_, partition_id});
1655
1
            versioned_remove_all(txn.get(), meta_key);
1656
1
            txn->remove(index_key);
1657
1
            txn->remove(inverted_index_key);
1658
1
            versioned_remove_all(txn.get(), partition_version_key);
1659
1
            err = txn->commit();
1660
1
            if (err != TxnErrorCode::TXN_OK) {
1661
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1662
0
                return -1;
1663
0
            }
1664
1
        }
1665
1666
2
        if (ret == 0) {
1667
1
            ++num_recycled;
1668
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1669
1
            partition_keys.push_back(k);
1670
1
            if (part_pb.db_id() > 0) {
1671
1
                partition_version_keys.push_back(partition_version_key(
1672
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1673
1
            }
1674
1
            metrics_context.total_recycled_num = num_recycled;
1675
1
            metrics_context.report();
1676
1
        }
1677
2
        return ret;
1678
2
    };
1679
1680
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1681
5
        if (partition_keys.empty()) return 0;
1682
4
        DORIS_CLOUD_DEFER {
1683
4
            partition_keys.clear();
1684
4
            partition_version_keys.clear();
1685
4
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1682
3
        DORIS_CLOUD_DEFER {
1683
3
            partition_keys.clear();
1684
3
            partition_version_keys.clear();
1685
3
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1682
1
        DORIS_CLOUD_DEFER {
1683
1
            partition_keys.clear();
1684
1
            partition_version_keys.clear();
1685
1
        };
1686
4
        std::unique_ptr<Transaction> txn;
1687
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1688
4
        if (err != TxnErrorCode::TXN_OK) {
1689
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1690
0
            return -1;
1691
0
        }
1692
8
        for (auto& k : partition_keys) {
1693
8
            txn->remove(k);
1694
8
        }
1695
8
        for (auto& k : partition_version_keys) {
1696
8
            txn->remove(k);
1697
8
        }
1698
4
        err = txn->commit();
1699
4
        if (err != TxnErrorCode::TXN_OK) {
1700
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1701
0
                         << " err=" << err;
1702
0
            return -1;
1703
0
        }
1704
4
        return 0;
1705
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1680
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1681
3
        if (partition_keys.empty()) return 0;
1682
3
        DORIS_CLOUD_DEFER {
1683
3
            partition_keys.clear();
1684
3
            partition_version_keys.clear();
1685
3
        };
1686
3
        std::unique_ptr<Transaction> txn;
1687
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1688
3
        if (err != TxnErrorCode::TXN_OK) {
1689
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1690
0
            return -1;
1691
0
        }
1692
7
        for (auto& k : partition_keys) {
1693
7
            txn->remove(k);
1694
7
        }
1695
7
        for (auto& k : partition_version_keys) {
1696
7
            txn->remove(k);
1697
7
        }
1698
3
        err = txn->commit();
1699
3
        if (err != TxnErrorCode::TXN_OK) {
1700
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1701
0
                         << " err=" << err;
1702
0
            return -1;
1703
0
        }
1704
3
        return 0;
1705
3
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1680
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1681
2
        if (partition_keys.empty()) return 0;
1682
1
        DORIS_CLOUD_DEFER {
1683
1
            partition_keys.clear();
1684
1
            partition_version_keys.clear();
1685
1
        };
1686
1
        std::unique_ptr<Transaction> txn;
1687
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1688
1
        if (err != TxnErrorCode::TXN_OK) {
1689
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1690
0
            return -1;
1691
0
        }
1692
1
        for (auto& k : partition_keys) {
1693
1
            txn->remove(k);
1694
1
        }
1695
1
        for (auto& k : partition_version_keys) {
1696
1
            txn->remove(k);
1697
1
        }
1698
1
        err = txn->commit();
1699
1
        if (err != TxnErrorCode::TXN_OK) {
1700
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1701
0
                         << " err=" << err;
1702
0
            return -1;
1703
0
        }
1704
1
        return 0;
1705
1
    };
1706
1707
15
    if (config::enable_recycler_stats_metrics) {
1708
0
        scan_and_statistics_partitions();
1709
0
    }
1710
    // recycle_func and loop_done for scan and recycle
1711
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
1712
15
}
1713
1714
14
int InstanceRecycler::recycle_versions() {
1715
14
    if (should_recycle_versioned_keys()) {
1716
2
        return recycle_orphan_partitions();
1717
2
    }
1718
1719
12
    int64_t num_scanned = 0;
1720
12
    int64_t num_recycled = 0;
1721
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
1722
1723
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
1724
1725
12
    auto start_time = steady_clock::now();
1726
1727
12
    DORIS_CLOUD_DEFER {
1728
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1729
12
        metrics_context.finish_report();
1730
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
1731
12
                .tag("instance_id", instance_id_)
1732
12
                .tag("num_scanned", num_scanned)
1733
12
                .tag("num_recycled", num_recycled);
1734
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
1727
12
    DORIS_CLOUD_DEFER {
1728
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1729
12
        metrics_context.finish_report();
1730
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
1731
12
                .tag("instance_id", instance_id_)
1732
12
                .tag("num_scanned", num_scanned)
1733
12
                .tag("num_recycled", num_recycled);
1734
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
1735
1736
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
1737
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
1738
12
    int64_t last_scanned_table_id = 0;
1739
12
    bool is_recycled = false; // Is last scanned kv recycled
1740
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
1741
12
                         &metrics_context, this](std::string_view k, std::string_view) {
1742
2
        ++num_scanned;
1743
2
        auto k1 = k;
1744
2
        k1.remove_prefix(1);
1745
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1746
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1747
2
        decode_key(&k1, &out);
1748
2
        DCHECK_EQ(out.size(), 6) << k;
1749
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1750
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1751
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1752
0
            return 0;
1753
0
        }
1754
2
        last_scanned_table_id = table_id;
1755
2
        is_recycled = false;
1756
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1757
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1758
2
        std::unique_ptr<Transaction> txn;
1759
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1760
2
        if (err != TxnErrorCode::TXN_OK) {
1761
0
            return -1;
1762
0
        }
1763
2
        std::unique_ptr<RangeGetIterator> iter;
1764
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1765
2
        if (err != TxnErrorCode::TXN_OK) {
1766
0
            return -1;
1767
0
        }
1768
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1769
1
            return 0;
1770
1
        }
1771
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1772
        // 1. Remove all partition version kvs of this table
1773
1
        auto partition_version_key_begin =
1774
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1775
1
        auto partition_version_key_end =
1776
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1777
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1778
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1779
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1780
1
                     << " table_id=" << table_id;
1781
        // 2. Remove the table version kv of this table
1782
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1783
1
        txn->remove(tbl_version_key);
1784
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1785
        // 3. Remove mow delete bitmap update lock and tablet job lock
1786
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1787
1
        txn->remove(lock_key);
1788
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1789
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1790
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1791
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
1792
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1793
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1794
1
                     << " table_id=" << table_id;
1795
1
        err = txn->commit();
1796
1
        if (err != TxnErrorCode::TXN_OK) {
1797
0
            return -1;
1798
0
        }
1799
1
        metrics_context.total_recycled_num = ++num_recycled;
1800
1
        metrics_context.report();
1801
1
        is_recycled = true;
1802
1
        return 0;
1803
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1741
2
                         &metrics_context, this](std::string_view k, std::string_view) {
1742
2
        ++num_scanned;
1743
2
        auto k1 = k;
1744
2
        k1.remove_prefix(1);
1745
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1746
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1747
2
        decode_key(&k1, &out);
1748
2
        DCHECK_EQ(out.size(), 6) << k;
1749
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1750
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1751
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1752
0
            return 0;
1753
0
        }
1754
2
        last_scanned_table_id = table_id;
1755
2
        is_recycled = false;
1756
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1757
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1758
2
        std::unique_ptr<Transaction> txn;
1759
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1760
2
        if (err != TxnErrorCode::TXN_OK) {
1761
0
            return -1;
1762
0
        }
1763
2
        std::unique_ptr<RangeGetIterator> iter;
1764
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1765
2
        if (err != TxnErrorCode::TXN_OK) {
1766
0
            return -1;
1767
0
        }
1768
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1769
1
            return 0;
1770
1
        }
1771
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1772
        // 1. Remove all partition version kvs of this table
1773
1
        auto partition_version_key_begin =
1774
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1775
1
        auto partition_version_key_end =
1776
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1777
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1778
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1779
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1780
1
                     << " table_id=" << table_id;
1781
        // 2. Remove the table version kv of this table
1782
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1783
1
        txn->remove(tbl_version_key);
1784
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1785
        // 3. Remove mow delete bitmap update lock and tablet job lock
1786
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1787
1
        txn->remove(lock_key);
1788
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1789
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1790
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1791
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
1792
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1793
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1794
1
                     << " table_id=" << table_id;
1795
1
        err = txn->commit();
1796
1
        if (err != TxnErrorCode::TXN_OK) {
1797
0
            return -1;
1798
0
        }
1799
1
        metrics_context.total_recycled_num = ++num_recycled;
1800
1
        metrics_context.report();
1801
1
        is_recycled = true;
1802
1
        return 0;
1803
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
1804
1805
12
    if (config::enable_recycler_stats_metrics) {
1806
0
        scan_and_statistics_versions();
1807
0
    }
1808
    // recycle_func and loop_done for scan and recycle
1809
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
1810
14
}
1811
1812
3
int InstanceRecycler::recycle_orphan_partitions() {
1813
3
    int64_t num_scanned = 0;
1814
3
    int64_t num_recycled = 0;
1815
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
1816
1817
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
1818
3
            .tag("instance_id", instance_id_);
1819
1820
3
    auto start_time = steady_clock::now();
1821
1822
3
    DORIS_CLOUD_DEFER {
1823
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1824
3
        metrics_context.finish_report();
1825
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
1826
3
                .tag("instance_id", instance_id_)
1827
3
                .tag("num_scanned", num_scanned)
1828
3
                .tag("num_recycled", num_recycled);
1829
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
1822
3
    DORIS_CLOUD_DEFER {
1823
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1824
3
        metrics_context.finish_report();
1825
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
1826
3
                .tag("instance_id", instance_id_)
1827
3
                .tag("num_scanned", num_scanned)
1828
3
                .tag("num_recycled", num_recycled);
1829
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
1830
1831
3
    bool is_empty_table = false;        // whether the table has no indexes
1832
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
1833
3
    int64_t current_table_id = 0;       // current scanning table id
1834
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
1835
3
                         &current_table_id, &is_table_kvs_recycled,
1836
3
                         this](std::string_view k, std::string_view) {
1837
2
        ++num_scanned;
1838
1839
2
        std::string_view k1(k);
1840
2
        int64_t db_id, table_id, partition_id;
1841
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
1842
2
                                                            &partition_id)) {
1843
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
1844
0
            return -1;
1845
2
        } else if (table_id != current_table_id) {
1846
2
            current_table_id = table_id;
1847
2
            is_table_kvs_recycled = false;
1848
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
1849
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
1850
2
            if (err != TxnErrorCode::TXN_OK) {
1851
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
1852
0
                             << " table_id=" << table_id << " err=" << err;
1853
0
                return -1;
1854
0
            }
1855
2
        }
1856
1857
2
        if (!is_empty_table) {
1858
            // table is not empty, skip recycle
1859
1
            return 0;
1860
1
        }
1861
1862
1
        std::unique_ptr<Transaction> txn;
1863
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1864
1
        if (err != TxnErrorCode::TXN_OK) {
1865
0
            return -1;
1866
0
        }
1867
1868
        // 1. Remove all partition related kvs
1869
1
        std::string partition_meta_key =
1870
1
                versioned::meta_partition_key({instance_id_, partition_id});
1871
1
        std::string partition_index_key =
1872
1
                versioned::partition_index_key({instance_id_, partition_id});
1873
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
1874
1
                {instance_id_, db_id, table_id, partition_id});
1875
1
        std::string partition_version_key =
1876
1
                versioned::partition_version_key({instance_id_, partition_id});
1877
1
        txn->remove(partition_index_key);
1878
1
        txn->remove(partition_inverted_key);
1879
1
        versioned_remove_all(txn.get(), partition_meta_key);
1880
1
        versioned_remove_all(txn.get(), partition_version_key);
1881
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
1882
1
                     << " table_id=" << table_id << " db_id=" << db_id
1883
1
                     << " partition_meta_key=" << hex(partition_meta_key)
1884
1
                     << " partition_version_key=" << hex(partition_version_key);
1885
1886
1
        if (!is_table_kvs_recycled) {
1887
1
            is_table_kvs_recycled = true;
1888
1889
            // 2. Remove the table version kv of this table
1890
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
1891
1
            versioned_remove_all(txn.get(), table_version_key);
1892
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
1893
            // 3. Remove mow delete bitmap update lock and tablet job lock
1894
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1895
1
            txn->remove(lock_key);
1896
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1897
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1898
1
            std::string tablet_job_key_end =
1899
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1900
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
1901
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1902
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1903
1
                         << " table_id=" << table_id;
1904
1
        }
1905
1906
1
        err = txn->commit();
1907
1
        if (err != TxnErrorCode::TXN_OK) {
1908
0
            return -1;
1909
0
        }
1910
1
        metrics_context.total_recycled_num = ++num_recycled;
1911
1
        metrics_context.report();
1912
1
        return 0;
1913
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1836
2
                         this](std::string_view k, std::string_view) {
1837
2
        ++num_scanned;
1838
1839
2
        std::string_view k1(k);
1840
2
        int64_t db_id, table_id, partition_id;
1841
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
1842
2
                                                            &partition_id)) {
1843
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
1844
0
            return -1;
1845
2
        } else if (table_id != current_table_id) {
1846
2
            current_table_id = table_id;
1847
2
            is_table_kvs_recycled = false;
1848
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
1849
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
1850
2
            if (err != TxnErrorCode::TXN_OK) {
1851
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
1852
0
                             << " table_id=" << table_id << " err=" << err;
1853
0
                return -1;
1854
0
            }
1855
2
        }
1856
1857
2
        if (!is_empty_table) {
1858
            // table is not empty, skip recycle
1859
1
            return 0;
1860
1
        }
1861
1862
1
        std::unique_ptr<Transaction> txn;
1863
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1864
1
        if (err != TxnErrorCode::TXN_OK) {
1865
0
            return -1;
1866
0
        }
1867
1868
        // 1. Remove all partition related kvs
1869
1
        std::string partition_meta_key =
1870
1
                versioned::meta_partition_key({instance_id_, partition_id});
1871
1
        std::string partition_index_key =
1872
1
                versioned::partition_index_key({instance_id_, partition_id});
1873
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
1874
1
                {instance_id_, db_id, table_id, partition_id});
1875
1
        std::string partition_version_key =
1876
1
                versioned::partition_version_key({instance_id_, partition_id});
1877
1
        txn->remove(partition_index_key);
1878
1
        txn->remove(partition_inverted_key);
1879
1
        versioned_remove_all(txn.get(), partition_meta_key);
1880
1
        versioned_remove_all(txn.get(), partition_version_key);
1881
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
1882
1
                     << " table_id=" << table_id << " db_id=" << db_id
1883
1
                     << " partition_meta_key=" << hex(partition_meta_key)
1884
1
                     << " partition_version_key=" << hex(partition_version_key);
1885
1886
1
        if (!is_table_kvs_recycled) {
1887
1
            is_table_kvs_recycled = true;
1888
1889
            // 2. Remove the table version kv of this table
1890
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
1891
1
            versioned_remove_all(txn.get(), table_version_key);
1892
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
1893
            // 3. Remove mow delete bitmap update lock and tablet job lock
1894
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1895
1
            txn->remove(lock_key);
1896
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1897
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1898
1
            std::string tablet_job_key_end =
1899
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1900
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
1901
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1902
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1903
1
                         << " table_id=" << table_id;
1904
1
        }
1905
1906
1
        err = txn->commit();
1907
1
        if (err != TxnErrorCode::TXN_OK) {
1908
0
            return -1;
1909
0
        }
1910
1
        metrics_context.total_recycled_num = ++num_recycled;
1911
1
        metrics_context.report();
1912
1
        return 0;
1913
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
1914
1915
    // recycle_func and loop_done for scan and recycle
1916
3
    return scan_and_recycle(
1917
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
1918
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
1919
3
            std::move(recycle_func));
1920
3
}
1921
1922
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
1923
                                      RecyclerMetricsContext& metrics_context,
1924
46
                                      int64_t partition_id) {
1925
46
    bool is_multi_version =
1926
46
            instance_info_.has_multi_version_status() &&
1927
46
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
1928
46
    int64_t num_scanned = 0;
1929
46
    std::atomic_long num_recycled = 0;
1930
1931
46
    std::string tablet_key_begin, tablet_key_end;
1932
46
    std::string stats_key_begin, stats_key_end;
1933
46
    std::string job_key_begin, job_key_end;
1934
1935
46
    std::string tablet_belongs;
1936
46
    if (partition_id > 0) {
1937
        // recycle tablets in a partition belonging to the index
1938
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
1939
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
1940
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
1941
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
1942
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
1943
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
1944
33
        tablet_belongs = "partition";
1945
33
    } else {
1946
        // recycle tablets in the index
1947
13
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
1948
13
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
1949
13
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
1950
13
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
1951
13
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
1952
13
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
1953
13
        tablet_belongs = "index";
1954
13
    }
1955
1956
46
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
1957
46
            .tag("table_id", table_id)
1958
46
            .tag("index_id", index_id)
1959
46
            .tag("partition_id", partition_id);
1960
1961
46
    auto start_time = steady_clock::now();
1962
1963
46
    DORIS_CLOUD_DEFER {
1964
46
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1965
46
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1966
46
                .tag("instance_id", instance_id_)
1967
46
                .tag("table_id", table_id)
1968
46
                .tag("index_id", index_id)
1969
46
                .tag("partition_id", partition_id)
1970
46
                .tag("num_scanned", num_scanned)
1971
46
                .tag("num_recycled", num_recycled);
1972
46
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
1963
42
    DORIS_CLOUD_DEFER {
1964
42
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1965
42
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1966
42
                .tag("instance_id", instance_id_)
1967
42
                .tag("table_id", table_id)
1968
42
                .tag("index_id", index_id)
1969
42
                .tag("partition_id", partition_id)
1970
42
                .tag("num_scanned", num_scanned)
1971
42
                .tag("num_recycled", num_recycled);
1972
42
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
1963
4
    DORIS_CLOUD_DEFER {
1964
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1965
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1966
4
                .tag("instance_id", instance_id_)
1967
4
                .tag("table_id", table_id)
1968
4
                .tag("index_id", index_id)
1969
4
                .tag("partition_id", partition_id)
1970
4
                .tag("num_scanned", num_scanned)
1971
4
                .tag("num_recycled", num_recycled);
1972
4
    };
1973
1974
    // The first string_view represents the tablet key which has been recycled
1975
    // The second bool represents whether the subsequent fdb tablet key deletion can be done using a range remove or not
1976
46
    using TabletKeyPair = std::pair<std::string_view, bool>;
1977
46
    SyncExecutor<TabletKeyPair> sync_executor(
1978
46
            _thread_pool_group.recycle_tablet_pool,
1979
46
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
1980
46
                        index_id, partition_id),
1981
4.23k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1981
234
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1981
4.00k
            [](const TabletKeyPair& k) { return k.first.empty(); });
1982
1983
    // Elements in `tablet_keys` have the same lifetime as `it` in `scan_and_recycle`
1984
46
    std::vector<std::string> tablet_idx_keys;
1985
46
    std::vector<std::string> restore_job_keys;
1986
46
    std::vector<std::string> init_rs_keys;
1987
46
    std::vector<std::string> tablet_compact_stats_keys;
1988
46
    std::vector<std::string> tablet_load_stats_keys;
1989
46
    std::vector<std::string> versioned_meta_tablet_keys;
1990
8.24k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1991
8.24k
        bool use_range_remove = true;
1992
8.24k
        ++num_scanned;
1993
8.24k
        doris::TabletMetaCloudPB tablet_meta_pb;
1994
8.24k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1995
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1996
0
            use_range_remove = false;
1997
0
            return -1;
1998
0
        }
1999
8.24k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2000
2001
8.24k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2002
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2003
4.00k
            return -1;
2004
4.00k
        }
2005
2006
4.23k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2007
4.23k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2008
4.23k
        if (is_multi_version) {
2009
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2010
6
            tablet_compact_stats_keys.push_back(
2011
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2012
6
            tablet_load_stats_keys.push_back(
2013
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2014
6
            versioned_meta_tablet_keys.push_back(
2015
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2016
6
        }
2017
4.23k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2018
4.23k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2019
4.23k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2020
4.23k
            if (recycle_tablet(tid, metrics_context) != 0) {
2021
0
                LOG_WARNING("failed to recycle tablet")
2022
0
                        .tag("instance_id", instance_id_)
2023
0
                        .tag("tablet_id", tid);
2024
0
                range_move = false;
2025
0
                return {std::string_view(), range_move};
2026
0
            }
2027
4.23k
            ++num_recycled;
2028
4.23k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2029
4.23k
            return {k, range_move};
2030
4.23k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2019
234
                           &metrics_context, k]() mutable -> TabletKeyPair {
2020
234
            if (recycle_tablet(tid, metrics_context) != 0) {
2021
0
                LOG_WARNING("failed to recycle tablet")
2022
0
                        .tag("instance_id", instance_id_)
2023
0
                        .tag("tablet_id", tid);
2024
0
                range_move = false;
2025
0
                return {std::string_view(), range_move};
2026
0
            }
2027
234
            ++num_recycled;
2028
234
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2029
234
            return {k, range_move};
2030
234
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2019
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2020
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2021
0
                LOG_WARNING("failed to recycle tablet")
2022
0
                        .tag("instance_id", instance_id_)
2023
0
                        .tag("tablet_id", tid);
2024
0
                range_move = false;
2025
0
                return {std::string_view(), range_move};
2026
0
            }
2027
4.00k
            ++num_recycled;
2028
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2029
4.00k
            return {k, range_move};
2030
4.00k
        });
2031
4.23k
        return 0;
2032
4.23k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
1990
237
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1991
237
        bool use_range_remove = true;
1992
237
        ++num_scanned;
1993
237
        doris::TabletMetaCloudPB tablet_meta_pb;
1994
237
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1995
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1996
0
            use_range_remove = false;
1997
0
            return -1;
1998
0
        }
1999
237
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2000
2001
237
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2002
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2003
0
            return -1;
2004
0
        }
2005
2006
237
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2007
237
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2008
237
        if (is_multi_version) {
2009
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2010
6
            tablet_compact_stats_keys.push_back(
2011
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2012
6
            tablet_load_stats_keys.push_back(
2013
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2014
6
            versioned_meta_tablet_keys.push_back(
2015
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2016
6
        }
2017
237
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2018
234
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2019
234
                           &metrics_context, k]() mutable -> TabletKeyPair {
2020
234
            if (recycle_tablet(tid, metrics_context) != 0) {
2021
234
                LOG_WARNING("failed to recycle tablet")
2022
234
                        .tag("instance_id", instance_id_)
2023
234
                        .tag("tablet_id", tid);
2024
234
                range_move = false;
2025
234
                return {std::string_view(), range_move};
2026
234
            }
2027
234
            ++num_recycled;
2028
234
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2029
234
            return {k, range_move};
2030
234
        });
2031
234
        return 0;
2032
237
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
1990
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1991
8.00k
        bool use_range_remove = true;
1992
8.00k
        ++num_scanned;
1993
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
1994
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1995
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1996
0
            use_range_remove = false;
1997
0
            return -1;
1998
0
        }
1999
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2000
2001
8.00k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2002
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2003
4.00k
            return -1;
2004
4.00k
        }
2005
2006
4.00k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2007
4.00k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2008
4.00k
        if (is_multi_version) {
2009
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2010
0
            tablet_compact_stats_keys.push_back(
2011
0
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2012
0
            tablet_load_stats_keys.push_back(
2013
0
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2014
0
            versioned_meta_tablet_keys.push_back(
2015
0
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2016
0
        }
2017
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2018
4.00k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2019
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2020
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2021
4.00k
                LOG_WARNING("failed to recycle tablet")
2022
4.00k
                        .tag("instance_id", instance_id_)
2023
4.00k
                        .tag("tablet_id", tid);
2024
4.00k
                range_move = false;
2025
4.00k
                return {std::string_view(), range_move};
2026
4.00k
            }
2027
4.00k
            ++num_recycled;
2028
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2029
4.00k
            return {k, range_move};
2030
4.00k
        });
2031
4.00k
        return 0;
2032
4.00k
    };
2033
2034
    // TODO(AlexYue): Add a unit test to cover use_range_remove = false
2035
46
    auto loop_done = [&, this]() -> int {
2036
46
        bool finished = true;
2037
46
        auto tablet_keys = sync_executor.when_all(&finished);
2038
46
        if (!finished) {
2039
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2040
0
            return -1;
2041
0
        }
2042
46
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2043
        // sort the vector using key's order
2044
44
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2045
49.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2045
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2045
48.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2046
44
        bool use_range_remove = true;
2047
4.23k
        for (auto& [_, remove] : tablet_keys) {
2048
4.23k
            if (!remove) {
2049
0
                use_range_remove = remove;
2050
0
                break;
2051
0
            }
2052
4.23k
        }
2053
44
        DORIS_CLOUD_DEFER {
2054
44
            tablet_idx_keys.clear();
2055
44
            restore_job_keys.clear();
2056
44
            init_rs_keys.clear();
2057
44
            tablet_compact_stats_keys.clear();
2058
44
            tablet_load_stats_keys.clear();
2059
44
            versioned_meta_tablet_keys.clear();
2060
44
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2053
42
        DORIS_CLOUD_DEFER {
2054
42
            tablet_idx_keys.clear();
2055
42
            restore_job_keys.clear();
2056
42
            init_rs_keys.clear();
2057
42
            tablet_compact_stats_keys.clear();
2058
42
            tablet_load_stats_keys.clear();
2059
42
            versioned_meta_tablet_keys.clear();
2060
42
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2053
2
        DORIS_CLOUD_DEFER {
2054
2
            tablet_idx_keys.clear();
2055
2
            restore_job_keys.clear();
2056
2
            init_rs_keys.clear();
2057
2
            tablet_compact_stats_keys.clear();
2058
2
            tablet_load_stats_keys.clear();
2059
2
            versioned_meta_tablet_keys.clear();
2060
2
        };
2061
44
        std::unique_ptr<Transaction> txn;
2062
44
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2063
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2064
0
            return -1;
2065
0
        }
2066
44
        std::string tablet_key_end;
2067
44
        if (!tablet_keys.empty()) {
2068
42
            if (use_range_remove) {
2069
42
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2070
42
                txn->remove(tablet_keys.front().first, tablet_key_end);
2071
42
            } else {
2072
0
                for (auto& [k, _] : tablet_keys) {
2073
0
                    txn->remove(k);
2074
0
                }
2075
0
            }
2076
42
        }
2077
44
        if (is_multi_version) {
2078
6
            for (auto& k : tablet_compact_stats_keys) {
2079
                // Remove all versions of tablet compact stats for recycled tablet
2080
6
                LOG_INFO("remove versioned tablet compact stats key")
2081
6
                        .tag("compact_stats_key", hex(k));
2082
6
                versioned_remove_all(txn.get(), k);
2083
6
            }
2084
6
            for (auto& k : tablet_load_stats_keys) {
2085
                // Remove all versions of tablet load stats for recycled tablet
2086
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2087
6
                versioned_remove_all(txn.get(), k);
2088
6
            }
2089
6
            for (auto& k : versioned_meta_tablet_keys) {
2090
                // Remove all versions of meta tablet for recycled tablet
2091
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2092
6
                versioned_remove_all(txn.get(), k);
2093
6
            }
2094
5
        }
2095
4.23k
        for (auto& k : tablet_idx_keys) {
2096
4.23k
            txn->remove(k);
2097
4.23k
        }
2098
4.23k
        for (auto& k : restore_job_keys) {
2099
4.23k
            txn->remove(k);
2100
4.23k
        }
2101
44
        for (auto& k : init_rs_keys) {
2102
0
            txn->remove(k);
2103
0
        }
2104
44
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2105
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2106
0
                         << ", err=" << err;
2107
0
            return -1;
2108
0
        }
2109
44
        return 0;
2110
44
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2035
42
    auto loop_done = [&, this]() -> int {
2036
42
        bool finished = true;
2037
42
        auto tablet_keys = sync_executor.when_all(&finished);
2038
42
        if (!finished) {
2039
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2040
0
            return -1;
2041
0
        }
2042
42
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2043
        // sort the vector using key's order
2044
42
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2045
42
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2046
42
        bool use_range_remove = true;
2047
234
        for (auto& [_, remove] : tablet_keys) {
2048
234
            if (!remove) {
2049
0
                use_range_remove = remove;
2050
0
                break;
2051
0
            }
2052
234
        }
2053
42
        DORIS_CLOUD_DEFER {
2054
42
            tablet_idx_keys.clear();
2055
42
            restore_job_keys.clear();
2056
42
            init_rs_keys.clear();
2057
42
            tablet_compact_stats_keys.clear();
2058
42
            tablet_load_stats_keys.clear();
2059
42
            versioned_meta_tablet_keys.clear();
2060
42
        };
2061
42
        std::unique_ptr<Transaction> txn;
2062
42
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2063
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2064
0
            return -1;
2065
0
        }
2066
42
        std::string tablet_key_end;
2067
42
        if (!tablet_keys.empty()) {
2068
40
            if (use_range_remove) {
2069
40
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2070
40
                txn->remove(tablet_keys.front().first, tablet_key_end);
2071
40
            } else {
2072
0
                for (auto& [k, _] : tablet_keys) {
2073
0
                    txn->remove(k);
2074
0
                }
2075
0
            }
2076
40
        }
2077
42
        if (is_multi_version) {
2078
6
            for (auto& k : tablet_compact_stats_keys) {
2079
                // Remove all versions of tablet compact stats for recycled tablet
2080
6
                LOG_INFO("remove versioned tablet compact stats key")
2081
6
                        .tag("compact_stats_key", hex(k));
2082
6
                versioned_remove_all(txn.get(), k);
2083
6
            }
2084
6
            for (auto& k : tablet_load_stats_keys) {
2085
                // Remove all versions of tablet load stats for recycled tablet
2086
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2087
6
                versioned_remove_all(txn.get(), k);
2088
6
            }
2089
6
            for (auto& k : versioned_meta_tablet_keys) {
2090
                // Remove all versions of meta tablet for recycled tablet
2091
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2092
6
                versioned_remove_all(txn.get(), k);
2093
6
            }
2094
5
        }
2095
237
        for (auto& k : tablet_idx_keys) {
2096
237
            txn->remove(k);
2097
237
        }
2098
237
        for (auto& k : restore_job_keys) {
2099
237
            txn->remove(k);
2100
237
        }
2101
42
        for (auto& k : init_rs_keys) {
2102
0
            txn->remove(k);
2103
0
        }
2104
42
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2105
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2106
0
                         << ", err=" << err;
2107
0
            return -1;
2108
0
        }
2109
42
        return 0;
2110
42
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2035
4
    auto loop_done = [&, this]() -> int {
2036
4
        bool finished = true;
2037
4
        auto tablet_keys = sync_executor.when_all(&finished);
2038
4
        if (!finished) {
2039
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2040
0
            return -1;
2041
0
        }
2042
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2043
        // sort the vector using key's order
2044
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2045
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2046
2
        bool use_range_remove = true;
2047
4.00k
        for (auto& [_, remove] : tablet_keys) {
2048
4.00k
            if (!remove) {
2049
0
                use_range_remove = remove;
2050
0
                break;
2051
0
            }
2052
4.00k
        }
2053
2
        DORIS_CLOUD_DEFER {
2054
2
            tablet_idx_keys.clear();
2055
2
            restore_job_keys.clear();
2056
2
            init_rs_keys.clear();
2057
2
            tablet_compact_stats_keys.clear();
2058
2
            tablet_load_stats_keys.clear();
2059
2
            versioned_meta_tablet_keys.clear();
2060
2
        };
2061
2
        std::unique_ptr<Transaction> txn;
2062
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2063
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2064
0
            return -1;
2065
0
        }
2066
2
        std::string tablet_key_end;
2067
2
        if (!tablet_keys.empty()) {
2068
2
            if (use_range_remove) {
2069
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2070
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
2071
2
            } else {
2072
0
                for (auto& [k, _] : tablet_keys) {
2073
0
                    txn->remove(k);
2074
0
                }
2075
0
            }
2076
2
        }
2077
2
        if (is_multi_version) {
2078
0
            for (auto& k : tablet_compact_stats_keys) {
2079
                // Remove all versions of tablet compact stats for recycled tablet
2080
0
                LOG_INFO("remove versioned tablet compact stats key")
2081
0
                        .tag("compact_stats_key", hex(k));
2082
0
                versioned_remove_all(txn.get(), k);
2083
0
            }
2084
0
            for (auto& k : tablet_load_stats_keys) {
2085
                // Remove all versions of tablet load stats for recycled tablet
2086
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2087
0
                versioned_remove_all(txn.get(), k);
2088
0
            }
2089
0
            for (auto& k : versioned_meta_tablet_keys) {
2090
                // Remove all versions of meta tablet for recycled tablet
2091
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2092
0
                versioned_remove_all(txn.get(), k);
2093
0
            }
2094
0
        }
2095
4.00k
        for (auto& k : tablet_idx_keys) {
2096
4.00k
            txn->remove(k);
2097
4.00k
        }
2098
4.00k
        for (auto& k : restore_job_keys) {
2099
4.00k
            txn->remove(k);
2100
4.00k
        }
2101
2
        for (auto& k : init_rs_keys) {
2102
0
            txn->remove(k);
2103
0
        }
2104
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2105
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2106
0
                         << ", err=" << err;
2107
0
            return -1;
2108
0
        }
2109
2
        return 0;
2110
2
    };
2111
2112
46
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
2113
46
                               std::move(loop_done));
2114
46
    if (ret != 0) {
2115
2
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
2116
2
        return ret;
2117
2
    }
2118
2119
    // directly remove tablet stats and tablet jobs of these dropped index or partition
2120
44
    std::unique_ptr<Transaction> txn;
2121
44
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2122
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
2123
0
        return -1;
2124
0
    }
2125
44
    txn->remove(stats_key_begin, stats_key_end);
2126
44
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
2127
44
                 << " end=" << hex(stats_key_end);
2128
44
    txn->remove(job_key_begin, job_key_end);
2129
44
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
2130
44
    std::string schema_key_begin, schema_key_end;
2131
44
    std::string schema_dict_key;
2132
44
    std::string versioned_schema_key_begin, versioned_schema_key_end;
2133
44
    if (partition_id <= 0) {
2134
        // Delete schema kv of this index
2135
12
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
2136
12
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
2137
12
        txn->remove(schema_key_begin, schema_key_end);
2138
12
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
2139
12
                     << " end=" << hex(schema_key_end);
2140
12
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
2141
12
        txn->remove(schema_dict_key);
2142
12
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
2143
12
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
2144
12
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
2145
12
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
2146
12
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
2147
12
                     << " end=" << hex(versioned_schema_key_end);
2148
12
    }
2149
2150
44
    TxnErrorCode err = txn->commit();
2151
44
    if (err != TxnErrorCode::TXN_OK) {
2152
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
2153
0
                     << " err=" << err;
2154
0
        return -1;
2155
0
    }
2156
2157
44
    return ret;
2158
44
}
2159
2160
4.81k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
2161
4.81k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
2162
4.81k
    int64_t num_segments = rs_meta_pb.num_segments();
2163
4.81k
    if (num_segments <= 0) return 0;
2164
2165
    // Process inverted indexes
2166
4.81k
    std::vector<std::pair<int64_t, std::string>> index_ids;
2167
    // default format as v1.
2168
4.81k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
2169
4.81k
    bool delete_rowset_data_by_prefix = false;
2170
4.81k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
2171
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
2172
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
2173
0
        delete_rowset_data_by_prefix = true;
2174
4.81k
    } else if (rs_meta_pb.has_tablet_schema()) {
2175
9.00k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
2176
9.00k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
2177
9.00k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
2178
9.00k
            }
2179
9.00k
        }
2180
4.40k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
2181
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
2182
2.00k
        }
2183
4.40k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
2184
        // schema version and index id are not found, delete rowset data by prefix directly.
2185
0
        delete_rowset_data_by_prefix = true;
2186
409
    } else {
2187
        // otherwise, try to get schema kv
2188
409
        InvertedIndexInfo index_info;
2189
409
        int inverted_index_get_ret = inverted_index_id_cache_->get(
2190
409
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
2191
409
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
2192
409
                                 &inverted_index_get_ret);
2193
409
        if (inverted_index_get_ret == 0) {
2194
409
            index_format = index_info.first;
2195
409
            index_ids = index_info.second;
2196
409
        } else if (inverted_index_get_ret == 1) {
2197
            // 1. Schema kv not found means tablet has been recycled
2198
            // Maybe some tablet recycle failed by some bugs
2199
            // We need to delete again to double check
2200
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
2201
            // because we are uncertain about the inverted index information.
2202
            // If there are inverted indexes, some data might not be deleted,
2203
            // but this is acceptable as we have made our best effort to delete the data.
2204
0
            LOG_INFO(
2205
0
                    "delete rowset data schema kv not found, need to delete again to double "
2206
0
                    "check")
2207
0
                    .tag("instance_id", instance_id_)
2208
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
2209
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
2210
            // Currently index_ids is guaranteed to be empty,
2211
            // but we clear it again here as a safeguard against future code changes
2212
            // that might cause index_ids to no longer be empty
2213
0
            index_format = InvertedIndexStorageFormatPB::V2;
2214
0
            index_ids.clear();
2215
0
        } else {
2216
            // failed to get schema kv, delete rowset data by prefix directly.
2217
0
            delete_rowset_data_by_prefix = true;
2218
0
        }
2219
409
    }
2220
2221
4.81k
    if (delete_rowset_data_by_prefix) {
2222
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
2223
0
                                  rs_meta_pb.rowset_id_v2());
2224
0
    }
2225
2226
4.81k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
2227
4.81k
    if (it == accessor_map_.end()) {
2228
800
        LOG_WARNING("instance has no such resource id")
2229
800
                .tag("instance_id", instance_id_)
2230
800
                .tag("resource_id", rs_meta_pb.resource_id());
2231
800
        return -1;
2232
800
    }
2233
4.01k
    auto& accessor = it->second;
2234
4.01k
    int64_t tablet_id = rs_meta_pb.tablet_id();
2235
4.01k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
2236
4.01k
    std::vector<std::string> file_paths;
2237
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
2238
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
2239
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
2240
40.0k
            for (const auto& index_id : index_ids) {
2241
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
2242
40.0k
                                                            index_id.second));
2243
40.0k
            }
2244
20.0k
        } else if (!index_ids.empty()) {
2245
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
2246
0
        }
2247
20.0k
    }
2248
2249
    // Process delete bitmap
2250
4.01k
    file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
2251
    // TODO(AlexYue): seems could do do batch
2252
4.01k
    return accessor->delete_files(file_paths);
2253
4.81k
}
2254
2255
int InstanceRecycler::delete_rowset_data(
2256
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
2257
45
        RecyclerMetricsContext& metrics_context) {
2258
45
    int ret = 0;
2259
    // resource_id -> file_paths
2260
45
    std::map<std::string, std::vector<std::string>> resource_file_paths;
2261
    // (resource_id, tablet_id, rowset_id)
2262
45
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
2263
45
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
2264
2265
54.1k
    for (const auto& [_, rs] : rowsets) {
2266
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
2267
        // due to aborted schema change.
2268
54.1k
        if (is_formal_rowset) {
2269
3.15k
            std::lock_guard lock(recycled_tablets_mtx_);
2270
3.15k
            if (recycled_tablets_.count(rs.tablet_id())) {
2271
0
                continue; // Rowset data has already been deleted
2272
0
            }
2273
3.15k
        }
2274
2275
54.1k
        auto it = accessor_map_.find(rs.resource_id());
2276
        // possible if the accessor is not initilized correctly
2277
54.1k
        if (it == accessor_map_.end()) [[unlikely]] {
2278
1
            LOG_WARNING("instance has no such resource id")
2279
1
                    .tag("instance_id", instance_id_)
2280
1
                    .tag("resource_id", rs.resource_id());
2281
1
            ret = -1;
2282
1
            continue;
2283
1
        }
2284
2285
54.1k
        auto& file_paths = resource_file_paths[rs.resource_id()];
2286
54.1k
        const auto& rowset_id = rs.rowset_id_v2();
2287
54.1k
        int64_t tablet_id = rs.tablet_id();
2288
54.1k
        int64_t num_segments = rs.num_segments();
2289
54.1k
        if (num_segments <= 0) {
2290
0
            metrics_context.total_recycled_num++;
2291
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
2292
0
            continue;
2293
0
        }
2294
2295
        // Process delete bitmap
2296
54.1k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
2297
2298
        // Process inverted indexes
2299
54.1k
        std::vector<std::pair<int64_t, std::string>> index_ids;
2300
        // default format as v1.
2301
54.1k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
2302
54.1k
        int inverted_index_get_ret = 0;
2303
54.1k
        if (rs.has_tablet_schema()) {
2304
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
2305
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
2306
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
2307
53.5k
                }
2308
53.5k
            }
2309
26.5k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
2310
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
2311
26.5k
            }
2312
27.5k
        } else {
2313
27.5k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
2314
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
2315
0
                                "instance_id="
2316
0
                             << instance_id_ << " tablet_id=" << tablet_id
2317
0
                             << " rowset_id=" << rowset_id;
2318
0
                ret = -1;
2319
0
                continue;
2320
0
            }
2321
27.5k
            InvertedIndexInfo index_info;
2322
27.5k
            inverted_index_get_ret =
2323
27.5k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
2324
27.5k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
2325
27.5k
                                     &inverted_index_get_ret);
2326
27.5k
            if (inverted_index_get_ret == 0) {
2327
27.0k
                index_format = index_info.first;
2328
27.0k
                index_ids = index_info.second;
2329
27.0k
            } else if (inverted_index_get_ret == 1) {
2330
                // 1. Schema kv not found means tablet has been recycled
2331
                // Maybe some tablet recycle failed by some bugs
2332
                // We need to delete again to double check
2333
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
2334
                // because we are uncertain about the inverted index information.
2335
                // If there are inverted indexes, some data might not be deleted,
2336
                // but this is acceptable as we have made our best effort to delete the data.
2337
503
                LOG_INFO(
2338
503
                        "delete rowset data schema kv not found, need to delete again to double "
2339
503
                        "check")
2340
503
                        .tag("instance_id", instance_id_)
2341
503
                        .tag("tablet_id", tablet_id)
2342
503
                        .tag("rowset", rs.ShortDebugString());
2343
                // Currently index_ids is guaranteed to be empty,
2344
                // but we clear it again here as a safeguard against future code changes
2345
                // that might cause index_ids to no longer be empty
2346
503
                index_format = InvertedIndexStorageFormatPB::V2;
2347
503
                index_ids.clear();
2348
18.4E
            } else {
2349
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
2350
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
2351
18.4E
                ret = -1;
2352
18.4E
                continue;
2353
18.4E
            }
2354
27.5k
        }
2355
54.1k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
2356
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
2357
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
2358
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
2359
5
            continue;
2360
5
        }
2361
324k
        for (int64_t i = 0; i < num_segments; ++i) {
2362
270k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
2363
270k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
2364
539k
                for (const auto& index_id : index_ids) {
2365
539k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
2366
539k
                                                                index_id.first, index_id.second));
2367
539k
                }
2368
268k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
2369
                // try to recycle inverted index v2 when get_ret == 1
2370
                // we treat schema not found as if it has a v2 format inverted index
2371
                // to reduce chance of data leakage
2372
2.50k
                if (inverted_index_get_ret == 1) {
2373
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
2374
2.50k
                            .tag("instance_id", instance_id_)
2375
2.50k
                            .tag("inverted index v2 path",
2376
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
2377
2.50k
                }
2378
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
2379
2.50k
            }
2380
270k
        }
2381
54.1k
    }
2382
2383
45
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
2384
45
                                                 "delete_rowset_data",
2385
47
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
2385
47
                                                 [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
2386
45
    for (auto& [resource_id, file_paths] : resource_file_paths) {
2387
42
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
2388
42
            DCHECK(accessor_map_.count(*rid))
2389
0
                    << "uninitilized accessor, instance_id=" << instance_id_
2390
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
2391
42
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
2392
42
                                     &accessor_map_);
2393
42
            if (!accessor_map_.contains(*rid)) {
2394
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
2395
0
                        .tag("resource_id", resource_id)
2396
0
                        .tag("instance_id", instance_id_);
2397
0
                return -1;
2398
0
            }
2399
42
            auto& accessor = accessor_map_[*rid];
2400
42
            int ret = accessor->delete_files(*paths);
2401
42
            if (!ret) {
2402
                // deduplication of different files with the same rowset id
2403
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
2404
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
2405
42
                std::set<std::string> deleted_rowset_id;
2406
2407
42
                std::for_each(paths->begin(), paths->end(),
2408
42
                              [&metrics_context, &rowsets, &deleted_rowset_id,
2409
863k
                               this](const std::string& path) {
2410
863k
                                  std::vector<std::string> str;
2411
863k
                                  butil::SplitString(path, '/', &str);
2412
863k
                                  std::string rowset_id;
2413
863k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
2414
860k
                                      rowset_id = str.back().substr(0, pos);
2415
860k
                                  } else {
2416
3.45k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
2417
3.45k
                                      return;
2418
3.45k
                                  }
2419
860k
                                  auto rs_meta = rowsets.find(rowset_id);
2420
860k
                                  if (rs_meta != rowsets.end() &&
2421
862k
                                      !deleted_rowset_id.contains(rowset_id)) {
2422
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
2423
54.1k
                                      metrics_context.total_recycled_data_size +=
2424
54.1k
                                              rs_meta->second.total_disk_size();
2425
54.1k
                                      segment_metrics_context_.total_recycled_num +=
2426
54.1k
                                              rs_meta->second.num_segments();
2427
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
2428
54.1k
                                              rs_meta->second.total_disk_size();
2429
54.1k
                                      metrics_context.total_recycled_num++;
2430
54.1k
                                  }
2431
860k
                              });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
2409
863k
                               this](const std::string& path) {
2410
863k
                                  std::vector<std::string> str;
2411
863k
                                  butil::SplitString(path, '/', &str);
2412
863k
                                  std::string rowset_id;
2413
863k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
2414
860k
                                      rowset_id = str.back().substr(0, pos);
2415
860k
                                  } else {
2416
3.45k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
2417
3.45k
                                      return;
2418
3.45k
                                  }
2419
860k
                                  auto rs_meta = rowsets.find(rowset_id);
2420
860k
                                  if (rs_meta != rowsets.end() &&
2421
862k
                                      !deleted_rowset_id.contains(rowset_id)) {
2422
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
2423
54.1k
                                      metrics_context.total_recycled_data_size +=
2424
54.1k
                                              rs_meta->second.total_disk_size();
2425
54.1k
                                      segment_metrics_context_.total_recycled_num +=
2426
54.1k
                                              rs_meta->second.num_segments();
2427
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
2428
54.1k
                                              rs_meta->second.total_disk_size();
2429
54.1k
                                      metrics_context.total_recycled_num++;
2430
54.1k
                                  }
2431
860k
                              });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
2432
42
                segment_metrics_context_.report();
2433
42
                metrics_context.report();
2434
42
            }
2435
42
            return ret;
2436
42
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
2387
42
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
2388
42
            DCHECK(accessor_map_.count(*rid))
2389
0
                    << "uninitilized accessor, instance_id=" << instance_id_
2390
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
2391
42
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
2392
42
                                     &accessor_map_);
2393
42
            if (!accessor_map_.contains(*rid)) {
2394
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
2395
0
                        .tag("resource_id", resource_id)
2396
0
                        .tag("instance_id", instance_id_);
2397
0
                return -1;
2398
0
            }
2399
42
            auto& accessor = accessor_map_[*rid];
2400
42
            int ret = accessor->delete_files(*paths);
2401
42
            if (!ret) {
2402
                // deduplication of different files with the same rowset id
2403
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
2404
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
2405
42
                std::set<std::string> deleted_rowset_id;
2406
2407
42
                std::for_each(paths->begin(), paths->end(),
2408
42
                              [&metrics_context, &rowsets, &deleted_rowset_id,
2409
42
                               this](const std::string& path) {
2410
42
                                  std::vector<std::string> str;
2411
42
                                  butil::SplitString(path, '/', &str);
2412
42
                                  std::string rowset_id;
2413
42
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
2414
42
                                      rowset_id = str.back().substr(0, pos);
2415
42
                                  } else {
2416
42
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
2417
42
                                      return;
2418
42
                                  }
2419
42
                                  auto rs_meta = rowsets.find(rowset_id);
2420
42
                                  if (rs_meta != rowsets.end() &&
2421
42
                                      !deleted_rowset_id.contains(rowset_id)) {
2422
42
                                      deleted_rowset_id.emplace(rowset_id);
2423
42
                                      metrics_context.total_recycled_data_size +=
2424
42
                                              rs_meta->second.total_disk_size();
2425
42
                                      segment_metrics_context_.total_recycled_num +=
2426
42
                                              rs_meta->second.num_segments();
2427
42
                                      segment_metrics_context_.total_recycled_data_size +=
2428
42
                                              rs_meta->second.total_disk_size();
2429
42
                                      metrics_context.total_recycled_num++;
2430
42
                                  }
2431
42
                              });
2432
42
                segment_metrics_context_.report();
2433
42
                metrics_context.report();
2434
42
            }
2435
42
            return ret;
2436
42
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
2437
42
    }
2438
45
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
2439
5
        LOG_INFO(
2440
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
2441
5
                "resource_id={}, tablet_id={}, instance_id={}, task_type={}",
2442
5
                rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type);
2443
5
        concurrent_delete_executor.add([&]() -> int {
2444
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
2445
5
            if (!ret) {
2446
5
                auto rs = rowsets.at(rowset_id);
2447
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
2448
5
                metrics_context.total_recycled_num++;
2449
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
2450
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
2451
5
                metrics_context.report();
2452
5
                segment_metrics_context_.report();
2453
5
            }
2454
5
            return ret;
2455
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
2443
5
        concurrent_delete_executor.add([&]() -> int {
2444
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
2445
5
            if (!ret) {
2446
5
                auto rs = rowsets.at(rowset_id);
2447
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
2448
5
                metrics_context.total_recycled_num++;
2449
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
2450
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
2451
5
                metrics_context.report();
2452
5
                segment_metrics_context_.report();
2453
5
            }
2454
5
            return ret;
2455
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
2456
5
    }
2457
2458
45
    bool finished = true;
2459
45
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2460
47
    for (int r : rets) {
2461
47
        if (r != 0) {
2462
0
            ret = -1;
2463
0
            break;
2464
0
        }
2465
47
    }
2466
45
    ret = finished ? ret : -1;
2467
45
    return ret;
2468
45
}
2469
2470
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
2471
3.10k
                                         const std::string& rowset_id) {
2472
3.10k
    auto it = accessor_map_.find(resource_id);
2473
3.10k
    if (it == accessor_map_.end()) {
2474
200
        LOG_WARNING("instance has no such resource id")
2475
200
                .tag("instance_id", instance_id_)
2476
200
                .tag("resource_id", resource_id)
2477
200
                .tag("tablet_id", tablet_id)
2478
200
                .tag("rowset_id", rowset_id);
2479
200
        return -1;
2480
200
    }
2481
2.90k
    auto& accessor = it->second;
2482
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
2483
3.10k
}
2484
2485
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
2486
                                                  RecyclerMetricsContext& metrics_context,
2487
0
                                                  int64_t partition_id, bool is_empty_tablet) {
2488
0
    std::string tablet_key_begin, tablet_key_end;
2489
2490
0
    if (partition_id > 0) {
2491
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2492
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2493
0
    } else {
2494
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2495
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2496
0
    }
2497
    // for calculate the total num or bytes of recyled objects
2498
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
2499
0
                                                          std::string_view v) -> int {
2500
0
        doris::TabletMetaCloudPB tablet_meta_pb;
2501
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2502
0
            return 0;
2503
0
        }
2504
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2505
2506
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2507
0
            return 0;
2508
0
        }
2509
2510
0
        if (!is_empty_tablet) {
2511
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
2512
0
                return 0;
2513
0
            }
2514
0
            tablet_metrics_context_.total_need_recycle_num++;
2515
0
        }
2516
0
        return 0;
2517
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
2518
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
2519
0
    metrics_context.report(true);
2520
0
    tablet_metrics_context_.report(true);
2521
0
    segment_metrics_context_.report(true);
2522
0
    return ret;
2523
0
}
2524
2525
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
2526
0
                                                 RecyclerMetricsContext& metrics_context) {
2527
0
    int ret = 0;
2528
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
2529
0
    std::unique_ptr<Transaction> txn;
2530
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2531
0
        LOG_WARNING("failed to recycle tablet ")
2532
0
                .tag("tablet id", tablet_id)
2533
0
                .tag("instance_id", instance_id_)
2534
0
                .tag("reason", "failed to create txn");
2535
0
        ret = -1;
2536
0
    }
2537
0
    GetRowsetResponse resp;
2538
0
    std::string msg;
2539
0
    MetaServiceCode code = MetaServiceCode::OK;
2540
    // get rowsets in tablet
2541
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
2542
0
                        tablet_id, code, msg, &resp);
2543
0
    if (code != MetaServiceCode::OK) {
2544
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2545
0
                .tag("tablet id", tablet_id)
2546
0
                .tag("msg", msg)
2547
0
                .tag("code", code)
2548
0
                .tag("instance id", instance_id_);
2549
0
        ret = -1;
2550
0
    }
2551
0
    for (const auto& rs_meta : resp.rowset_meta()) {
2552
        /*
2553
        * For compatibility, we skip the loop for [0-1] here.
2554
        * The purpose of this loop is to delete object files,
2555
        * and since [0-1] only has meta and doesn't have object files,
2556
        * skipping it doesn't affect system correctness.
2557
        *
2558
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
2559
        * would return error -1 directly, causing the recycle operation to fail.
2560
        *
2561
        * [0-1] doesn't have resource id is a bug.
2562
        * In the future, we will fix this problem, after that,
2563
        * we can remove this if statement.
2564
        *
2565
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
2566
        */
2567
2568
0
        if (rs_meta.end_version() == 1) {
2569
            // Assert that [0-1] has no resource_id to make sure
2570
            // this if statement will not be forgetted to remove
2571
            // when the resource id bug is fixed
2572
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2573
0
            continue;
2574
0
        }
2575
0
        if (!rs_meta.has_resource_id()) {
2576
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2577
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
2578
0
                    .tag("instance_id", instance_id_)
2579
0
                    .tag("tablet_id", tablet_id);
2580
0
            continue;
2581
0
        }
2582
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2583
0
        auto it = accessor_map_.find(rs_meta.resource_id());
2584
        // possible if the accessor is not initilized correctly
2585
0
        if (it == accessor_map_.end()) [[unlikely]] {
2586
0
            LOG_WARNING(
2587
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2588
0
                    "recycle process")
2589
0
                    .tag("tablet id", tablet_id)
2590
0
                    .tag("instance_id", instance_id_)
2591
0
                    .tag("resource_id", rs_meta.resource_id())
2592
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2593
0
            continue;
2594
0
        }
2595
2596
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
2597
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
2598
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
2599
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
2600
0
    }
2601
0
    return ret;
2602
0
}
2603
2604
4.24k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
2605
4.24k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
2606
4.24k
            .tag("instance_id", instance_id_)
2607
4.24k
            .tag("tablet_id", tablet_id);
2608
2609
4.24k
    if (should_recycle_versioned_keys()) {
2610
11
        int ret = recycle_versioned_tablet(tablet_id, metrics_context);
2611
11
        if (ret != 0) {
2612
0
            return ret;
2613
0
        }
2614
        // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED
2615
        // during the recycle_versioned_tablet process.
2616
        //
2617
        // .. And remove restore job rowsets of this tablet too
2618
11
    }
2619
2620
4.24k
    int ret = 0;
2621
4.24k
    auto start_time = steady_clock::now();
2622
2623
4.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
2624
2625
    // collect resource ids
2626
245
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
2627
245
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
2628
245
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
2629
245
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
2630
245
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
2631
245
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
2632
2633
245
    std::set<std::string> resource_ids;
2634
245
    int64_t recycle_rowsets_number = 0;
2635
245
    int64_t recycle_segments_number = 0;
2636
245
    int64_t recycle_rowsets_data_size = 0;
2637
245
    int64_t recycle_rowsets_index_size = 0;
2638
245
    int64_t recycle_restore_job_rowsets_number = 0;
2639
245
    int64_t recycle_restore_job_segments_number = 0;
2640
245
    int64_t recycle_restore_job_rowsets_data_size = 0;
2641
245
    int64_t recycle_restore_job_rowsets_index_size = 0;
2642
245
    int64_t max_rowset_version = 0;
2643
245
    int64_t min_rowset_creation_time = INT64_MAX;
2644
245
    int64_t max_rowset_creation_time = 0;
2645
245
    int64_t min_rowset_expiration_time = INT64_MAX;
2646
245
    int64_t max_rowset_expiration_time = 0;
2647
2648
245
    DORIS_CLOUD_DEFER {
2649
245
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2650
245
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2651
245
                .tag("instance_id", instance_id_)
2652
245
                .tag("tablet_id", tablet_id)
2653
245
                .tag("recycle rowsets number", recycle_rowsets_number)
2654
245
                .tag("recycle segments number", recycle_segments_number)
2655
245
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2656
245
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2657
245
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
2658
245
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
2659
245
                .tag("all restore job rowsets recycle data size",
2660
245
                     recycle_restore_job_rowsets_data_size)
2661
245
                .tag("all restore job rowsets recycle index size",
2662
245
                     recycle_restore_job_rowsets_index_size)
2663
245
                .tag("max rowset version", max_rowset_version)
2664
245
                .tag("min rowset creation time", min_rowset_creation_time)
2665
245
                .tag("max rowset creation time", max_rowset_creation_time)
2666
245
                .tag("min rowset expiration time", min_rowset_expiration_time)
2667
245
                .tag("max rowset expiration time", max_rowset_expiration_time)
2668
245
                .tag("task type", metrics_context.operation_type)
2669
245
                .tag("ret", ret);
2670
245
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
2648
245
    DORIS_CLOUD_DEFER {
2649
245
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2650
245
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2651
245
                .tag("instance_id", instance_id_)
2652
245
                .tag("tablet_id", tablet_id)
2653
245
                .tag("recycle rowsets number", recycle_rowsets_number)
2654
245
                .tag("recycle segments number", recycle_segments_number)
2655
245
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2656
245
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2657
245
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
2658
245
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
2659
245
                .tag("all restore job rowsets recycle data size",
2660
245
                     recycle_restore_job_rowsets_data_size)
2661
245
                .tag("all restore job rowsets recycle index size",
2662
245
                     recycle_restore_job_rowsets_index_size)
2663
245
                .tag("max rowset version", max_rowset_version)
2664
245
                .tag("min rowset creation time", min_rowset_creation_time)
2665
245
                .tag("max rowset creation time", max_rowset_creation_time)
2666
245
                .tag("min rowset expiration time", min_rowset_expiration_time)
2667
245
                .tag("max rowset expiration time", max_rowset_expiration_time)
2668
245
                .tag("task type", metrics_context.operation_type)
2669
245
                .tag("ret", ret);
2670
245
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
2671
2672
245
    std::unique_ptr<Transaction> txn;
2673
245
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2674
0
        LOG_WARNING("failed to recycle tablet ")
2675
0
                .tag("tablet id", tablet_id)
2676
0
                .tag("instance_id", instance_id_)
2677
0
                .tag("reason", "failed to create txn");
2678
0
        ret = -1;
2679
0
    }
2680
245
    GetRowsetResponse resp;
2681
245
    std::string msg;
2682
245
    MetaServiceCode code = MetaServiceCode::OK;
2683
    // get rowsets in tablet
2684
245
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
2685
245
                        tablet_id, code, msg, &resp);
2686
245
    if (code != MetaServiceCode::OK) {
2687
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2688
0
                .tag("tablet id", tablet_id)
2689
0
                .tag("msg", msg)
2690
0
                .tag("code", code)
2691
0
                .tag("instance id", instance_id_);
2692
0
        ret = -1;
2693
0
    }
2694
245
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
2695
2696
2.50k
    for (const auto& rs_meta : resp.rowset_meta()) {
2697
        // The rowset has no resource id and segments when it was generated by compaction
2698
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
2699
2.50k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
2700
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
2701
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
2702
0
                    .tag("instance_id", instance_id_)
2703
0
                    .tag("tablet_id", tablet_id);
2704
0
            recycle_rowsets_number += 1;
2705
0
            continue;
2706
0
        }
2707
2.50k
        if (!rs_meta.has_resource_id()) {
2708
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2709
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
2710
1
                    .tag("instance_id", instance_id_)
2711
1
                    .tag("tablet_id", tablet_id);
2712
1
            return -1;
2713
1
        }
2714
2.50k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2715
2.50k
        auto it = accessor_map_.find(rs_meta.resource_id());
2716
        // possible if the accessor is not initilized correctly
2717
2.50k
        if (it == accessor_map_.end()) [[unlikely]] {
2718
1
            LOG_WARNING(
2719
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2720
1
                    "recycle process")
2721
1
                    .tag("tablet id", tablet_id)
2722
1
                    .tag("instance_id", instance_id_)
2723
1
                    .tag("resource_id", rs_meta.resource_id())
2724
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2725
1
            return -1;
2726
1
        }
2727
2.50k
        recycle_rowsets_number += 1;
2728
2.50k
        recycle_segments_number += rs_meta.num_segments();
2729
2.50k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
2730
2.50k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
2731
2.50k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
2732
2.50k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
2733
2.50k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
2734
2.50k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
2735
2.50k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
2736
2.50k
        resource_ids.emplace(rs_meta.resource_id());
2737
2.50k
    }
2738
2739
    // get restore job rowset in tablet
2740
243
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
2741
243
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
2742
243
    if (code != MetaServiceCode::OK) {
2743
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
2744
0
                .tag("tablet id", tablet_id)
2745
0
                .tag("msg", msg)
2746
0
                .tag("code", code)
2747
0
                .tag("instance id", instance_id_);
2748
0
        return -1;
2749
0
    }
2750
2751
243
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
2752
0
        if (!rs_meta.has_resource_id()) {
2753
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2754
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
2755
0
                    .tag("instance_id", instance_id_)
2756
0
                    .tag("tablet_id", tablet_id);
2757
0
            return -1;
2758
0
        }
2759
2760
0
        auto it = accessor_map_.find(rs_meta.resource_id());
2761
        // possible if the accessor is not initilized correctly
2762
0
        if (it == accessor_map_.end()) [[unlikely]] {
2763
0
            LOG_WARNING(
2764
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2765
0
                    "recycle process")
2766
0
                    .tag("tablet id", tablet_id)
2767
0
                    .tag("instance_id", instance_id_)
2768
0
                    .tag("resource_id", rs_meta.resource_id())
2769
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2770
0
            return -1;
2771
0
        }
2772
0
        recycle_restore_job_rowsets_number += 1;
2773
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
2774
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
2775
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
2776
0
        resource_ids.emplace(rs_meta.resource_id());
2777
0
    }
2778
2779
243
    LOG_INFO("recycle tablet start to delete object")
2780
243
            .tag("instance id", instance_id_)
2781
243
            .tag("tablet id", tablet_id)
2782
243
            .tag("recycle tablet resource ids are",
2783
243
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
2784
243
                                 [](std::string rs_id, const auto& it) {
2785
203
                                     return rs_id.empty() ? it : rs_id + ", " + it;
2786
203
                                 }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
2784
203
                                 [](std::string rs_id, const auto& it) {
2785
203
                                     return rs_id.empty() ? it : rs_id + ", " + it;
2786
203
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
2787
2788
243
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
2789
243
            _thread_pool_group.s3_producer_pool,
2790
243
            fmt::format("delete tablet {} s3 rowset", tablet_id),
2791
243
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
2791
203
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
2792
2793
    // delete all rowset data in this tablet
2794
    // ATTN: there may be data leak if not all accessor initilized successfully
2795
    //       partial data deleted if the tablet is stored cross-storage vault
2796
    //       vault id is not attached to TabletMeta...
2797
243
    for (const auto& resource_id : resource_ids) {
2798
203
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
2799
203
        concurrent_delete_executor.add(
2800
203
                [&, rs_id = resource_id,
2801
203
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
2802
203
                    std::unique_ptr<int, std::function<void(int*)>> defer(
2803
203
                            (int*)0x01, [&](int*) { metrics_context.report(); });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
Line
Count
Source
2803
203
                            (int*)0x01, [&](int*) { metrics_context.report(); });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
2804
203
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
2805
203
                    if (res != 0) {
2806
1
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
2807
1
                                     << " path=" << accessor_ptr->uri()
2808
1
                                     << " task type=" << metrics_context.operation_type;
2809
1
                        return std::make_pair(-1, rs_id);
2810
1
                    }
2811
202
                    return std::make_pair(0, rs_id);
2812
203
                });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
2801
203
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
2802
203
                    std::unique_ptr<int, std::function<void(int*)>> defer(
2803
203
                            (int*)0x01, [&](int*) { metrics_context.report(); });
2804
203
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
2805
203
                    if (res != 0) {
2806
1
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
2807
1
                                     << " path=" << accessor_ptr->uri()
2808
1
                                     << " task type=" << metrics_context.operation_type;
2809
1
                        return std::make_pair(-1, rs_id);
2810
1
                    }
2811
202
                    return std::make_pair(0, rs_id);
2812
203
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
2813
203
    }
2814
2815
243
    bool finished = true;
2816
243
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
2817
243
    for (auto& r : rets) {
2818
203
        if (r.first != 0) {
2819
1
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
2820
1
            ret = -1;
2821
1
        }
2822
203
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
2823
203
    }
2824
243
    ret = finished ? ret : -1;
2825
2826
243
    if (ret != 0) { // failed recycle tablet data
2827
1
        LOG_WARNING("ret!=0")
2828
1
                .tag("finished", finished)
2829
1
                .tag("ret", ret)
2830
1
                .tag("instance_id", instance_id_)
2831
1
                .tag("tablet_id", tablet_id);
2832
1
        return ret;
2833
1
    }
2834
2835
242
    tablet_metrics_context_.total_recycled_data_size +=
2836
242
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2837
242
    tablet_metrics_context_.total_recycled_num += 1;
2838
242
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
2839
242
    segment_metrics_context_.total_recycled_data_size +=
2840
242
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2841
242
    metrics_context.total_recycled_data_size +=
2842
242
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2843
242
    tablet_metrics_context_.report();
2844
242
    segment_metrics_context_.report();
2845
242
    metrics_context.report();
2846
2847
242
    txn.reset();
2848
242
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2849
0
        LOG_WARNING("failed to recycle tablet ")
2850
0
                .tag("tablet id", tablet_id)
2851
0
                .tag("instance_id", instance_id_)
2852
0
                .tag("reason", "failed to create txn");
2853
0
        ret = -1;
2854
0
    }
2855
    // delete all rowset kv in this tablet
2856
242
    txn->remove(rs_key0, rs_key1);
2857
242
    txn->remove(recyc_rs_key0, recyc_rs_key1);
2858
242
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
2859
2860
    // remove delete bitmap for MoW table
2861
242
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
2862
242
    txn->remove(pending_key);
2863
242
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
2864
242
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
2865
242
    txn->remove(delete_bitmap_start, delete_bitmap_end);
2866
2867
242
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
2868
242
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
2869
242
    txn->remove(dbm_start_key, dbm_end_key);
2870
242
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
2871
242
              << " end=" << hex(dbm_end_key);
2872
2873
242
    TxnErrorCode err = txn->commit();
2874
242
    if (err != TxnErrorCode::TXN_OK) {
2875
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
2876
0
        ret = -1;
2877
0
    }
2878
2879
242
    if (ret == 0) {
2880
        // All object files under tablet have been deleted
2881
242
        std::lock_guard lock(recycled_tablets_mtx_);
2882
242
        recycled_tablets_.insert(tablet_id);
2883
242
    }
2884
2885
242
    return ret;
2886
243
}
2887
2888
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
2889
11
                                               RecyclerMetricsContext& metrics_context) {
2890
11
    int ret = 0;
2891
11
    auto start_time = steady_clock::now();
2892
2893
11
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
2894
2895
    // collect resource ids
2896
11
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
2897
11
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
2898
11
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
2899
11
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
2900
2901
11
    int64_t recycle_rowsets_number = 0;
2902
11
    int64_t recycle_segments_number = 0;
2903
11
    int64_t recycle_rowsets_data_size = 0;
2904
11
    int64_t recycle_rowsets_index_size = 0;
2905
11
    int64_t max_rowset_version = 0;
2906
11
    int64_t min_rowset_creation_time = INT64_MAX;
2907
11
    int64_t max_rowset_creation_time = 0;
2908
11
    int64_t min_rowset_expiration_time = INT64_MAX;
2909
11
    int64_t max_rowset_expiration_time = 0;
2910
2911
11
    DORIS_CLOUD_DEFER {
2912
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2913
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2914
11
                .tag("instance_id", instance_id_)
2915
11
                .tag("tablet_id", tablet_id)
2916
11
                .tag("recycle rowsets number", recycle_rowsets_number)
2917
11
                .tag("recycle segments number", recycle_segments_number)
2918
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2919
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2920
11
                .tag("max rowset version", max_rowset_version)
2921
11
                .tag("min rowset creation time", min_rowset_creation_time)
2922
11
                .tag("max rowset creation time", max_rowset_creation_time)
2923
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
2924
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
2925
11
                .tag("ret", ret);
2926
11
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
2911
11
    DORIS_CLOUD_DEFER {
2912
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2913
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2914
11
                .tag("instance_id", instance_id_)
2915
11
                .tag("tablet_id", tablet_id)
2916
11
                .tag("recycle rowsets number", recycle_rowsets_number)
2917
11
                .tag("recycle segments number", recycle_segments_number)
2918
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2919
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2920
11
                .tag("max rowset version", max_rowset_version)
2921
11
                .tag("min rowset creation time", min_rowset_creation_time)
2922
11
                .tag("max rowset creation time", max_rowset_creation_time)
2923
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
2924
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
2925
11
                .tag("ret", ret);
2926
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
2927
2928
11
    std::unique_ptr<Transaction> txn;
2929
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2930
0
        LOG_WARNING("failed to recycle tablet ")
2931
0
                .tag("tablet id", tablet_id)
2932
0
                .tag("instance_id", instance_id_)
2933
0
                .tag("reason", "failed to create txn");
2934
0
        ret = -1;
2935
0
    }
2936
2937
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
2938
    // by the related operation logs.
2939
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
2940
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
2941
11
    MetaReader meta_reader(instance_id_);
2942
11
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
2943
11
    if (err == TxnErrorCode::TXN_OK) {
2944
11
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
2945
11
    }
2946
11
    if (err != TxnErrorCode::TXN_OK) {
2947
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2948
0
                .tag("tablet id", tablet_id)
2949
0
                .tag("err", err)
2950
0
                .tag("instance id", instance_id_);
2951
0
        ret = -1;
2952
0
    }
2953
2954
11
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
2955
11
             load_rowset_metas.size(), compact_rowset_metas.size())
2956
11
            .tag("instance_id", instance_id_)
2957
11
            .tag("tablet_id", tablet_id);
2958
2959
11
    SyncExecutor<int> concurrent_delete_executor(
2960
11
            _thread_pool_group.s3_producer_pool,
2961
11
            fmt::format("delete tablet {} s3 rowset", tablet_id),
2962
11
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
2963
2964
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
2965
60
        recycle_rowsets_number += 1;
2966
60
        recycle_segments_number += rs_meta.num_segments();
2967
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
2968
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
2969
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
2970
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
2971
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
2972
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
2973
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
2974
60
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
2964
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
2965
60
        recycle_rowsets_number += 1;
2966
60
        recycle_segments_number += rs_meta.num_segments();
2967
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
2968
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
2969
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
2970
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
2971
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
2972
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
2973
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
2974
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
2975
2976
11
    std::vector<RowsetDeleteTask> all_tasks;
2977
2978
11
    auto create_delete_task = [this](const RowsetMetaCloudPB& rs_meta, std::string_view recycle_key,
2979
11
                                     std::string_view non_versioned_rowset_key =
2980
60
                                             "") -> RowsetDeleteTask {
2981
60
        RowsetDeleteTask task;
2982
60
        task.rowset_meta = rs_meta;
2983
60
        task.recycle_rowset_key = std::string(recycle_key);
2984
60
        task.non_versioned_rowset_key = std::string(non_versioned_rowset_key);
2985
60
        task.versioned_rowset_key = versioned::meta_rowset_key(
2986
60
                {instance_id_, rs_meta.tablet_id(), rs_meta.rowset_id_v2()});
2987
60
        return task;
2988
60
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clERKNS_17RowsetMetaCloudPBESt17basic_string_viewIcSt11char_traitsIcEESB_
Line
Count
Source
2980
60
                                             "") -> RowsetDeleteTask {
2981
60
        RowsetDeleteTask task;
2982
60
        task.rowset_meta = rs_meta;
2983
60
        task.recycle_rowset_key = std::string(recycle_key);
2984
60
        task.non_versioned_rowset_key = std::string(non_versioned_rowset_key);
2985
60
        task.versioned_rowset_key = versioned::meta_rowset_key(
2986
60
                {instance_id_, rs_meta.tablet_id(), rs_meta.rowset_id_v2()});
2987
60
        return task;
2988
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clERKNS_17RowsetMetaCloudPBESt17basic_string_viewIcSt11char_traitsIcEESB_
2989
2990
60
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
2991
60
        update_rowset_stats(rs_meta);
2992
        // Version 0-1 rowset has no resource_id and no actual data files,
2993
        // but still needs ref_count key cleanup, so we add it to all_tasks.
2994
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
2995
60
        std::string rowset_load_key =
2996
60
                versioned::meta_rowset_load_key({instance_id_, tablet_id, rs_meta.end_version()});
2997
60
        std::string rowset_key = meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
2998
60
        RowsetDeleteTask task = create_delete_task(
2999
60
                rs_meta, encode_versioned_key(rowset_load_key, versionstamp), rowset_key);
3000
60
        all_tasks.push_back(std::move(task));
3001
60
    }
3002
3003
11
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
3004
0
        update_rowset_stats(rs_meta);
3005
        // Version 0-1 rowset has no resource_id and no actual data files,
3006
        // but still needs ref_count key cleanup, so we add it to all_tasks.
3007
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
3008
0
        std::string rowset_compact_key = versioned::meta_rowset_compact_key(
3009
0
                {instance_id_, tablet_id, rs_meta.end_version()});
3010
0
        std::string rowset_key = meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
3011
0
        RowsetDeleteTask task = create_delete_task(
3012
0
                rs_meta, encode_versioned_key(rowset_compact_key, versionstamp), rowset_key);
3013
0
        all_tasks.push_back(std::move(task));
3014
0
    }
3015
3016
11
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
3017
0
        RecycleRowsetPB recycle_rowset;
3018
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
3019
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3020
0
            return -1;
3021
0
        }
3022
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
3023
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
3024
                // in old version, keep this key-value pair and it needs to be checked manually
3025
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3026
0
                return -1;
3027
0
            }
3028
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
3029
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3030
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3031
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
3032
0
                return -1;
3033
0
            }
3034
            // decode rowset_id
3035
0
            auto k1 = k;
3036
0
            k1.remove_prefix(1);
3037
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3038
0
            decode_key(&k1, &out);
3039
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3040
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3041
0
            LOG_INFO("delete old-version rowset data")
3042
0
                    .tag("instance_id", instance_id_)
3043
0
                    .tag("tablet_id", tablet_id)
3044
0
                    .tag("rowset_id", rowset_id);
3045
3046
            // Old version RecycleRowsetPB lacks full rowset_meta info (num_segments, schema, etc.),
3047
            // so we must use prefix deletion directly instead of batch delete.
3048
0
            concurrent_delete_executor.add(
3049
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
3050
                        // delete by prefix, the recycle rowset key will be deleted by range later.
3051
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
3052
0
                    });
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
3053
0
        } else {
3054
0
            const auto& rowset_meta = recycle_rowset.rowset_meta();
3055
            // Version 0-1 rowset has no resource_id and no actual data files,
3056
            // but still needs ref_count key cleanup, so we add it to all_tasks.
3057
            // It will be filtered out in Phase 2 when building rowsets_to_delete.
3058
0
            RowsetDeleteTask task = create_delete_task(rowset_meta, k);
3059
0
            all_tasks.push_back(std::move(task));
3060
0
        }
3061
0
        return 0;
3062
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_
3063
3064
11
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
3065
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
3066
0
                .tag("tablet id", tablet_id)
3067
0
                .tag("instance_id", instance_id_)
3068
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
3069
0
        ret = -1;
3070
0
    }
3071
3072
    // Phase 1: Classify tasks by ref_count
3073
11
    std::vector<RowsetDeleteTask> batch_delete_tasks;
3074
60
    for (auto& task : all_tasks) {
3075
60
        int classify_ret = classify_rowset_task_by_ref_count(task, batch_delete_tasks);
3076
60
        if (classify_ret < 0) {
3077
0
            LOG_WARNING("failed to classify rowset task, fallback to old logic")
3078
0
                    .tag("instance_id", instance_id_)
3079
0
                    .tag("tablet_id", tablet_id)
3080
0
                    .tag("rowset_id", task.rowset_meta.rowset_id_v2());
3081
0
            concurrent_delete_executor.add([this, t = std::move(task)]() mutable {
3082
0
                return recycle_rowset_meta_and_data(t.recycle_rowset_key, t.rowset_meta,
3083
0
                                                    t.non_versioned_rowset_key);
3084
0
            });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_5clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_5clEv
3085
0
        }
3086
60
    }
3087
3088
11
    g_bvar_recycler_batch_delete_rowset_plan_count.put(instance_id_, batch_delete_tasks.size());
3089
3090
11
    LOG_INFO("batch delete plan created")
3091
11
            .tag("instance_id", instance_id_)
3092
11
            .tag("tablet_id", tablet_id)
3093
11
            .tag("plan_count", batch_delete_tasks.size());
3094
3095
    // Phase 2: Execute batch delete using existing delete_rowset_data
3096
11
    if (!batch_delete_tasks.empty()) {
3097
10
        std::map<std::string, RowsetMetaCloudPB> rowsets_to_delete;
3098
49
        for (const auto& task : batch_delete_tasks) {
3099
            // Version 0-1 rowset has no resource_id and no actual data files, skip it
3100
49
            if (task.rowset_meta.resource_id().empty()) {
3101
10
                LOG_INFO("skip rowset with empty resource_id in batch delete")
3102
10
                        .tag("instance_id", instance_id_)
3103
10
                        .tag("tablet_id", tablet_id)
3104
10
                        .tag("rowset_id", task.rowset_meta.rowset_id_v2());
3105
10
                continue;
3106
10
            }
3107
39
            rowsets_to_delete[task.rowset_meta.rowset_id_v2()] = task.rowset_meta;
3108
39
        }
3109
3110
        // Only call delete_rowset_data if there are rowsets with actual data to delete
3111
10
        bool delete_success = true;
3112
10
        if (!rowsets_to_delete.empty()) {
3113
9
            RecyclerMetricsContext batch_metrics_context(instance_id_,
3114
9
                                                         "batch_delete_versioned_tablet");
3115
9
            int delete_ret = delete_rowset_data(
3116
9
                    rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, batch_metrics_context);
3117
9
            if (delete_ret != 0) {
3118
0
                LOG_WARNING("batch delete execution failed")
3119
0
                        .tag("instance_id", instance_id_)
3120
0
                        .tag("tablet_id", tablet_id);
3121
0
                g_bvar_recycler_batch_delete_failures.put(instance_id_, 1);
3122
0
                ret = -1;
3123
0
                delete_success = false;
3124
0
            }
3125
9
        }
3126
3127
        // Phase 3: Only cleanup metadata if data deletion succeeded.
3128
        // If deletion failed, keep recycle_rowset_key so next round will retry.
3129
10
        if (delete_success) {
3130
10
            int cleanup_ret = cleanup_rowset_metadata(batch_delete_tasks);
3131
10
            if (cleanup_ret != 0) {
3132
0
                LOG_WARNING("batch delete cleanup failed")
3133
0
                        .tag("instance_id", instance_id_)
3134
0
                        .tag("tablet_id", tablet_id);
3135
0
                ret = -1;
3136
0
            }
3137
10
        }
3138
10
    }
3139
3140
    // Always wait for fallback tasks to complete before returning
3141
11
    bool finished = true;
3142
11
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3143
11
    for (int r : rets) {
3144
0
        if (r != 0) {
3145
0
            ret = -1;
3146
0
        }
3147
0
    }
3148
3149
11
    ret = finished ? ret : -1;
3150
3151
11
    if (ret != 0) { // failed recycle tablet data
3152
0
        LOG_WARNING("recycle versioned tablet failed")
3153
0
                .tag("finished", finished)
3154
0
                .tag("ret", ret)
3155
0
                .tag("instance_id", instance_id_)
3156
0
                .tag("tablet_id", tablet_id);
3157
0
        return ret;
3158
0
    }
3159
3160
11
    tablet_metrics_context_.total_recycled_data_size +=
3161
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3162
11
    tablet_metrics_context_.total_recycled_num += 1;
3163
11
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
3164
11
    segment_metrics_context_.total_recycled_data_size +=
3165
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3166
11
    metrics_context.total_recycled_data_size +=
3167
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3168
11
    tablet_metrics_context_.report();
3169
11
    segment_metrics_context_.report();
3170
11
    metrics_context.report();
3171
3172
11
    txn.reset();
3173
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3174
0
        LOG_WARNING("failed to recycle tablet ")
3175
0
                .tag("tablet id", tablet_id)
3176
0
                .tag("instance_id", instance_id_)
3177
0
                .tag("reason", "failed to create txn");
3178
0
        ret = -1;
3179
0
    }
3180
    // delete all rowset kv in this tablet
3181
11
    txn->remove(rs_key0, rs_key1);
3182
11
    txn->remove(recyc_rs_key0, recyc_rs_key1);
3183
3184
    // remove delete bitmap for MoW table
3185
11
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
3186
11
    txn->remove(pending_key);
3187
11
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
3188
11
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
3189
11
    txn->remove(delete_bitmap_start, delete_bitmap_end);
3190
3191
11
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
3192
11
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
3193
11
    txn->remove(dbm_start_key, dbm_end_key);
3194
11
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
3195
11
              << " end=" << hex(dbm_end_key);
3196
3197
11
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
3198
11
    std::string tablet_index_val;
3199
11
    err = txn->get(versioned_idx_key, &tablet_index_val);
3200
11
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
3201
0
        LOG_WARNING("failed to get tablet index kv")
3202
0
                .tag("instance_id", instance_id_)
3203
0
                .tag("tablet_id", tablet_id)
3204
0
                .tag("err", err);
3205
0
        ret = -1;
3206
11
    } else if (err == TxnErrorCode::TXN_OK) {
3207
        // If the tablet index kv exists, we need to delete it
3208
10
        TabletIndexPB tablet_index_pb;
3209
10
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
3210
0
            LOG_WARNING("failed to parse tablet index pb")
3211
0
                    .tag("instance_id", instance_id_)
3212
0
                    .tag("tablet_id", tablet_id);
3213
0
            ret = -1;
3214
10
        } else {
3215
10
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
3216
10
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
3217
10
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
3218
10
            txn->remove(versioned_inverted_idx_key);
3219
10
            txn->remove(versioned_idx_key);
3220
10
        }
3221
10
    }
3222
3223
11
    err = txn->commit();
3224
11
    if (err != TxnErrorCode::TXN_OK) {
3225
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
3226
0
        ret = -1;
3227
0
    }
3228
3229
11
    if (ret == 0) {
3230
        // All object files under tablet have been deleted
3231
11
        std::lock_guard lock(recycled_tablets_mtx_);
3232
11
        recycled_tablets_.insert(tablet_id);
3233
11
    }
3234
3235
11
    return ret;
3236
11
}
3237
3238
18
int InstanceRecycler::recycle_rowsets() {
3239
18
    if (should_recycle_versioned_keys()) {
3240
5
        return recycle_versioned_rowsets();
3241
5
    }
3242
3243
13
    const std::string task_name = "recycle_rowsets";
3244
13
    int64_t num_scanned = 0;
3245
13
    int64_t num_expired = 0;
3246
13
    int64_t num_prepare = 0;
3247
13
    int64_t num_compacted = 0;
3248
13
    int64_t num_empty_rowset = 0;
3249
13
    size_t total_rowset_key_size = 0;
3250
13
    size_t total_rowset_value_size = 0;
3251
13
    size_t expired_rowset_size = 0;
3252
13
    std::atomic_long num_recycled = 0;
3253
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3254
3255
13
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
3256
13
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
3257
13
    std::string recyc_rs_key0;
3258
13
    std::string recyc_rs_key1;
3259
13
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
3260
13
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
3261
3262
13
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
3263
3264
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3265
13
    register_recycle_task(task_name, start_time);
3266
3267
13
    DORIS_CLOUD_DEFER {
3268
13
        unregister_recycle_task(task_name);
3269
13
        int64_t cost =
3270
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3271
13
        metrics_context.finish_report();
3272
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3273
13
                .tag("instance_id", instance_id_)
3274
13
                .tag("num_scanned", num_scanned)
3275
13
                .tag("num_expired", num_expired)
3276
13
                .tag("num_recycled", num_recycled)
3277
13
                .tag("num_recycled.prepare", num_prepare)
3278
13
                .tag("num_recycled.compacted", num_compacted)
3279
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3280
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3281
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3282
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
3283
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
3267
13
    DORIS_CLOUD_DEFER {
3268
13
        unregister_recycle_task(task_name);
3269
13
        int64_t cost =
3270
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3271
13
        metrics_context.finish_report();
3272
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3273
13
                .tag("instance_id", instance_id_)
3274
13
                .tag("num_scanned", num_scanned)
3275
13
                .tag("num_expired", num_expired)
3276
13
                .tag("num_recycled", num_recycled)
3277
13
                .tag("num_recycled.prepare", num_prepare)
3278
13
                .tag("num_recycled.compacted", num_compacted)
3279
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3280
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3281
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3282
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
3283
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
3284
3285
13
    std::vector<std::string> rowset_keys;
3286
    // rowset_id -> rowset_meta
3287
    // store rowset id and meta for statistics rs size when delete
3288
13
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
3289
3290
    // Store keys of rowset recycled by background workers
3291
13
    std::mutex async_recycled_rowset_keys_mutex;
3292
13
    std::vector<std::string> async_recycled_rowset_keys;
3293
13
    auto worker_pool = std::make_unique<SimpleThreadPool>(
3294
13
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
3295
13
    worker_pool->start();
3296
    // TODO batch delete
3297
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
3298
4.00k
        std::string dbm_start_key =
3299
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3300
4.00k
        std::string dbm_end_key = dbm_start_key;
3301
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
3302
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
3303
4.00k
        if (ret != 0) {
3304
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
3305
0
                         << instance_id_;
3306
0
        }
3307
4.00k
        return ret;
3308
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
3297
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
3298
4.00k
        std::string dbm_start_key =
3299
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3300
4.00k
        std::string dbm_end_key = dbm_start_key;
3301
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
3302
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
3303
4.00k
        if (ret != 0) {
3304
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
3305
0
                         << instance_id_;
3306
0
        }
3307
4.00k
        return ret;
3308
4.00k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
3309
13
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
3310
900
                                            int64_t tablet_id, const std::string& rowset_id) {
3311
        // Try to delete rowset data in background thread
3312
900
        int ret = worker_pool->submit_with_timeout(
3313
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3314
800
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3315
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3316
0
                        return;
3317
0
                    }
3318
800
                    std::vector<std::string> keys;
3319
800
                    {
3320
800
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3321
800
                        async_recycled_rowset_keys.push_back(std::move(key));
3322
800
                        if (async_recycled_rowset_keys.size() > 100) {
3323
7
                            keys.swap(async_recycled_rowset_keys);
3324
7
                        }
3325
800
                    }
3326
800
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
3327
800
                    if (keys.empty()) return;
3328
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3329
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3330
0
                                     << instance_id_;
3331
7
                    } else {
3332
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3333
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3334
7
                                           num_recycled, start_time);
3335
7
                    }
3336
7
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
3313
800
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3314
800
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3315
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3316
0
                        return;
3317
0
                    }
3318
800
                    std::vector<std::string> keys;
3319
800
                    {
3320
800
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3321
800
                        async_recycled_rowset_keys.push_back(std::move(key));
3322
800
                        if (async_recycled_rowset_keys.size() > 100) {
3323
7
                            keys.swap(async_recycled_rowset_keys);
3324
7
                        }
3325
800
                    }
3326
800
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
3327
800
                    if (keys.empty()) return;
3328
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3329
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3330
0
                                     << instance_id_;
3331
7
                    } else {
3332
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3333
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3334
7
                                           num_recycled, start_time);
3335
7
                    }
3336
7
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
3337
900
                0);
3338
900
        if (ret == 0) return 0;
3339
        // Submit task failed, delete rowset data in current thread
3340
100
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3341
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3342
0
            return -1;
3343
0
        }
3344
100
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
3345
0
            return -1;
3346
0
        }
3347
100
        rowset_keys.push_back(std::move(key));
3348
100
        return 0;
3349
100
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
3310
900
                                            int64_t tablet_id, const std::string& rowset_id) {
3311
        // Try to delete rowset data in background thread
3312
900
        int ret = worker_pool->submit_with_timeout(
3313
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3314
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3315
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3316
900
                        return;
3317
900
                    }
3318
900
                    std::vector<std::string> keys;
3319
900
                    {
3320
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3321
900
                        async_recycled_rowset_keys.push_back(std::move(key));
3322
900
                        if (async_recycled_rowset_keys.size() > 100) {
3323
900
                            keys.swap(async_recycled_rowset_keys);
3324
900
                        }
3325
900
                    }
3326
900
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
3327
900
                    if (keys.empty()) return;
3328
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3329
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3330
900
                                     << instance_id_;
3331
900
                    } else {
3332
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3333
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3334
900
                                           num_recycled, start_time);
3335
900
                    }
3336
900
                },
3337
900
                0);
3338
900
        if (ret == 0) return 0;
3339
        // Submit task failed, delete rowset data in current thread
3340
100
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3341
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3342
0
            return -1;
3343
0
        }
3344
100
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
3345
0
            return -1;
3346
0
        }
3347
100
        rowset_keys.push_back(std::move(key));
3348
100
        return 0;
3349
100
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
3350
3351
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3352
3353
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
3354
4.00k
        ++num_scanned;
3355
4.00k
        total_rowset_key_size += k.size();
3356
4.00k
        total_rowset_value_size += v.size();
3357
4.00k
        RecycleRowsetPB rowset;
3358
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3359
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3360
0
            return -1;
3361
0
        }
3362
3363
4.00k
        int64_t current_time = ::time(nullptr);
3364
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3365
3366
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3367
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3368
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3369
4.00k
        if (current_time < expiration) { // not expired
3370
0
            return 0;
3371
0
        }
3372
4.00k
        ++num_expired;
3373
4.00k
        expired_rowset_size += v.size();
3374
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3375
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3376
                // in old version, keep this key-value pair and it needs to be checked manually
3377
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3378
0
                return -1;
3379
0
            }
3380
250
            if (rowset.resource_id().empty()) [[unlikely]] {
3381
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3382
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3383
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3384
0
                rowset_keys.emplace_back(k);
3385
0
                return -1;
3386
0
            }
3387
            // decode rowset_id
3388
250
            auto k1 = k;
3389
250
            k1.remove_prefix(1);
3390
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3391
250
            decode_key(&k1, &out);
3392
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3393
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3394
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3395
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
3396
250
                      << " task_type=" << metrics_context.operation_type;
3397
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3398
250
                                             rowset.tablet_id(), rowset_id) != 0) {
3399
0
                return -1;
3400
0
            }
3401
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
3402
250
            metrics_context.total_recycled_num++;
3403
250
            segment_metrics_context_.total_recycled_data_size +=
3404
250
                    rowset.rowset_meta().total_disk_size();
3405
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
3406
250
            segment_metrics_context_.report();
3407
250
            metrics_context.report();
3408
250
            return 0;
3409
250
        }
3410
        // TODO(plat1ko): check rowset not referenced
3411
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
3412
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3413
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3414
0
                LOG_INFO("recycle rowset that has empty resource id");
3415
0
            } else {
3416
                // other situations, keep this key-value pair and it needs to be checked manually
3417
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3418
0
                return -1;
3419
0
            }
3420
0
        }
3421
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3422
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
3423
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3424
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3425
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
3426
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3427
3.75k
                  << " rowset_meta_size=" << v.size()
3428
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
3429
3.75k
                  << " task_type=" << metrics_context.operation_type;
3430
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3431
            // unable to calculate file path, can only be deleted by rowset id prefix
3432
650
            num_prepare += 1;
3433
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3434
650
                                             rowset_meta->tablet_id(),
3435
650
                                             rowset_meta->rowset_id_v2()) != 0) {
3436
0
                return -1;
3437
0
            }
3438
3.10k
        } else {
3439
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
3440
3.10k
            rowset_keys.emplace_back(k);
3441
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
3442
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
3443
3.10k
                ++num_empty_rowset;
3444
3.10k
            }
3445
3.10k
        }
3446
3.75k
        return 0;
3447
3.75k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3353
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
3354
4.00k
        ++num_scanned;
3355
4.00k
        total_rowset_key_size += k.size();
3356
4.00k
        total_rowset_value_size += v.size();
3357
4.00k
        RecycleRowsetPB rowset;
3358
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3359
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3360
0
            return -1;
3361
0
        }
3362
3363
4.00k
        int64_t current_time = ::time(nullptr);
3364
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3365
3366
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3367
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3368
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3369
4.00k
        if (current_time < expiration) { // not expired
3370
0
            return 0;
3371
0
        }
3372
4.00k
        ++num_expired;
3373
4.00k
        expired_rowset_size += v.size();
3374
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3375
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3376
                // in old version, keep this key-value pair and it needs to be checked manually
3377
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3378
0
                return -1;
3379
0
            }
3380
250
            if (rowset.resource_id().empty()) [[unlikely]] {
3381
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3382
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3383
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3384
0
                rowset_keys.emplace_back(k);
3385
0
                return -1;
3386
0
            }
3387
            // decode rowset_id
3388
250
            auto k1 = k;
3389
250
            k1.remove_prefix(1);
3390
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3391
250
            decode_key(&k1, &out);
3392
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3393
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3394
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3395
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
3396
250
                      << " task_type=" << metrics_context.operation_type;
3397
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3398
250
                                             rowset.tablet_id(), rowset_id) != 0) {
3399
0
                return -1;
3400
0
            }
3401
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
3402
250
            metrics_context.total_recycled_num++;
3403
250
            segment_metrics_context_.total_recycled_data_size +=
3404
250
                    rowset.rowset_meta().total_disk_size();
3405
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
3406
250
            segment_metrics_context_.report();
3407
250
            metrics_context.report();
3408
250
            return 0;
3409
250
        }
3410
        // TODO(plat1ko): check rowset not referenced
3411
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
3412
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3413
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3414
0
                LOG_INFO("recycle rowset that has empty resource id");
3415
0
            } else {
3416
                // other situations, keep this key-value pair and it needs to be checked manually
3417
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3418
0
                return -1;
3419
0
            }
3420
0
        }
3421
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3422
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
3423
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3424
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3425
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
3426
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3427
3.75k
                  << " rowset_meta_size=" << v.size()
3428
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
3429
3.75k
                  << " task_type=" << metrics_context.operation_type;
3430
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3431
            // unable to calculate file path, can only be deleted by rowset id prefix
3432
650
            num_prepare += 1;
3433
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3434
650
                                             rowset_meta->tablet_id(),
3435
650
                                             rowset_meta->rowset_id_v2()) != 0) {
3436
0
                return -1;
3437
0
            }
3438
3.10k
        } else {
3439
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
3440
3.10k
            rowset_keys.emplace_back(k);
3441
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
3442
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
3443
3.10k
                ++num_empty_rowset;
3444
3.10k
            }
3445
3.10k
        }
3446
3.75k
        return 0;
3447
3.75k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
3448
3449
21
    auto loop_done = [&]() -> int {
3450
21
        std::vector<std::string> rowset_keys_to_delete;
3451
        // rowset_id -> rowset_meta
3452
        // store rowset id and meta for statistics rs size when delete
3453
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
3454
21
        rowset_keys_to_delete.swap(rowset_keys);
3455
21
        rowsets_to_delete.swap(rowsets);
3456
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
3457
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
3458
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
3459
21
                                   metrics_context) != 0) {
3460
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
3461
0
                return;
3462
0
            }
3463
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
3464
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3465
0
                    return;
3466
0
                }
3467
3.10k
            }
3468
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
3469
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3470
0
                return;
3471
0
            }
3472
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
3473
21
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
3457
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
3458
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
3459
21
                                   metrics_context) != 0) {
3460
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
3461
0
                return;
3462
0
            }
3463
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
3464
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3465
0
                    return;
3466
0
                }
3467
3.10k
            }
3468
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
3469
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3470
0
                return;
3471
0
            }
3472
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
3473
21
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
3474
21
        return 0;
3475
21
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
3449
21
    auto loop_done = [&]() -> int {
3450
21
        std::vector<std::string> rowset_keys_to_delete;
3451
        // rowset_id -> rowset_meta
3452
        // store rowset id and meta for statistics rs size when delete
3453
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
3454
21
        rowset_keys_to_delete.swap(rowset_keys);
3455
21
        rowsets_to_delete.swap(rowsets);
3456
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
3457
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
3458
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
3459
21
                                   metrics_context) != 0) {
3460
21
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
3461
21
                return;
3462
21
            }
3463
21
            for (const auto& [_, rs] : rowsets_to_delete) {
3464
21
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3465
21
                    return;
3466
21
                }
3467
21
            }
3468
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
3469
21
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3470
21
                return;
3471
21
            }
3472
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
3473
21
        });
3474
21
        return 0;
3475
21
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
3476
3477
13
    if (config::enable_recycler_stats_metrics) {
3478
0
        scan_and_statistics_rowsets();
3479
0
    }
3480
    // recycle_func and loop_done for scan and recycle
3481
13
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
3482
13
                               std::move(loop_done));
3483
3484
13
    worker_pool->stop();
3485
3486
13
    if (!async_recycled_rowset_keys.empty()) {
3487
2
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
3488
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3489
0
            return -1;
3490
2
        } else {
3491
2
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
3492
2
        }
3493
2
    }
3494
13
    return ret;
3495
13
}
3496
3497
13
int InstanceRecycler::recycle_restore_jobs() {
3498
13
    const std::string task_name = "recycle_restore_jobs";
3499
13
    int64_t num_scanned = 0;
3500
13
    int64_t num_expired = 0;
3501
13
    int64_t num_recycled = 0;
3502
13
    int64_t num_aborted = 0;
3503
3504
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3505
3506
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
3507
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
3508
13
    std::string restore_job_key0;
3509
13
    std::string restore_job_key1;
3510
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
3511
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
3512
3513
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
3514
3515
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3516
13
    register_recycle_task(task_name, start_time);
3517
3518
13
    DORIS_CLOUD_DEFER {
3519
13
        unregister_recycle_task(task_name);
3520
13
        int64_t cost =
3521
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3522
13
        metrics_context.finish_report();
3523
3524
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
3525
13
                .tag("instance_id", instance_id_)
3526
13
                .tag("num_scanned", num_scanned)
3527
13
                .tag("num_expired", num_expired)
3528
13
                .tag("num_recycled", num_recycled)
3529
13
                .tag("num_aborted", num_aborted);
3530
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
3518
13
    DORIS_CLOUD_DEFER {
3519
13
        unregister_recycle_task(task_name);
3520
13
        int64_t cost =
3521
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3522
13
        metrics_context.finish_report();
3523
3524
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
3525
13
                .tag("instance_id", instance_id_)
3526
13
                .tag("num_scanned", num_scanned)
3527
13
                .tag("num_expired", num_expired)
3528
13
                .tag("num_recycled", num_recycled)
3529
13
                .tag("num_aborted", num_aborted);
3530
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
3531
3532
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3533
3534
13
    std::vector<std::string_view> restore_job_keys;
3535
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
3536
41
        ++num_scanned;
3537
41
        RestoreJobCloudPB restore_job_pb;
3538
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
3539
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
3540
0
            return -1;
3541
0
        }
3542
41
        int64_t expiration =
3543
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
3544
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
3545
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
3546
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
3547
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
3548
0
                   << " state=" << restore_job_pb.state();
3549
41
        int64_t current_time = ::time(nullptr);
3550
41
        if (current_time < expiration) { // not expired
3551
0
            return 0;
3552
0
        }
3553
41
        ++num_expired;
3554
3555
41
        int64_t tablet_id = restore_job_pb.tablet_id();
3556
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
3557
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
3558
3559
41
        std::unique_ptr<Transaction> txn;
3560
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3561
41
        if (err != TxnErrorCode::TXN_OK) {
3562
0
            LOG_WARNING("failed to recycle restore job")
3563
0
                    .tag("err", err)
3564
0
                    .tag("tablet id", tablet_id)
3565
0
                    .tag("instance_id", instance_id_)
3566
0
                    .tag("reason", "failed to create txn");
3567
0
            return -1;
3568
0
        }
3569
3570
41
        std::string val;
3571
41
        err = txn->get(k, &val);
3572
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
3573
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
3574
0
            return 0;
3575
0
        }
3576
41
        if (err != TxnErrorCode::TXN_OK) {
3577
0
            LOG_WARNING("failed to get kv");
3578
0
            return -1;
3579
0
        }
3580
41
        restore_job_pb.Clear();
3581
41
        if (!restore_job_pb.ParseFromString(val)) {
3582
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
3583
0
            return -1;
3584
0
        }
3585
3586
        // PREPARED or COMMITTED, change state to DROPPED and return
3587
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
3588
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
3589
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
3590
0
            restore_job_pb.set_need_recycle_data(true);
3591
0
            txn->put(k, restore_job_pb.SerializeAsString());
3592
0
            err = txn->commit();
3593
0
            if (err != TxnErrorCode::TXN_OK) {
3594
0
                LOG_WARNING("failed to commit txn: {}", err);
3595
0
                return -1;
3596
0
            }
3597
0
            num_aborted++;
3598
0
            return 0;
3599
0
        }
3600
3601
        // Change state to RECYCLING
3602
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
3603
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
3604
21
            txn->put(k, restore_job_pb.SerializeAsString());
3605
21
            err = txn->commit();
3606
21
            if (err != TxnErrorCode::TXN_OK) {
3607
0
                LOG_WARNING("failed to commit txn: {}", err);
3608
0
                return -1;
3609
0
            }
3610
21
            return 0;
3611
21
        }
3612
3613
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
3614
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
3615
3616
        // Recycle all data associated with the restore job.
3617
        // This includes rowsets, segments, and related resources.
3618
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
3619
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
3620
0
            LOG_WARNING("failed to recycle tablet")
3621
0
                    .tag("tablet_id", tablet_id)
3622
0
                    .tag("instance_id", instance_id_);
3623
0
            return -1;
3624
0
        }
3625
3626
        // delete all restore job rowset kv
3627
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
3628
3629
20
        err = txn->commit();
3630
20
        if (err != TxnErrorCode::TXN_OK) {
3631
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
3632
0
                    .tag("err", err)
3633
0
                    .tag("tablet id", tablet_id)
3634
0
                    .tag("instance_id", instance_id_)
3635
0
                    .tag("reason", "failed to commit txn");
3636
0
            return -1;
3637
0
        }
3638
3639
20
        metrics_context.total_recycled_num = ++num_recycled;
3640
20
        metrics_context.report();
3641
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
3642
20
        restore_job_keys.push_back(k);
3643
3644
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
3645
20
                  << " tablet_id=" << tablet_id;
3646
20
        return 0;
3647
20
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3535
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
3536
41
        ++num_scanned;
3537
41
        RestoreJobCloudPB restore_job_pb;
3538
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
3539
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
3540
0
            return -1;
3541
0
        }
3542
41
        int64_t expiration =
3543
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
3544
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
3545
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
3546
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
3547
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
3548
0
                   << " state=" << restore_job_pb.state();
3549
41
        int64_t current_time = ::time(nullptr);
3550
41
        if (current_time < expiration) { // not expired
3551
0
            return 0;
3552
0
        }
3553
41
        ++num_expired;
3554
3555
41
        int64_t tablet_id = restore_job_pb.tablet_id();
3556
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
3557
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
3558
3559
41
        std::unique_ptr<Transaction> txn;
3560
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3561
41
        if (err != TxnErrorCode::TXN_OK) {
3562
0
            LOG_WARNING("failed to recycle restore job")
3563
0
                    .tag("err", err)
3564
0
                    .tag("tablet id", tablet_id)
3565
0
                    .tag("instance_id", instance_id_)
3566
0
                    .tag("reason", "failed to create txn");
3567
0
            return -1;
3568
0
        }
3569
3570
41
        std::string val;
3571
41
        err = txn->get(k, &val);
3572
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
3573
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
3574
0
            return 0;
3575
0
        }
3576
41
        if (err != TxnErrorCode::TXN_OK) {
3577
0
            LOG_WARNING("failed to get kv");
3578
0
            return -1;
3579
0
        }
3580
41
        restore_job_pb.Clear();
3581
41
        if (!restore_job_pb.ParseFromString(val)) {
3582
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
3583
0
            return -1;
3584
0
        }
3585
3586
        // PREPARED or COMMITTED, change state to DROPPED and return
3587
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
3588
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
3589
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
3590
0
            restore_job_pb.set_need_recycle_data(true);
3591
0
            txn->put(k, restore_job_pb.SerializeAsString());
3592
0
            err = txn->commit();
3593
0
            if (err != TxnErrorCode::TXN_OK) {
3594
0
                LOG_WARNING("failed to commit txn: {}", err);
3595
0
                return -1;
3596
0
            }
3597
0
            num_aborted++;
3598
0
            return 0;
3599
0
        }
3600
3601
        // Change state to RECYCLING
3602
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
3603
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
3604
21
            txn->put(k, restore_job_pb.SerializeAsString());
3605
21
            err = txn->commit();
3606
21
            if (err != TxnErrorCode::TXN_OK) {
3607
0
                LOG_WARNING("failed to commit txn: {}", err);
3608
0
                return -1;
3609
0
            }
3610
21
            return 0;
3611
21
        }
3612
3613
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
3614
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
3615
3616
        // Recycle all data associated with the restore job.
3617
        // This includes rowsets, segments, and related resources.
3618
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
3619
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
3620
0
            LOG_WARNING("failed to recycle tablet")
3621
0
                    .tag("tablet_id", tablet_id)
3622
0
                    .tag("instance_id", instance_id_);
3623
0
            return -1;
3624
0
        }
3625
3626
        // delete all restore job rowset kv
3627
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
3628
3629
20
        err = txn->commit();
3630
20
        if (err != TxnErrorCode::TXN_OK) {
3631
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
3632
0
                    .tag("err", err)
3633
0
                    .tag("tablet id", tablet_id)
3634
0
                    .tag("instance_id", instance_id_)
3635
0
                    .tag("reason", "failed to commit txn");
3636
0
            return -1;
3637
0
        }
3638
3639
20
        metrics_context.total_recycled_num = ++num_recycled;
3640
20
        metrics_context.report();
3641
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
3642
20
        restore_job_keys.push_back(k);
3643
3644
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
3645
20
                  << " tablet_id=" << tablet_id;
3646
20
        return 0;
3647
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
3648
3649
13
    auto loop_done = [&restore_job_keys, this]() -> int {
3650
3
        if (restore_job_keys.empty()) return 0;
3651
1
        DORIS_CLOUD_DEFER {
3652
1
            restore_job_keys.clear();
3653
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3651
1
        DORIS_CLOUD_DEFER {
3652
1
            restore_job_keys.clear();
3653
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
3654
3655
1
        std::unique_ptr<Transaction> txn;
3656
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3657
1
        if (err != TxnErrorCode::TXN_OK) {
3658
0
            LOG_WARNING("failed to recycle restore job")
3659
0
                    .tag("err", err)
3660
0
                    .tag("instance_id", instance_id_)
3661
0
                    .tag("reason", "failed to create txn");
3662
0
            return -1;
3663
0
        }
3664
20
        for (auto& k : restore_job_keys) {
3665
20
            txn->remove(k);
3666
20
        }
3667
1
        err = txn->commit();
3668
1
        if (err != TxnErrorCode::TXN_OK) {
3669
0
            LOG_WARNING("failed to recycle restore job")
3670
0
                    .tag("err", err)
3671
0
                    .tag("instance_id", instance_id_)
3672
0
                    .tag("reason", "failed to commit txn");
3673
0
            return -1;
3674
0
        }
3675
1
        return 0;
3676
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
3649
3
    auto loop_done = [&restore_job_keys, this]() -> int {
3650
3
        if (restore_job_keys.empty()) return 0;
3651
1
        DORIS_CLOUD_DEFER {
3652
1
            restore_job_keys.clear();
3653
1
        };
3654
3655
1
        std::unique_ptr<Transaction> txn;
3656
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3657
1
        if (err != TxnErrorCode::TXN_OK) {
3658
0
            LOG_WARNING("failed to recycle restore job")
3659
0
                    .tag("err", err)
3660
0
                    .tag("instance_id", instance_id_)
3661
0
                    .tag("reason", "failed to create txn");
3662
0
            return -1;
3663
0
        }
3664
20
        for (auto& k : restore_job_keys) {
3665
20
            txn->remove(k);
3666
20
        }
3667
1
        err = txn->commit();
3668
1
        if (err != TxnErrorCode::TXN_OK) {
3669
0
            LOG_WARNING("failed to recycle restore job")
3670
0
                    .tag("err", err)
3671
0
                    .tag("instance_id", instance_id_)
3672
0
                    .tag("reason", "failed to commit txn");
3673
0
            return -1;
3674
0
        }
3675
1
        return 0;
3676
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
3677
3678
13
    if (config::enable_recycler_stats_metrics) {
3679
0
        scan_and_statistics_restore_jobs();
3680
0
    }
3681
3682
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
3683
13
                            std::move(loop_done));
3684
13
}
3685
3686
8
int InstanceRecycler::recycle_versioned_rowsets() {
3687
8
    const std::string task_name = "recycle_rowsets";
3688
8
    int64_t num_scanned = 0;
3689
8
    int64_t num_expired = 0;
3690
8
    int64_t num_prepare = 0;
3691
8
    int64_t num_compacted = 0;
3692
8
    int64_t num_empty_rowset = 0;
3693
8
    size_t total_rowset_key_size = 0;
3694
8
    size_t total_rowset_value_size = 0;
3695
8
    size_t expired_rowset_size = 0;
3696
8
    std::atomic_long num_recycled = 0;
3697
8
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3698
3699
8
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
3700
8
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
3701
8
    std::string recyc_rs_key0;
3702
8
    std::string recyc_rs_key1;
3703
8
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
3704
8
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
3705
3706
8
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
3707
3708
8
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3709
8
    register_recycle_task(task_name, start_time);
3710
3711
8
    DORIS_CLOUD_DEFER {
3712
8
        unregister_recycle_task(task_name);
3713
8
        int64_t cost =
3714
8
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3715
8
        metrics_context.finish_report();
3716
8
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3717
8
                .tag("instance_id", instance_id_)
3718
8
                .tag("num_scanned", num_scanned)
3719
8
                .tag("num_expired", num_expired)
3720
8
                .tag("num_recycled", num_recycled)
3721
8
                .tag("num_recycled.prepare", num_prepare)
3722
8
                .tag("num_recycled.compacted", num_compacted)
3723
8
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3724
8
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3725
8
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3726
8
                .tag("expired_rowset_meta_size", expired_rowset_size);
3727
8
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
3711
8
    DORIS_CLOUD_DEFER {
3712
8
        unregister_recycle_task(task_name);
3713
8
        int64_t cost =
3714
8
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3715
8
        metrics_context.finish_report();
3716
8
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3717
8
                .tag("instance_id", instance_id_)
3718
8
                .tag("num_scanned", num_scanned)
3719
8
                .tag("num_expired", num_expired)
3720
8
                .tag("num_recycled", num_recycled)
3721
8
                .tag("num_recycled.prepare", num_prepare)
3722
8
                .tag("num_recycled.compacted", num_compacted)
3723
8
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3724
8
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3725
8
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3726
8
                .tag("expired_rowset_meta_size", expired_rowset_size);
3727
8
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
3728
3729
8
    std::vector<std::string> orphan_rowset_keys;
3730
3731
    // Store keys of rowset recycled by background workers
3732
8
    std::mutex async_recycled_rowset_keys_mutex;
3733
8
    std::vector<std::string> async_recycled_rowset_keys;
3734
8
    auto worker_pool = std::make_unique<SimpleThreadPool>(
3735
8
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
3736
8
    worker_pool->start();
3737
8
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
3738
200
                                            int64_t tablet_id, const std::string& rowset_id) {
3739
        // Try to delete rowset data in background thread
3740
200
        int ret = worker_pool->submit_with_timeout(
3741
200
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3742
200
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3743
200
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3744
200
                        return;
3745
200
                    }
3746
                    // The async recycled rowsets are staled format or has not been used,
3747
                    // so we don't need to check the rowset ref count key.
3748
0
                    std::vector<std::string> keys;
3749
0
                    {
3750
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3751
0
                        async_recycled_rowset_keys.push_back(std::move(key));
3752
0
                        if (async_recycled_rowset_keys.size() > 100) {
3753
0
                            keys.swap(async_recycled_rowset_keys);
3754
0
                        }
3755
0
                    }
3756
0
                    if (keys.empty()) return;
3757
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3758
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3759
0
                                     << instance_id_;
3760
0
                    } else {
3761
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3762
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3763
0
                                           num_recycled, start_time);
3764
0
                    }
3765
0
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
3741
200
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3742
200
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3743
200
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3744
200
                        return;
3745
200
                    }
3746
                    // The async recycled rowsets are staled format or has not been used,
3747
                    // so we don't need to check the rowset ref count key.
3748
0
                    std::vector<std::string> keys;
3749
0
                    {
3750
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3751
0
                        async_recycled_rowset_keys.push_back(std::move(key));
3752
0
                        if (async_recycled_rowset_keys.size() > 100) {
3753
0
                            keys.swap(async_recycled_rowset_keys);
3754
0
                        }
3755
0
                    }
3756
0
                    if (keys.empty()) return;
3757
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3758
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3759
0
                                     << instance_id_;
3760
0
                    } else {
3761
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3762
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3763
0
                                           num_recycled, start_time);
3764
0
                    }
3765
0
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
3766
200
                0);
3767
200
        if (ret == 0) return 0;
3768
        // Submit task failed, delete rowset data in current thread
3769
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3770
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3771
0
            return -1;
3772
0
        }
3773
0
        orphan_rowset_keys.push_back(std::move(key));
3774
0
        return 0;
3775
0
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
3738
200
                                            int64_t tablet_id, const std::string& rowset_id) {
3739
        // Try to delete rowset data in background thread
3740
200
        int ret = worker_pool->submit_with_timeout(
3741
200
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3742
200
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3743
200
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3744
200
                        return;
3745
200
                    }
3746
                    // The async recycled rowsets are staled format or has not been used,
3747
                    // so we don't need to check the rowset ref count key.
3748
200
                    std::vector<std::string> keys;
3749
200
                    {
3750
200
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3751
200
                        async_recycled_rowset_keys.push_back(std::move(key));
3752
200
                        if (async_recycled_rowset_keys.size() > 100) {
3753
200
                            keys.swap(async_recycled_rowset_keys);
3754
200
                        }
3755
200
                    }
3756
200
                    if (keys.empty()) return;
3757
200
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3758
200
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3759
200
                                     << instance_id_;
3760
200
                    } else {
3761
200
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3762
200
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3763
200
                                           num_recycled, start_time);
3764
200
                    }
3765
200
                },
3766
200
                0);
3767
200
        if (ret == 0) return 0;
3768
        // Submit task failed, delete rowset data in current thread
3769
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3770
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3771
0
            return -1;
3772
0
        }
3773
0
        orphan_rowset_keys.push_back(std::move(key));
3774
0
        return 0;
3775
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
3776
3777
8
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3778
3779
1.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
3780
1.01k
        ++num_scanned;
3781
1.01k
        total_rowset_key_size += k.size();
3782
1.01k
        total_rowset_value_size += v.size();
3783
1.01k
        RecycleRowsetPB rowset;
3784
1.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3785
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3786
0
            return -1;
3787
0
        }
3788
3789
1.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3790
3791
1.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3792
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
3793
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3794
1.01k
        int64_t current_time = ::time(nullptr);
3795
1.01k
        if (current_time < final_expiration) { // not expired
3796
0
            return 0;
3797
0
        }
3798
1.01k
        ++num_expired;
3799
1.01k
        expired_rowset_size += v.size();
3800
1.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3801
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3802
                // in old version, keep this key-value pair and it needs to be checked manually
3803
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3804
0
                return -1;
3805
0
            }
3806
0
            if (rowset.resource_id().empty()) [[unlikely]] {
3807
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3808
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3809
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3810
0
                orphan_rowset_keys.emplace_back(k);
3811
0
                return -1;
3812
0
            }
3813
            // decode rowset_id
3814
0
            auto k1 = k;
3815
0
            k1.remove_prefix(1);
3816
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3817
0
            decode_key(&k1, &out);
3818
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3819
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3820
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3821
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
3822
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3823
0
                                             rowset.tablet_id(), rowset_id) != 0) {
3824
0
                return -1;
3825
0
            }
3826
0
            return 0;
3827
0
        }
3828
        // TODO(plat1ko): check rowset not referenced
3829
1.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
3830
1.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3831
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3832
0
                LOG_INFO("recycle rowset that has empty resource id");
3833
0
            } else {
3834
                // other situations, keep this key-value pair and it needs to be checked manually
3835
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3836
0
                return -1;
3837
0
            }
3838
0
        }
3839
1.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3840
1.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
3841
1.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3842
1.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3843
1.01k
                  << "] txn_id=" << rowset_meta->txn_id()
3844
1.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3845
1.01k
                  << " rowset_meta_size=" << v.size()
3846
1.01k
                  << " creation_time=" << rowset_meta->creation_time();
3847
1.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3848
            // unable to calculate file path, can only be deleted by rowset id prefix
3849
200
            num_prepare += 1;
3850
200
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3851
200
                                             rowset_meta->tablet_id(),
3852
200
                                             rowset_meta->rowset_id_v2()) != 0) {
3853
0
                return -1;
3854
0
            }
3855
813
        } else {
3856
813
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
3857
813
            worker_pool->submit(
3858
813
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
3859
813
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
3860
798
                            return;
3861
798
                        }
3862
15
                        num_compacted += is_compacted;
3863
15
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
3864
15
                        if (rowset_meta.num_segments() == 0) {
3865
0
                            ++num_empty_rowset;
3866
0
                        }
3867
15
                    });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
3858
813
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
3859
813
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
3860
798
                            return;
3861
798
                        }
3862
15
                        num_compacted += is_compacted;
3863
15
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
3864
15
                        if (rowset_meta.num_segments() == 0) {
3865
0
                            ++num_empty_rowset;
3866
0
                        }
3867
15
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
3868
813
        }
3869
1.01k
        return 0;
3870
1.01k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3779
1.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
3780
1.01k
        ++num_scanned;
3781
1.01k
        total_rowset_key_size += k.size();
3782
1.01k
        total_rowset_value_size += v.size();
3783
1.01k
        RecycleRowsetPB rowset;
3784
1.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3785
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3786
0
            return -1;
3787
0
        }
3788
3789
1.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3790
3791
1.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3792
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
3793
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3794
1.01k
        int64_t current_time = ::time(nullptr);
3795
1.01k
        if (current_time < final_expiration) { // not expired
3796
0
            return 0;
3797
0
        }
3798
1.01k
        ++num_expired;
3799
1.01k
        expired_rowset_size += v.size();
3800
1.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3801
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3802
                // in old version, keep this key-value pair and it needs to be checked manually
3803
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3804
0
                return -1;
3805
0
            }
3806
0
            if (rowset.resource_id().empty()) [[unlikely]] {
3807
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3808
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3809
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3810
0
                orphan_rowset_keys.emplace_back(k);
3811
0
                return -1;
3812
0
            }
3813
            // decode rowset_id
3814
0
            auto k1 = k;
3815
0
            k1.remove_prefix(1);
3816
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3817
0
            decode_key(&k1, &out);
3818
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3819
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3820
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3821
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
3822
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3823
0
                                             rowset.tablet_id(), rowset_id) != 0) {
3824
0
                return -1;
3825
0
            }
3826
0
            return 0;
3827
0
        }
3828
        // TODO(plat1ko): check rowset not referenced
3829
1.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
3830
1.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3831
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3832
0
                LOG_INFO("recycle rowset that has empty resource id");
3833
0
            } else {
3834
                // other situations, keep this key-value pair and it needs to be checked manually
3835
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3836
0
                return -1;
3837
0
            }
3838
0
        }
3839
1.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3840
1.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
3841
1.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3842
1.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3843
1.01k
                  << "] txn_id=" << rowset_meta->txn_id()
3844
1.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3845
1.01k
                  << " rowset_meta_size=" << v.size()
3846
1.01k
                  << " creation_time=" << rowset_meta->creation_time();
3847
1.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3848
            // unable to calculate file path, can only be deleted by rowset id prefix
3849
200
            num_prepare += 1;
3850
200
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3851
200
                                             rowset_meta->tablet_id(),
3852
200
                                             rowset_meta->rowset_id_v2()) != 0) {
3853
0
                return -1;
3854
0
            }
3855
813
        } else {
3856
813
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
3857
813
            worker_pool->submit(
3858
813
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
3859
813
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
3860
813
                            return;
3861
813
                        }
3862
813
                        num_compacted += is_compacted;
3863
813
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
3864
813
                        if (rowset_meta.num_segments() == 0) {
3865
813
                            ++num_empty_rowset;
3866
813
                        }
3867
813
                    });
3868
813
        }
3869
1.01k
        return 0;
3870
1.01k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
3871
3872
8
    if (config::enable_recycler_stats_metrics) {
3873
0
        scan_and_statistics_rowsets();
3874
0
    }
3875
3876
8
    auto loop_done = [&]() -> int {
3877
5
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
3878
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3879
0
        }
3880
5
        orphan_rowset_keys.clear();
3881
5
        return 0;
3882
5
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
3876
5
    auto loop_done = [&]() -> int {
3877
5
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
3878
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3879
0
        }
3880
5
        orphan_rowset_keys.clear();
3881
5
        return 0;
3882
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
3883
3884
    // recycle_func and loop_done for scan and recycle
3885
8
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
3886
8
                               std::move(loop_done));
3887
3888
8
    worker_pool->stop();
3889
3890
8
    if (!async_recycled_rowset_keys.empty()) {
3891
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
3892
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3893
0
            return -1;
3894
0
        } else {
3895
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
3896
0
        }
3897
0
    }
3898
8
    return ret;
3899
8
}
3900
3901
int InstanceRecycler::recycle_rowset_meta_and_data(std::string_view recycle_rowset_key,
3902
                                                   const RowsetMetaCloudPB& rowset_meta,
3903
813
                                                   std::string_view non_versioned_rowset_key) {
3904
813
    constexpr int MAX_RETRY = 10;
3905
813
    int64_t tablet_id = rowset_meta.tablet_id();
3906
813
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
3907
813
    std::string_view reference_instance_id = instance_id_;
3908
813
    if (rowset_meta.has_reference_instance_id()) {
3909
8
        reference_instance_id = rowset_meta.reference_instance_id();
3910
8
    }
3911
3912
813
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
3913
813
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
3914
813
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(recycle_rowset_key));
3915
813
    AnnotateTag instance_id_tag("instance_id", instance_id_);
3916
813
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
3917
813
    for (int i = 0; i < MAX_RETRY; ++i) {
3918
813
        std::unique_ptr<Transaction> txn;
3919
813
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3920
813
        if (err != TxnErrorCode::TXN_OK) {
3921
0
            LOG_WARNING("failed to create txn").tag("err", err);
3922
0
            return -1;
3923
0
        }
3924
3925
813
        std::string rowset_ref_count_key =
3926
813
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
3927
813
        int64_t ref_count = 0;
3928
813
        {
3929
813
            std::string value;
3930
813
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
3931
813
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3932
                // This is the old version rowset, we could recycle it directly.
3933
802
                ref_count = 1;
3934
802
            } else if (err != TxnErrorCode::TXN_OK) {
3935
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
3936
0
                return -1;
3937
11
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
3938
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
3939
0
                return -1;
3940
0
            }
3941
813
        }
3942
3943
813
        if (ref_count == 1) {
3944
            // It would not be added since it is recycling.
3945
809
            if (delete_rowset_data(rowset_meta) != 0) {
3946
800
                LOG_WARNING("failed to delete rowset data");
3947
800
                return -1;
3948
800
            }
3949
3950
            // Reset the transaction to avoid timeout.
3951
9
            err = txn_kv_->create_txn(&txn);
3952
9
            if (err != TxnErrorCode::TXN_OK) {
3953
0
                LOG_WARNING("failed to create txn").tag("err", err);
3954
0
                return -1;
3955
0
            }
3956
9
            txn->remove(rowset_ref_count_key);
3957
9
            LOG_INFO("delete rowset data ref count key")
3958
9
                    .tag("txn_id", rowset_meta.txn_id())
3959
9
                    .tag("ref_count_key", hex(rowset_ref_count_key));
3960
3961
9
            std::string dbm_start_key =
3962
9
                    meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
3963
9
            std::string dbm_end_key = meta_delete_bitmap_key(
3964
9
                    {reference_instance_id, tablet_id, rowset_id,
3965
9
                     std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
3966
9
            txn->remove(dbm_start_key, dbm_end_key);
3967
9
            LOG_INFO("remove delete bitmap kv")
3968
9
                    .tag("begin", hex(dbm_start_key))
3969
9
                    .tag("end", hex(dbm_end_key));
3970
3971
9
            std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key(
3972
9
                    {reference_instance_id, tablet_id, rowset_id});
3973
9
            std::string versioned_dbm_end_key = versioned_dbm_start_key;
3974
9
            encode_int64(INT64_MAX, &versioned_dbm_end_key);
3975
9
            txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
3976
9
            LOG_INFO("remove versioned delete bitmap kv")
3977
9
                    .tag("begin", hex(versioned_dbm_start_key))
3978
9
                    .tag("end", hex(versioned_dbm_end_key));
3979
3980
9
            std::string meta_rowset_key_begin =
3981
9
                    versioned::meta_rowset_key({reference_instance_id, tablet_id, rowset_id});
3982
9
            std::string meta_rowset_key_end = meta_rowset_key_begin;
3983
9
            encode_int64(INT64_MAX, &meta_rowset_key_end);
3984
9
            txn->remove(meta_rowset_key_begin, meta_rowset_key_end);
3985
9
            LOG_INFO("remove meta rowset key").tag("key", hex(meta_rowset_key_begin));
3986
9
        } else {
3987
            // Decrease the rowset ref count.
3988
            //
3989
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
3990
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
3991
4
            txn->atomic_add(rowset_ref_count_key, -1);
3992
4
            LOG_INFO("decrease rowset data ref count")
3993
4
                    .tag("txn_id", rowset_meta.txn_id())
3994
4
                    .tag("ref_count", ref_count - 1)
3995
4
                    .tag("ref_count_key", hex(rowset_ref_count_key));
3996
4
        }
3997
3998
13
        if (!recycle_rowset_key.empty()) { // empty when recycle ref rowsets for deleted instance
3999
13
            txn->remove(recycle_rowset_key);
4000
13
            LOG_INFO("remove recycle rowset key").tag("key", hex(recycle_rowset_key));
4001
13
        }
4002
13
        if (!non_versioned_rowset_key.empty()) {
4003
0
            txn->remove(non_versioned_rowset_key);
4004
0
            LOG_INFO("remove non versioned rowset key").tag("key", hex(non_versioned_rowset_key));
4005
0
        }
4006
4007
13
        err = txn->commit();
4008
13
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
4009
            // The rowset ref count key has been changed, we need to retry.
4010
0
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
4011
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
4012
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
4013
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
4014
0
            continue;
4015
13
        } else if (err != TxnErrorCode::TXN_OK) {
4016
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
4017
0
            return -1;
4018
0
        }
4019
13
        LOG_INFO("recycle rowset meta and data success");
4020
13
        return 0;
4021
13
    }
4022
0
    LOG_WARNING("failed to recycle rowset meta and data after retry")
4023
0
            .tag("tablet_id", tablet_id)
4024
0
            .tag("rowset_id", rowset_id)
4025
0
            .tag("retry", MAX_RETRY);
4026
0
    return -1;
4027
813
}
4028
4029
18
int InstanceRecycler::recycle_tmp_rowsets() {
4030
18
    const std::string task_name = "recycle_tmp_rowsets";
4031
18
    int64_t num_scanned = 0;
4032
18
    int64_t num_expired = 0;
4033
18
    std::atomic_long num_recycled = 0;
4034
18
    size_t expired_rowset_size = 0;
4035
18
    size_t total_rowset_key_size = 0;
4036
18
    size_t total_rowset_value_size = 0;
4037
18
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4038
4039
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
4040
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
4041
18
    std::string tmp_rs_key0;
4042
18
    std::string tmp_rs_key1;
4043
18
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
4044
18
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
4045
4046
18
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
4047
4048
18
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4049
18
    register_recycle_task(task_name, start_time);
4050
4051
18
    DORIS_CLOUD_DEFER {
4052
18
        unregister_recycle_task(task_name);
4053
18
        int64_t cost =
4054
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4055
18
        metrics_context.finish_report();
4056
18
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
4057
18
                .tag("instance_id", instance_id_)
4058
18
                .tag("num_scanned", num_scanned)
4059
18
                .tag("num_expired", num_expired)
4060
18
                .tag("num_recycled", num_recycled)
4061
18
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4062
18
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4063
18
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
4064
18
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
4051
14
    DORIS_CLOUD_DEFER {
4052
14
        unregister_recycle_task(task_name);
4053
14
        int64_t cost =
4054
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4055
14
        metrics_context.finish_report();
4056
14
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
4057
14
                .tag("instance_id", instance_id_)
4058
14
                .tag("num_scanned", num_scanned)
4059
14
                .tag("num_expired", num_expired)
4060
14
                .tag("num_recycled", num_recycled)
4061
14
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4062
14
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4063
14
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
4064
14
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
4051
4
    DORIS_CLOUD_DEFER {
4052
4
        unregister_recycle_task(task_name);
4053
4
        int64_t cost =
4054
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4055
4
        metrics_context.finish_report();
4056
4
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
4057
4
                .tag("instance_id", instance_id_)
4058
4
                .tag("num_scanned", num_scanned)
4059
4
                .tag("num_expired", num_expired)
4060
4
                .tag("num_recycled", num_recycled)
4061
4
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4062
4
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4063
4
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
4064
4
    };
4065
4066
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
4067
4068
18
    std::vector<std::string> tmp_rowset_keys;
4069
18
    std::vector<std::string> tmp_rowset_ref_count_keys;
4070
4071
    // rowset_id -> rowset_meta
4072
    // store tmp_rowset id and meta for statistics rs size when delete
4073
18
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
4074
18
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4075
18
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
4076
18
    worker_pool->start();
4077
4078
18
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4079
4080
18
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
4081
18
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
4082
18
                             &earlest_ts, &tmp_rowset_ref_count_keys, this,
4083
57.0k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
4084
57.0k
        ++num_scanned;
4085
57.0k
        total_rowset_key_size += k.size();
4086
57.0k
        total_rowset_value_size += v.size();
4087
57.0k
        doris::RowsetMetaCloudPB rowset;
4088
57.0k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4089
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
4090
0
            return -1;
4091
0
        }
4092
57.0k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4093
57.0k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4094
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4095
0
                   << " txn_expiration=" << rowset.txn_expiration()
4096
0
                   << " rowset_creation_time=" << rowset.creation_time();
4097
57.0k
        int64_t current_time = ::time(nullptr);
4098
57.0k
        if (current_time < expiration) { // not expired
4099
0
            return 0;
4100
0
        }
4101
4102
57.0k
        DCHECK_GT(rowset.txn_id(), 0)
4103
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
4104
57.0k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
4105
2.00k
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
4106
2.00k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
4107
2.00k
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
4108
2.00k
                      << rowset.start_version() << '-' << rowset.end_version()
4109
2.00k
                      << "] txn_id=" << rowset.txn_id()
4110
2.00k
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
4111
2.00k
                      << " txn_expiration=" << rowset.txn_expiration();
4112
2.00k
            return 0;
4113
2.00k
        }
4114
4115
55.0k
        ++num_expired;
4116
55.0k
        expired_rowset_size += v.size();
4117
55.0k
        if (!rowset.has_resource_id()) {
4118
4.00k
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
4119
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
4120
0
                return -1;
4121
0
            }
4122
            // might be a delete pred rowset
4123
4.00k
            tmp_rowset_keys.emplace_back(k);
4124
4.00k
            return 0;
4125
4.00k
        }
4126
        // TODO(plat1ko): check rowset not referenced
4127
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4128
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
4129
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
4130
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
4131
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
4132
51.0k
                  << " num_expired=" << num_expired
4133
51.0k
                  << " task_type=" << metrics_context.operation_type;
4134
4135
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
4136
        // Remove the rowset ref count key directly since it has not been used.
4137
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
4138
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
4139
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
4140
51.0k
                  << "key=" << hex(rowset_ref_count_key);
4141
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
4142
4143
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
4144
51.0k
        return 0;
4145
55.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4083
51.0k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
4084
51.0k
        ++num_scanned;
4085
51.0k
        total_rowset_key_size += k.size();
4086
51.0k
        total_rowset_value_size += v.size();
4087
51.0k
        doris::RowsetMetaCloudPB rowset;
4088
51.0k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4089
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
4090
0
            return -1;
4091
0
        }
4092
51.0k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4093
51.0k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4094
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4095
0
                   << " txn_expiration=" << rowset.txn_expiration()
4096
0
                   << " rowset_creation_time=" << rowset.creation_time();
4097
51.0k
        int64_t current_time = ::time(nullptr);
4098
51.0k
        if (current_time < expiration) { // not expired
4099
0
            return 0;
4100
0
        }
4101
4102
51.0k
        DCHECK_GT(rowset.txn_id(), 0)
4103
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
4104
51.0k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
4105
0
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
4106
0
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
4107
0
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
4108
0
                      << rowset.start_version() << '-' << rowset.end_version()
4109
0
                      << "] txn_id=" << rowset.txn_id()
4110
0
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
4111
0
                      << " txn_expiration=" << rowset.txn_expiration();
4112
0
            return 0;
4113
0
        }
4114
4115
51.0k
        ++num_expired;
4116
51.0k
        expired_rowset_size += v.size();
4117
51.0k
        if (!rowset.has_resource_id()) {
4118
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
4119
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
4120
0
                return -1;
4121
0
            }
4122
            // might be a delete pred rowset
4123
0
            tmp_rowset_keys.emplace_back(k);
4124
0
            return 0;
4125
0
        }
4126
        // TODO(plat1ko): check rowset not referenced
4127
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4128
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
4129
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
4130
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
4131
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
4132
51.0k
                  << " num_expired=" << num_expired
4133
51.0k
                  << " task_type=" << metrics_context.operation_type;
4134
4135
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
4136
        // Remove the rowset ref count key directly since it has not been used.
4137
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
4138
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
4139
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
4140
51.0k
                  << "key=" << hex(rowset_ref_count_key);
4141
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
4142
4143
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
4144
51.0k
        return 0;
4145
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4083
6.00k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
4084
6.00k
        ++num_scanned;
4085
6.00k
        total_rowset_key_size += k.size();
4086
6.00k
        total_rowset_value_size += v.size();
4087
6.00k
        doris::RowsetMetaCloudPB rowset;
4088
6.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4089
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
4090
0
            return -1;
4091
0
        }
4092
6.00k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4093
6.00k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4094
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4095
0
                   << " txn_expiration=" << rowset.txn_expiration()
4096
0
                   << " rowset_creation_time=" << rowset.creation_time();
4097
6.00k
        int64_t current_time = ::time(nullptr);
4098
6.00k
        if (current_time < expiration) { // not expired
4099
0
            return 0;
4100
0
        }
4101
4102
6.00k
        DCHECK_GT(rowset.txn_id(), 0)
4103
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
4104
6.00k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
4105
2.00k
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
4106
2.00k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
4107
2.00k
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
4108
2.00k
                      << rowset.start_version() << '-' << rowset.end_version()
4109
2.00k
                      << "] txn_id=" << rowset.txn_id()
4110
2.00k
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
4111
2.00k
                      << " txn_expiration=" << rowset.txn_expiration();
4112
2.00k
            return 0;
4113
2.00k
        }
4114
4115
4.00k
        ++num_expired;
4116
4.00k
        expired_rowset_size += v.size();
4117
4.00k
        if (!rowset.has_resource_id()) {
4118
4.00k
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
4119
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
4120
0
                return -1;
4121
0
            }
4122
            // might be a delete pred rowset
4123
4.00k
            tmp_rowset_keys.emplace_back(k);
4124
4.00k
            return 0;
4125
4.00k
        }
4126
        // TODO(plat1ko): check rowset not referenced
4127
0
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4128
0
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
4129
0
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
4130
0
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
4131
0
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
4132
0
                  << " num_expired=" << num_expired
4133
0
                  << " task_type=" << metrics_context.operation_type;
4134
4135
0
        tmp_rowset_keys.emplace_back(k.data(), k.size());
4136
        // Remove the rowset ref count key directly since it has not been used.
4137
0
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
4138
0
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
4139
0
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
4140
0
                  << "key=" << hex(rowset_ref_count_key);
4141
0
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
4142
4143
0
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
4144
0
        return 0;
4145
4.00k
    };
4146
4147
    // TODO bacth delete
4148
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4149
51.0k
        std::string dbm_start_key =
4150
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4151
51.0k
        std::string dbm_end_key = dbm_start_key;
4152
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
4153
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4154
51.0k
        if (ret != 0) {
4155
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4156
0
                         << instance_id_ << ", tablet_id=" << tablet_id
4157
0
                         << ", rowset_id=" << rowset_id;
4158
0
        }
4159
51.0k
        return ret;
4160
51.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4148
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4149
51.0k
        std::string dbm_start_key =
4150
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4151
51.0k
        std::string dbm_end_key = dbm_start_key;
4152
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
4153
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4154
51.0k
        if (ret != 0) {
4155
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4156
0
                         << instance_id_ << ", tablet_id=" << tablet_id
4157
0
                         << ", rowset_id=" << rowset_id;
4158
0
        }
4159
51.0k
        return ret;
4160
51.0k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
4161
4162
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4163
51.0k
        auto delete_bitmap_start =
4164
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
4165
51.0k
        auto delete_bitmap_end =
4166
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
4167
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
4168
51.0k
        if (ret != 0) {
4169
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
4170
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
4171
0
        }
4172
51.0k
        return ret;
4173
51.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4162
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4163
51.0k
        auto delete_bitmap_start =
4164
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
4165
51.0k
        auto delete_bitmap_end =
4166
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
4167
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
4168
51.0k
        if (ret != 0) {
4169
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
4170
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
4171
0
        }
4172
51.0k
        return ret;
4173
51.0k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
4174
4175
18
    auto loop_done = [&]() -> int {
4176
10
        DORIS_CLOUD_DEFER {
4177
10
            tmp_rowset_keys.clear();
4178
10
            tmp_rowsets.clear();
4179
10
            tmp_rowset_ref_count_keys.clear();
4180
10
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4176
7
        DORIS_CLOUD_DEFER {
4177
7
            tmp_rowset_keys.clear();
4178
7
            tmp_rowsets.clear();
4179
7
            tmp_rowset_ref_count_keys.clear();
4180
7
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4176
3
        DORIS_CLOUD_DEFER {
4177
3
            tmp_rowset_keys.clear();
4178
3
            tmp_rowsets.clear();
4179
3
            tmp_rowset_ref_count_keys.clear();
4180
3
        };
4181
10
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
4182
10
                             tmp_rowsets_to_delete = tmp_rowsets,
4183
10
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4184
10
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4185
10
                                   metrics_context) != 0) {
4186
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4187
0
                return;
4188
0
            }
4189
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4190
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4191
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4192
0
                                 << rs.ShortDebugString();
4193
0
                    return;
4194
0
                }
4195
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4196
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4197
0
                                 << rs.ShortDebugString();
4198
0
                    return;
4199
0
                }
4200
51.0k
            }
4201
10
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4202
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4203
0
                return;
4204
0
            }
4205
10
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4206
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4207
0
                return;
4208
0
            }
4209
10
            num_recycled += tmp_rowset_keys.size();
4210
10
            return;
4211
10
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
4183
7
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4184
7
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4185
7
                                   metrics_context) != 0) {
4186
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4187
0
                return;
4188
0
            }
4189
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4190
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4191
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4192
0
                                 << rs.ShortDebugString();
4193
0
                    return;
4194
0
                }
4195
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4196
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4197
0
                                 << rs.ShortDebugString();
4198
0
                    return;
4199
0
                }
4200
51.0k
            }
4201
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4202
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4203
0
                return;
4204
0
            }
4205
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4206
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4207
0
                return;
4208
0
            }
4209
7
            num_recycled += tmp_rowset_keys.size();
4210
7
            return;
4211
7
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
4183
3
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4184
3
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4185
3
                                   metrics_context) != 0) {
4186
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4187
0
                return;
4188
0
            }
4189
3
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4190
0
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4191
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4192
0
                                 << rs.ShortDebugString();
4193
0
                    return;
4194
0
                }
4195
0
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4196
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4197
0
                                 << rs.ShortDebugString();
4198
0
                    return;
4199
0
                }
4200
0
            }
4201
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4202
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4203
0
                return;
4204
0
            }
4205
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4206
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4207
0
                return;
4208
0
            }
4209
3
            num_recycled += tmp_rowset_keys.size();
4210
3
            return;
4211
3
        });
4212
10
        return 0;
4213
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
4175
7
    auto loop_done = [&]() -> int {
4176
7
        DORIS_CLOUD_DEFER {
4177
7
            tmp_rowset_keys.clear();
4178
7
            tmp_rowsets.clear();
4179
7
            tmp_rowset_ref_count_keys.clear();
4180
7
        };
4181
7
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
4182
7
                             tmp_rowsets_to_delete = tmp_rowsets,
4183
7
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4184
7
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4185
7
                                   metrics_context) != 0) {
4186
7
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4187
7
                return;
4188
7
            }
4189
7
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4190
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4191
7
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4192
7
                                 << rs.ShortDebugString();
4193
7
                    return;
4194
7
                }
4195
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4196
7
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4197
7
                                 << rs.ShortDebugString();
4198
7
                    return;
4199
7
                }
4200
7
            }
4201
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4202
7
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4203
7
                return;
4204
7
            }
4205
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4206
7
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4207
7
                return;
4208
7
            }
4209
7
            num_recycled += tmp_rowset_keys.size();
4210
7
            return;
4211
7
        });
4212
7
        return 0;
4213
7
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
4175
3
    auto loop_done = [&]() -> int {
4176
3
        DORIS_CLOUD_DEFER {
4177
3
            tmp_rowset_keys.clear();
4178
3
            tmp_rowsets.clear();
4179
3
            tmp_rowset_ref_count_keys.clear();
4180
3
        };
4181
3
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
4182
3
                             tmp_rowsets_to_delete = tmp_rowsets,
4183
3
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
4184
3
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
4185
3
                                   metrics_context) != 0) {
4186
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
4187
3
                return;
4188
3
            }
4189
3
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
4190
3
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4191
3
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
4192
3
                                 << rs.ShortDebugString();
4193
3
                    return;
4194
3
                }
4195
3
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4196
3
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
4197
3
                                 << rs.ShortDebugString();
4198
3
                    return;
4199
3
                }
4200
3
            }
4201
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
4202
3
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
4203
3
                return;
4204
3
            }
4205
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
4206
3
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
4207
3
                return;
4208
3
            }
4209
3
            num_recycled += tmp_rowset_keys.size();
4210
3
            return;
4211
3
        });
4212
3
        return 0;
4213
3
    };
4214
4215
18
    if (config::enable_recycler_stats_metrics) {
4216
0
        scan_and_statistics_tmp_rowsets();
4217
0
    }
4218
    // recycle_func and loop_done for scan and recycle
4219
18
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
4220
18
                               std::move(loop_done));
4221
4222
18
    worker_pool->stop();
4223
18
    return ret;
4224
18
}
4225
4226
int InstanceRecycler::scan_and_recycle(
4227
        std::string begin, std::string_view end,
4228
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
4229
221
        std::function<int()> loop_done) {
4230
221
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
4231
221
    int ret = 0;
4232
221
    int64_t cnt = 0;
4233
221
    int get_range_retried = 0;
4234
221
    std::string err;
4235
221
    DORIS_CLOUD_DEFER_COPY(begin, end) {
4236
221
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
4237
221
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
4238
221
                  << " ret=" << ret << " err=" << err;
4239
221
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
4235
202
    DORIS_CLOUD_DEFER_COPY(begin, end) {
4236
202
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
4237
202
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
4238
202
                  << " ret=" << ret << " err=" << err;
4239
202
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
4235
19
    DORIS_CLOUD_DEFER_COPY(begin, end) {
4236
19
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
4237
19
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
4238
19
                  << " ret=" << ret << " err=" << err;
4239
19
    };
4240
4241
221
    std::unique_ptr<RangeGetIterator> it;
4242
249
    do {
4243
249
        if (get_range_retried > 1000) {
4244
0
            err = "txn_get exceeds max retry, may not scan all keys";
4245
0
            ret = -1;
4246
0
            return -1;
4247
0
        }
4248
249
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
4249
249
        if (get_ret != 0) { // txn kv may complain "Request for future version"
4250
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
4251
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
4252
0
                         << " get_range_retried=" << get_range_retried;
4253
0
            ++get_range_retried;
4254
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
4255
0
            continue; // try again
4256
0
        }
4257
249
        if (!it->has_next()) {
4258
123
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
4259
123
            break; // scan finished
4260
123
        }
4261
100k
        while (it->has_next()) {
4262
100k
            ++cnt;
4263
            // recycle corresponding resources
4264
100k
            auto [k, v] = it->next();
4265
100k
            if (!it->has_next()) {
4266
126
                begin = k;
4267
126
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
4268
126
            }
4269
            // if we want to continue scanning, the recycle_func should not return non-zero
4270
100k
            if (recycle_func(k, v) != 0) {
4271
4.00k
                err = "recycle_func error";
4272
4.00k
                ret = -1;
4273
4.00k
            }
4274
100k
        }
4275
126
        begin.push_back('\x00'); // Update to next smallest key for iteration
4276
        // if we want to continue scanning, the recycle_func should not return non-zero
4277
126
        if (loop_done && loop_done() != 0) {
4278
3
            err = "loop_done error";
4279
3
            ret = -1;
4280
3
        }
4281
126
    } while (it->more() && !stopped());
4282
221
    return ret;
4283
221
}
4284
4285
20
int InstanceRecycler::abort_timeout_txn() {
4286
20
    const std::string task_name = "abort_timeout_txn";
4287
20
    int64_t num_scanned = 0;
4288
20
    int64_t num_timeout = 0;
4289
20
    int64_t num_abort = 0;
4290
20
    int64_t num_advance = 0;
4291
20
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4292
4293
20
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
4294
20
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
4295
20
    std::string begin_txn_running_key;
4296
20
    std::string end_txn_running_key;
4297
20
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
4298
20
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
4299
4300
20
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
4301
4302
20
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4303
20
    register_recycle_task(task_name, start_time);
4304
4305
20
    DORIS_CLOUD_DEFER {
4306
20
        unregister_recycle_task(task_name);
4307
20
        int64_t cost =
4308
20
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4309
20
        metrics_context.finish_report();
4310
20
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
4311
20
                .tag("instance_id", instance_id_)
4312
20
                .tag("num_scanned", num_scanned)
4313
20
                .tag("num_timeout", num_timeout)
4314
20
                .tag("num_abort", num_abort)
4315
20
                .tag("num_advance", num_advance);
4316
20
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
4305
16
    DORIS_CLOUD_DEFER {
4306
16
        unregister_recycle_task(task_name);
4307
16
        int64_t cost =
4308
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4309
16
        metrics_context.finish_report();
4310
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
4311
16
                .tag("instance_id", instance_id_)
4312
16
                .tag("num_scanned", num_scanned)
4313
16
                .tag("num_timeout", num_timeout)
4314
16
                .tag("num_abort", num_abort)
4315
16
                .tag("num_advance", num_advance);
4316
16
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
4305
4
    DORIS_CLOUD_DEFER {
4306
4
        unregister_recycle_task(task_name);
4307
4
        int64_t cost =
4308
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4309
4
        metrics_context.finish_report();
4310
4
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
4311
4
                .tag("instance_id", instance_id_)
4312
4
                .tag("num_scanned", num_scanned)
4313
4
                .tag("num_timeout", num_timeout)
4314
4
                .tag("num_abort", num_abort)
4315
4
                .tag("num_advance", num_advance);
4316
4
    };
4317
4318
20
    int64_t current_time =
4319
20
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4320
4321
20
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
4322
20
                                  &current_time, &metrics_context,
4323
20
                                  this](std::string_view k, std::string_view v) -> int {
4324
10
        ++num_scanned;
4325
4326
10
        std::unique_ptr<Transaction> txn;
4327
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4328
10
        if (err != TxnErrorCode::TXN_OK) {
4329
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4330
0
            return -1;
4331
0
        }
4332
10
        std::string_view k1 = k;
4333
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
4334
10
        k1.remove_prefix(1); // Remove key space
4335
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4336
10
        if (decode_key(&k1, &out) != 0) {
4337
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
4338
0
            return -1;
4339
0
        }
4340
10
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4341
10
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4342
10
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4343
        // Update txn_info
4344
10
        std::string txn_inf_key, txn_inf_val;
4345
10
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
4346
10
        err = txn->get(txn_inf_key, &txn_inf_val);
4347
10
        if (err != TxnErrorCode::TXN_OK) {
4348
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
4349
0
            return -1;
4350
0
        }
4351
10
        TxnInfoPB txn_info;
4352
10
        if (!txn_info.ParseFromString(txn_inf_val)) {
4353
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
4354
0
            return -1;
4355
0
        }
4356
4357
10
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
4358
4
            txn.reset();
4359
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
4360
4
            std::shared_ptr<TxnLazyCommitTask> task =
4361
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
4362
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
4363
4
            if (ret.first != MetaServiceCode::OK) {
4364
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
4365
0
                             << "msg=" << ret.second;
4366
0
                return -1;
4367
0
            }
4368
4
            ++num_advance;
4369
4
            return 0;
4370
6
        } else {
4371
6
            TxnRunningPB txn_running_pb;
4372
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
4373
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4374
0
                return -1;
4375
0
            }
4376
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
4377
4
                return 0;
4378
4
            }
4379
2
            ++num_timeout;
4380
4381
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
4382
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
4383
2
            txn_info.set_finish_time(current_time);
4384
2
            txn_info.set_reason("timeout");
4385
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
4386
2
            txn_inf_val.clear();
4387
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
4388
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
4389
0
                return -1;
4390
0
            }
4391
2
            txn->put(txn_inf_key, txn_inf_val);
4392
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
4393
            // Put recycle txn key
4394
2
            std::string recyc_txn_key, recyc_txn_val;
4395
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
4396
2
            RecycleTxnPB recycle_txn_pb;
4397
2
            recycle_txn_pb.set_creation_time(current_time);
4398
2
            recycle_txn_pb.set_label(txn_info.label());
4399
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
4400
0
                LOG_WARNING("failed to serialize txn recycle info")
4401
0
                        .tag("key", hex(k))
4402
0
                        .tag("db_id", db_id)
4403
0
                        .tag("txn_id", txn_id);
4404
0
                return -1;
4405
0
            }
4406
2
            txn->put(recyc_txn_key, recyc_txn_val);
4407
            // Remove txn running key
4408
2
            txn->remove(k);
4409
2
            err = txn->commit();
4410
2
            if (err != TxnErrorCode::TXN_OK) {
4411
0
                LOG_WARNING("failed to commit txn err={}", err)
4412
0
                        .tag("key", hex(k))
4413
0
                        .tag("db_id", db_id)
4414
0
                        .tag("txn_id", txn_id);
4415
0
                return -1;
4416
0
            }
4417
2
            metrics_context.total_recycled_num = ++num_abort;
4418
2
            metrics_context.report();
4419
2
        }
4420
4421
2
        return 0;
4422
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4323
6
                                  this](std::string_view k, std::string_view v) -> int {
4324
6
        ++num_scanned;
4325
4326
6
        std::unique_ptr<Transaction> txn;
4327
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4328
6
        if (err != TxnErrorCode::TXN_OK) {
4329
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4330
0
            return -1;
4331
0
        }
4332
6
        std::string_view k1 = k;
4333
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
4334
6
        k1.remove_prefix(1); // Remove key space
4335
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4336
6
        if (decode_key(&k1, &out) != 0) {
4337
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
4338
0
            return -1;
4339
0
        }
4340
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4341
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4342
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4343
        // Update txn_info
4344
6
        std::string txn_inf_key, txn_inf_val;
4345
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
4346
6
        err = txn->get(txn_inf_key, &txn_inf_val);
4347
6
        if (err != TxnErrorCode::TXN_OK) {
4348
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
4349
0
            return -1;
4350
0
        }
4351
6
        TxnInfoPB txn_info;
4352
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
4353
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
4354
0
            return -1;
4355
0
        }
4356
4357
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
4358
0
            txn.reset();
4359
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
4360
0
            std::shared_ptr<TxnLazyCommitTask> task =
4361
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
4362
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
4363
0
            if (ret.first != MetaServiceCode::OK) {
4364
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
4365
0
                             << "msg=" << ret.second;
4366
0
                return -1;
4367
0
            }
4368
0
            ++num_advance;
4369
0
            return 0;
4370
6
        } else {
4371
6
            TxnRunningPB txn_running_pb;
4372
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
4373
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4374
0
                return -1;
4375
0
            }
4376
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
4377
4
                return 0;
4378
4
            }
4379
2
            ++num_timeout;
4380
4381
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
4382
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
4383
2
            txn_info.set_finish_time(current_time);
4384
2
            txn_info.set_reason("timeout");
4385
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
4386
2
            txn_inf_val.clear();
4387
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
4388
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
4389
0
                return -1;
4390
0
            }
4391
2
            txn->put(txn_inf_key, txn_inf_val);
4392
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
4393
            // Put recycle txn key
4394
2
            std::string recyc_txn_key, recyc_txn_val;
4395
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
4396
2
            RecycleTxnPB recycle_txn_pb;
4397
2
            recycle_txn_pb.set_creation_time(current_time);
4398
2
            recycle_txn_pb.set_label(txn_info.label());
4399
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
4400
0
                LOG_WARNING("failed to serialize txn recycle info")
4401
0
                        .tag("key", hex(k))
4402
0
                        .tag("db_id", db_id)
4403
0
                        .tag("txn_id", txn_id);
4404
0
                return -1;
4405
0
            }
4406
2
            txn->put(recyc_txn_key, recyc_txn_val);
4407
            // Remove txn running key
4408
2
            txn->remove(k);
4409
2
            err = txn->commit();
4410
2
            if (err != TxnErrorCode::TXN_OK) {
4411
0
                LOG_WARNING("failed to commit txn err={}", err)
4412
0
                        .tag("key", hex(k))
4413
0
                        .tag("db_id", db_id)
4414
0
                        .tag("txn_id", txn_id);
4415
0
                return -1;
4416
0
            }
4417
2
            metrics_context.total_recycled_num = ++num_abort;
4418
2
            metrics_context.report();
4419
2
        }
4420
4421
2
        return 0;
4422
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4323
4
                                  this](std::string_view k, std::string_view v) -> int {
4324
4
        ++num_scanned;
4325
4326
4
        std::unique_ptr<Transaction> txn;
4327
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4328
4
        if (err != TxnErrorCode::TXN_OK) {
4329
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4330
0
            return -1;
4331
0
        }
4332
4
        std::string_view k1 = k;
4333
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
4334
4
        k1.remove_prefix(1); // Remove key space
4335
4
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4336
4
        if (decode_key(&k1, &out) != 0) {
4337
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
4338
0
            return -1;
4339
0
        }
4340
4
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4341
4
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4342
4
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4343
        // Update txn_info
4344
4
        std::string txn_inf_key, txn_inf_val;
4345
4
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
4346
4
        err = txn->get(txn_inf_key, &txn_inf_val);
4347
4
        if (err != TxnErrorCode::TXN_OK) {
4348
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
4349
0
            return -1;
4350
0
        }
4351
4
        TxnInfoPB txn_info;
4352
4
        if (!txn_info.ParseFromString(txn_inf_val)) {
4353
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
4354
0
            return -1;
4355
0
        }
4356
4357
4
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
4358
4
            txn.reset();
4359
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
4360
4
            std::shared_ptr<TxnLazyCommitTask> task =
4361
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
4362
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
4363
4
            if (ret.first != MetaServiceCode::OK) {
4364
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
4365
0
                             << "msg=" << ret.second;
4366
0
                return -1;
4367
0
            }
4368
4
            ++num_advance;
4369
4
            return 0;
4370
4
        } else {
4371
0
            TxnRunningPB txn_running_pb;
4372
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
4373
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4374
0
                return -1;
4375
0
            }
4376
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
4377
0
                return 0;
4378
0
            }
4379
0
            ++num_timeout;
4380
4381
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
4382
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
4383
0
            txn_info.set_finish_time(current_time);
4384
0
            txn_info.set_reason("timeout");
4385
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
4386
0
            txn_inf_val.clear();
4387
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
4388
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
4389
0
                return -1;
4390
0
            }
4391
0
            txn->put(txn_inf_key, txn_inf_val);
4392
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
4393
            // Put recycle txn key
4394
0
            std::string recyc_txn_key, recyc_txn_val;
4395
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
4396
0
            RecycleTxnPB recycle_txn_pb;
4397
0
            recycle_txn_pb.set_creation_time(current_time);
4398
0
            recycle_txn_pb.set_label(txn_info.label());
4399
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
4400
0
                LOG_WARNING("failed to serialize txn recycle info")
4401
0
                        .tag("key", hex(k))
4402
0
                        .tag("db_id", db_id)
4403
0
                        .tag("txn_id", txn_id);
4404
0
                return -1;
4405
0
            }
4406
0
            txn->put(recyc_txn_key, recyc_txn_val);
4407
            // Remove txn running key
4408
0
            txn->remove(k);
4409
0
            err = txn->commit();
4410
0
            if (err != TxnErrorCode::TXN_OK) {
4411
0
                LOG_WARNING("failed to commit txn err={}", err)
4412
0
                        .tag("key", hex(k))
4413
0
                        .tag("db_id", db_id)
4414
0
                        .tag("txn_id", txn_id);
4415
0
                return -1;
4416
0
            }
4417
0
            metrics_context.total_recycled_num = ++num_abort;
4418
0
            metrics_context.report();
4419
0
        }
4420
4421
0
        return 0;
4422
4
    };
4423
4424
20
    if (config::enable_recycler_stats_metrics) {
4425
0
        scan_and_statistics_abort_timeout_txn();
4426
0
    }
4427
    // recycle_func and loop_done for scan and recycle
4428
20
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
4429
20
                            std::move(handle_txn_running_kv));
4430
20
}
4431
4432
21
int InstanceRecycler::recycle_expired_txn_label() {
4433
21
    const std::string task_name = "recycle_expired_txn_label";
4434
21
    int64_t num_scanned = 0;
4435
21
    int64_t num_expired = 0;
4436
21
    int64_t num_recycled = 0;
4437
21
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4438
21
    int ret = 0;
4439
4440
21
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
4441
21
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
4442
21
    std::string begin_recycle_txn_key;
4443
21
    std::string end_recycle_txn_key;
4444
21
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
4445
21
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
4446
21
    std::vector<std::string> recycle_txn_info_keys;
4447
4448
21
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
4449
4450
21
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4451
21
    register_recycle_task(task_name, start_time);
4452
21
    DORIS_CLOUD_DEFER {
4453
21
        unregister_recycle_task(task_name);
4454
21
        int64_t cost =
4455
21
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4456
21
        metrics_context.finish_report();
4457
21
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
4458
21
                .tag("instance_id", instance_id_)
4459
21
                .tag("num_scanned", num_scanned)
4460
21
                .tag("num_expired", num_expired)
4461
21
                .tag("num_recycled", num_recycled);
4462
21
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
4452
18
    DORIS_CLOUD_DEFER {
4453
18
        unregister_recycle_task(task_name);
4454
18
        int64_t cost =
4455
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4456
18
        metrics_context.finish_report();
4457
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
4458
18
                .tag("instance_id", instance_id_)
4459
18
                .tag("num_scanned", num_scanned)
4460
18
                .tag("num_expired", num_expired)
4461
18
                .tag("num_recycled", num_recycled);
4462
18
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
4452
3
    DORIS_CLOUD_DEFER {
4453
3
        unregister_recycle_task(task_name);
4454
3
        int64_t cost =
4455
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4456
3
        metrics_context.finish_report();
4457
3
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
4458
3
                .tag("instance_id", instance_id_)
4459
3
                .tag("num_scanned", num_scanned)
4460
3
                .tag("num_expired", num_expired)
4461
3
                .tag("num_recycled", num_recycled);
4462
3
    };
4463
4464
21
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4465
4466
21
    SyncExecutor<int> concurrent_delete_executor(
4467
21
            _thread_pool_group.s3_producer_pool,
4468
21
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
4469
23.0k
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
4469
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
4469
3
            [](const int& ret) { return ret != 0; });
4470
4471
21
    int64_t current_time_ms =
4472
21
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4473
4474
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
4475
30.0k
        ++num_scanned;
4476
30.0k
        RecycleTxnPB recycle_txn_pb;
4477
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
4478
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4479
0
            return -1;
4480
0
        }
4481
30.0k
        if ((config::force_immediate_recycle) ||
4482
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
4483
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
4484
30.0k
             current_time_ms)) {
4485
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
4486
23.0k
            num_expired++;
4487
23.0k
            recycle_txn_info_keys.emplace_back(k);
4488
23.0k
        }
4489
30.0k
        return 0;
4490
30.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4474
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
4475
30.0k
        ++num_scanned;
4476
30.0k
        RecycleTxnPB recycle_txn_pb;
4477
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
4478
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4479
0
            return -1;
4480
0
        }
4481
30.0k
        if ((config::force_immediate_recycle) ||
4482
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
4483
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
4484
30.0k
             current_time_ms)) {
4485
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
4486
23.0k
            num_expired++;
4487
23.0k
            recycle_txn_info_keys.emplace_back(k);
4488
23.0k
        }
4489
30.0k
        return 0;
4490
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4474
3
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
4475
3
        ++num_scanned;
4476
3
        RecycleTxnPB recycle_txn_pb;
4477
3
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
4478
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4479
0
            return -1;
4480
0
        }
4481
3
        if ((config::force_immediate_recycle) ||
4482
3
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
4483
3
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
4484
3
             current_time_ms)) {
4485
3
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
4486
3
            num_expired++;
4487
3
            recycle_txn_info_keys.emplace_back(k);
4488
3
        }
4489
3
        return 0;
4490
3
    };
4491
4492
    // int 0 for success, 1 for conflict, -1 for error
4493
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
4494
23.0k
        std::string_view k1 = k;
4495
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
4496
23.0k
        k1.remove_prefix(1); // Remove key space
4497
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4498
23.0k
        int ret = decode_key(&k1, &out);
4499
23.0k
        if (ret != 0) {
4500
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
4501
0
            return -1;
4502
0
        }
4503
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4504
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4505
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4506
23.0k
        std::unique_ptr<Transaction> txn;
4507
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4508
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4509
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4510
0
            return -1;
4511
0
        }
4512
        // Remove txn index kv
4513
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
4514
23.0k
        txn->remove(index_key);
4515
        // Remove txn info kv
4516
23.0k
        std::string info_key, info_val;
4517
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
4518
23.0k
        err = txn->get(info_key, &info_val);
4519
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4520
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
4521
0
            return -1;
4522
0
        }
4523
23.0k
        TxnInfoPB txn_info;
4524
23.0k
        if (!txn_info.ParseFromString(info_val)) {
4525
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
4526
0
            return -1;
4527
0
        }
4528
23.0k
        txn->remove(info_key);
4529
        // Remove sub txn index kvs
4530
23.0k
        std::vector<std::string> sub_txn_index_keys;
4531
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
4532
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
4533
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
4534
22.9k
        }
4535
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
4536
23.0k
            txn->remove(sub_txn_index_key);
4537
23.0k
        }
4538
        // Update txn label
4539
23.0k
        std::string label_key, label_val;
4540
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
4541
23.0k
        err = txn->get(label_key, &label_val);
4542
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4543
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
4544
0
                         << " err=" << err;
4545
0
            return -1;
4546
0
        }
4547
23.0k
        TxnLabelPB txn_label;
4548
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
4549
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
4550
0
            return -1;
4551
0
        }
4552
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
4553
23.0k
        if (it != txn_label.txn_ids().end()) {
4554
23.0k
            txn_label.mutable_txn_ids()->erase(it);
4555
23.0k
        }
4556
23.0k
        if (txn_label.txn_ids().empty()) {
4557
23.0k
            txn->remove(label_key);
4558
23.0k
            TEST_SYNC_POINT_CALLBACK(
4559
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
4560
23.0k
        } else {
4561
73
            if (!txn_label.SerializeToString(&label_val)) {
4562
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
4563
0
                return -1;
4564
0
            }
4565
73
            TEST_SYNC_POINT_CALLBACK(
4566
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
4567
73
            txn->atomic_set_ver_value(label_key, label_val);
4568
73
            TEST_SYNC_POINT_CALLBACK(
4569
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
4570
73
        }
4571
        // Remove recycle txn kv
4572
23.0k
        txn->remove(k);
4573
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
4574
23.0k
        err = txn->commit();
4575
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4576
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
4577
62
                TEST_SYNC_POINT_CALLBACK(
4578
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
4579
                // log the txn_id and label
4580
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
4581
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
4582
62
                             << " txn_label=" << txn_info.label();
4583
62
                return 1;
4584
62
            }
4585
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
4586
0
            return -1;
4587
62
        }
4588
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
4589
23.0k
        metrics_context.report();
4590
4591
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
4592
23.0k
        return 0;
4593
23.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4493
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
4494
23.0k
        std::string_view k1 = k;
4495
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
4496
23.0k
        k1.remove_prefix(1); // Remove key space
4497
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4498
23.0k
        int ret = decode_key(&k1, &out);
4499
23.0k
        if (ret != 0) {
4500
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
4501
0
            return -1;
4502
0
        }
4503
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4504
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4505
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4506
23.0k
        std::unique_ptr<Transaction> txn;
4507
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4508
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4509
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4510
0
            return -1;
4511
0
        }
4512
        // Remove txn index kv
4513
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
4514
23.0k
        txn->remove(index_key);
4515
        // Remove txn info kv
4516
23.0k
        std::string info_key, info_val;
4517
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
4518
23.0k
        err = txn->get(info_key, &info_val);
4519
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4520
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
4521
0
            return -1;
4522
0
        }
4523
23.0k
        TxnInfoPB txn_info;
4524
23.0k
        if (!txn_info.ParseFromString(info_val)) {
4525
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
4526
0
            return -1;
4527
0
        }
4528
23.0k
        txn->remove(info_key);
4529
        // Remove sub txn index kvs
4530
23.0k
        std::vector<std::string> sub_txn_index_keys;
4531
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
4532
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
4533
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
4534
22.9k
        }
4535
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
4536
23.0k
            txn->remove(sub_txn_index_key);
4537
23.0k
        }
4538
        // Update txn label
4539
23.0k
        std::string label_key, label_val;
4540
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
4541
23.0k
        err = txn->get(label_key, &label_val);
4542
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4543
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
4544
0
                         << " err=" << err;
4545
0
            return -1;
4546
0
        }
4547
23.0k
        TxnLabelPB txn_label;
4548
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
4549
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
4550
0
            return -1;
4551
0
        }
4552
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
4553
23.0k
        if (it != txn_label.txn_ids().end()) {
4554
23.0k
            txn_label.mutable_txn_ids()->erase(it);
4555
23.0k
        }
4556
23.0k
        if (txn_label.txn_ids().empty()) {
4557
23.0k
            txn->remove(label_key);
4558
23.0k
            TEST_SYNC_POINT_CALLBACK(
4559
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
4560
23.0k
        } else {
4561
73
            if (!txn_label.SerializeToString(&label_val)) {
4562
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
4563
0
                return -1;
4564
0
            }
4565
73
            TEST_SYNC_POINT_CALLBACK(
4566
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
4567
73
            txn->atomic_set_ver_value(label_key, label_val);
4568
73
            TEST_SYNC_POINT_CALLBACK(
4569
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
4570
73
        }
4571
        // Remove recycle txn kv
4572
23.0k
        txn->remove(k);
4573
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
4574
23.0k
        err = txn->commit();
4575
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4576
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
4577
62
                TEST_SYNC_POINT_CALLBACK(
4578
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
4579
                // log the txn_id and label
4580
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
4581
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
4582
62
                             << " txn_label=" << txn_info.label();
4583
62
                return 1;
4584
62
            }
4585
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
4586
0
            return -1;
4587
62
        }
4588
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
4589
23.0k
        metrics_context.report();
4590
4591
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
4592
23.0k
        return 0;
4593
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4493
3
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
4494
3
        std::string_view k1 = k;
4495
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
4496
3
        k1.remove_prefix(1); // Remove key space
4497
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4498
3
        int ret = decode_key(&k1, &out);
4499
3
        if (ret != 0) {
4500
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
4501
0
            return -1;
4502
0
        }
4503
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4504
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4505
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4506
3
        std::unique_ptr<Transaction> txn;
4507
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4508
3
        if (err != TxnErrorCode::TXN_OK) {
4509
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4510
0
            return -1;
4511
0
        }
4512
        // Remove txn index kv
4513
3
        auto index_key = txn_index_key({instance_id_, txn_id});
4514
3
        txn->remove(index_key);
4515
        // Remove txn info kv
4516
3
        std::string info_key, info_val;
4517
3
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
4518
3
        err = txn->get(info_key, &info_val);
4519
3
        if (err != TxnErrorCode::TXN_OK) {
4520
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
4521
0
            return -1;
4522
0
        }
4523
3
        TxnInfoPB txn_info;
4524
3
        if (!txn_info.ParseFromString(info_val)) {
4525
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
4526
0
            return -1;
4527
0
        }
4528
3
        txn->remove(info_key);
4529
        // Remove sub txn index kvs
4530
3
        std::vector<std::string> sub_txn_index_keys;
4531
3
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
4532
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
4533
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
4534
0
        }
4535
3
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
4536
0
            txn->remove(sub_txn_index_key);
4537
0
        }
4538
        // Update txn label
4539
3
        std::string label_key, label_val;
4540
3
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
4541
3
        err = txn->get(label_key, &label_val);
4542
3
        if (err != TxnErrorCode::TXN_OK) {
4543
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
4544
0
                         << " err=" << err;
4545
0
            return -1;
4546
0
        }
4547
3
        TxnLabelPB txn_label;
4548
3
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
4549
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
4550
0
            return -1;
4551
0
        }
4552
3
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
4553
3
        if (it != txn_label.txn_ids().end()) {
4554
3
            txn_label.mutable_txn_ids()->erase(it);
4555
3
        }
4556
3
        if (txn_label.txn_ids().empty()) {
4557
3
            txn->remove(label_key);
4558
3
            TEST_SYNC_POINT_CALLBACK(
4559
3
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
4560
3
        } else {
4561
0
            if (!txn_label.SerializeToString(&label_val)) {
4562
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
4563
0
                return -1;
4564
0
            }
4565
0
            TEST_SYNC_POINT_CALLBACK(
4566
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
4567
0
            txn->atomic_set_ver_value(label_key, label_val);
4568
0
            TEST_SYNC_POINT_CALLBACK(
4569
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
4570
0
        }
4571
        // Remove recycle txn kv
4572
3
        txn->remove(k);
4573
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
4574
3
        err = txn->commit();
4575
3
        if (err != TxnErrorCode::TXN_OK) {
4576
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
4577
0
                TEST_SYNC_POINT_CALLBACK(
4578
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
4579
                // log the txn_id and label
4580
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
4581
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
4582
0
                             << " txn_label=" << txn_info.label();
4583
0
                return 1;
4584
0
            }
4585
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
4586
0
            return -1;
4587
0
        }
4588
3
        metrics_context.total_recycled_num = ++num_recycled;
4589
3
        metrics_context.report();
4590
4591
3
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
4592
3
        return 0;
4593
3
    };
4594
4595
21
    auto loop_done = [&]() -> int {
4596
12
        DORIS_CLOUD_DEFER {
4597
12
            recycle_txn_info_keys.clear();
4598
12
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4596
9
        DORIS_CLOUD_DEFER {
4597
9
            recycle_txn_info_keys.clear();
4598
9
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4596
3
        DORIS_CLOUD_DEFER {
4597
3
            recycle_txn_info_keys.clear();
4598
3
        };
4599
12
        TEST_SYNC_POINT_CALLBACK(
4600
12
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
4601
12
                &recycle_txn_info_keys);
4602
23.0k
        for (const auto& k : recycle_txn_info_keys) {
4603
23.0k
            concurrent_delete_executor.add([&]() {
4604
23.0k
                int ret = delete_recycle_txn_kv(k);
4605
23.0k
                if (ret == 1) {
4606
18
                    constexpr int MAX_RETRY = 10;
4607
54
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4608
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4609
54
                        ret = delete_recycle_txn_kv(k);
4610
                        // clang-format off
4611
54
                        TEST_SYNC_POINT_CALLBACK(
4612
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4613
                        // clang-format off
4614
54
                        if (ret != 1) {
4615
18
                            break;
4616
18
                        }
4617
                        // random sleep 0-100 ms to retry
4618
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4619
36
                    }
4620
18
                }
4621
23.0k
                if (ret != 0) {
4622
9
                    LOG_WARNING("failed to delete recycle txn kv")
4623
9
                            .tag("instance id", instance_id_)
4624
9
                            .tag("key", hex(k));
4625
9
                    return -1;
4626
9
                }
4627
23.0k
                return 0;
4628
23.0k
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
4603
23.0k
            concurrent_delete_executor.add([&]() {
4604
23.0k
                int ret = delete_recycle_txn_kv(k);
4605
23.0k
                if (ret == 1) {
4606
18
                    constexpr int MAX_RETRY = 10;
4607
54
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4608
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4609
54
                        ret = delete_recycle_txn_kv(k);
4610
                        // clang-format off
4611
54
                        TEST_SYNC_POINT_CALLBACK(
4612
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4613
                        // clang-format off
4614
54
                        if (ret != 1) {
4615
18
                            break;
4616
18
                        }
4617
                        // random sleep 0-100 ms to retry
4618
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4619
36
                    }
4620
18
                }
4621
23.0k
                if (ret != 0) {
4622
9
                    LOG_WARNING("failed to delete recycle txn kv")
4623
9
                            .tag("instance id", instance_id_)
4624
9
                            .tag("key", hex(k));
4625
9
                    return -1;
4626
9
                }
4627
23.0k
                return 0;
4628
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
4603
3
            concurrent_delete_executor.add([&]() {
4604
3
                int ret = delete_recycle_txn_kv(k);
4605
3
                if (ret == 1) {
4606
0
                    constexpr int MAX_RETRY = 10;
4607
0
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4608
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4609
0
                        ret = delete_recycle_txn_kv(k);
4610
                        // clang-format off
4611
0
                        TEST_SYNC_POINT_CALLBACK(
4612
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4613
                        // clang-format off
4614
0
                        if (ret != 1) {
4615
0
                            break;
4616
0
                        }
4617
                        // random sleep 0-100 ms to retry
4618
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4619
0
                    }
4620
0
                }
4621
3
                if (ret != 0) {
4622
0
                    LOG_WARNING("failed to delete recycle txn kv")
4623
0
                            .tag("instance id", instance_id_)
4624
0
                            .tag("key", hex(k));
4625
0
                    return -1;
4626
0
                }
4627
3
                return 0;
4628
3
            });
4629
23.0k
        }
4630
12
        bool finished = true;
4631
12
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4632
23.0k
        for (int r : rets) {
4633
23.0k
            if (r != 0) {
4634
9
                ret = -1;
4635
9
            }
4636
23.0k
        }
4637
4638
12
        ret = finished ? ret : -1;
4639
4640
12
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
4641
4642
12
        if (ret != 0) {
4643
3
            LOG_WARNING("recycle txn kv ret!=0")
4644
3
                    .tag("finished", finished)
4645
3
                    .tag("ret", ret)
4646
3
                    .tag("instance_id", instance_id_);
4647
3
            return ret;
4648
3
        }
4649
9
        return ret;
4650
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
4595
9
    auto loop_done = [&]() -> int {
4596
9
        DORIS_CLOUD_DEFER {
4597
9
            recycle_txn_info_keys.clear();
4598
9
        };
4599
9
        TEST_SYNC_POINT_CALLBACK(
4600
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
4601
9
                &recycle_txn_info_keys);
4602
23.0k
        for (const auto& k : recycle_txn_info_keys) {
4603
23.0k
            concurrent_delete_executor.add([&]() {
4604
23.0k
                int ret = delete_recycle_txn_kv(k);
4605
23.0k
                if (ret == 1) {
4606
23.0k
                    constexpr int MAX_RETRY = 10;
4607
23.0k
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4608
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4609
23.0k
                        ret = delete_recycle_txn_kv(k);
4610
                        // clang-format off
4611
23.0k
                        TEST_SYNC_POINT_CALLBACK(
4612
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4613
                        // clang-format off
4614
23.0k
                        if (ret != 1) {
4615
23.0k
                            break;
4616
23.0k
                        }
4617
                        // random sleep 0-100 ms to retry
4618
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4619
23.0k
                    }
4620
23.0k
                }
4621
23.0k
                if (ret != 0) {
4622
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
4623
23.0k
                            .tag("instance id", instance_id_)
4624
23.0k
                            .tag("key", hex(k));
4625
23.0k
                    return -1;
4626
23.0k
                }
4627
23.0k
                return 0;
4628
23.0k
            });
4629
23.0k
        }
4630
9
        bool finished = true;
4631
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4632
23.0k
        for (int r : rets) {
4633
23.0k
            if (r != 0) {
4634
9
                ret = -1;
4635
9
            }
4636
23.0k
        }
4637
4638
9
        ret = finished ? ret : -1;
4639
4640
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
4641
4642
9
        if (ret != 0) {
4643
3
            LOG_WARNING("recycle txn kv ret!=0")
4644
3
                    .tag("finished", finished)
4645
3
                    .tag("ret", ret)
4646
3
                    .tag("instance_id", instance_id_);
4647
3
            return ret;
4648
3
        }
4649
6
        return ret;
4650
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
4595
3
    auto loop_done = [&]() -> int {
4596
3
        DORIS_CLOUD_DEFER {
4597
3
            recycle_txn_info_keys.clear();
4598
3
        };
4599
3
        TEST_SYNC_POINT_CALLBACK(
4600
3
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
4601
3
                &recycle_txn_info_keys);
4602
3
        for (const auto& k : recycle_txn_info_keys) {
4603
3
            concurrent_delete_executor.add([&]() {
4604
3
                int ret = delete_recycle_txn_kv(k);
4605
3
                if (ret == 1) {
4606
3
                    constexpr int MAX_RETRY = 10;
4607
3
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4608
3
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4609
3
                        ret = delete_recycle_txn_kv(k);
4610
                        // clang-format off
4611
3
                        TEST_SYNC_POINT_CALLBACK(
4612
3
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4613
                        // clang-format off
4614
3
                        if (ret != 1) {
4615
3
                            break;
4616
3
                        }
4617
                        // random sleep 0-100 ms to retry
4618
3
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4619
3
                    }
4620
3
                }
4621
3
                if (ret != 0) {
4622
3
                    LOG_WARNING("failed to delete recycle txn kv")
4623
3
                            .tag("instance id", instance_id_)
4624
3
                            .tag("key", hex(k));
4625
3
                    return -1;
4626
3
                }
4627
3
                return 0;
4628
3
            });
4629
3
        }
4630
3
        bool finished = true;
4631
3
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4632
3
        for (int r : rets) {
4633
3
            if (r != 0) {
4634
0
                ret = -1;
4635
0
            }
4636
3
        }
4637
4638
3
        ret = finished ? ret : -1;
4639
4640
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
4641
4642
3
        if (ret != 0) {
4643
0
            LOG_WARNING("recycle txn kv ret!=0")
4644
0
                    .tag("finished", finished)
4645
0
                    .tag("ret", ret)
4646
0
                    .tag("instance_id", instance_id_);
4647
0
            return ret;
4648
0
        }
4649
3
        return ret;
4650
3
    };
4651
4652
21
    if (config::enable_recycler_stats_metrics) {
4653
0
        scan_and_statistics_expired_txn_label();
4654
0
    }
4655
    // recycle_func and loop_done for scan and recycle
4656
21
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
4657
21
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
4658
21
}
4659
4660
// Identifying fields of one copy job, passed to BatchObjStoreAccessor::add().
// All five fields are formatted into the per-job log prefix; instance_id,
// stage_id and table_id are additionally used to build the copy file keys.
struct CopyJobIdTuple {
    std::string instance_id;
    std::string stage_id;
    // Fixed-width to match the int64_t table id decoded from the copy job key
    // (plain `long` is only 32 bits wide on some platforms).
    int64_t table_id = 0;
    // Printed as "query_id" in the log prefix.
    std::string copy_id;
    // Printed as "path" in the log prefix.
    std::string stage_path;
};
4667
struct BatchObjStoreAccessor {
4668
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
4669
                          TxnKv* txn_kv)
4670
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
4671
3
    ~BatchObjStoreAccessor() {
4672
3
        if (!paths_.empty()) {
4673
3
            consume();
4674
3
        }
4675
3
    }
4676
4677
    /**
4678
    * To implicitely do batch work and submit the batch delete task to s3
4679
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
4680
    *
4681
    * @param copy_job The protubuf struct consists of the copy job files.
4682
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
4683
    *            it would last until we finish the delete task, here we need pass one string value
4684
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
4685
    */
4686
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
4687
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
4688
5
        auto& file_keys = copy_file_keys_[key];
4689
5
        file_keys.log_trace =
4690
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
4691
5
                            instance_id, stage_id, table_id, copy_id, path);
4692
5
        std::string_view log_trace = file_keys.log_trace;
4693
2.03k
        for (const auto& file : copy_job.object_files()) {
4694
2.03k
            auto relative_path = file.relative_path();
4695
2.03k
            paths_.push_back(relative_path);
4696
2.03k
            file_keys.keys.push_back(copy_file_key(
4697
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
4698
2.03k
            LOG_INFO(log_trace)
4699
2.03k
                    .tag("relative_path", relative_path)
4700
2.03k
                    .tag("batch_count", batch_count_);
4701
2.03k
        }
4702
5
        LOG_INFO(log_trace)
4703
5
                .tag("objects_num", copy_job.object_files().size())
4704
5
                .tag("batch_count", batch_count_);
4705
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
4706
        // recommend using delete objects when objects num is less than 10)
4707
5
        if (paths_.size() < 1000) {
4708
3
            return;
4709
3
        }
4710
2
        consume();
4711
2
    }
4712
4713
private:
4714
5
    void consume() {
4715
5
        DORIS_CLOUD_DEFER {
4716
5
            paths_.clear();
4717
5
            copy_file_keys_.clear();
4718
5
            batch_count_++;
4719
4720
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
4721
5
                        batch_count_);
4722
5
        };
4723
4724
5
        StopWatch sw;
4725
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
4726
5
        if (0 != accessor_->delete_files(paths_)) {
4727
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
4728
2
                        paths_.size(), batch_count_, sw.elapsed_us());
4729
2
            return;
4730
2
        }
4731
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
4732
3
                    paths_.size(), batch_count_, sw.elapsed_us());
4733
        // delete fdb's keys
4734
3
        for (auto& file_keys : copy_file_keys_) {
4735
3
            auto& [log_trace, keys] = file_keys.second;
4736
3
            std::unique_ptr<Transaction> txn;
4737
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
4738
0
                LOG(WARNING) << "failed to create txn";
4739
0
                continue;
4740
0
            }
4741
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
4742
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
4743
            // limited, should not cause the txn commit failed.
4744
1.02k
            for (const auto& key : keys) {
4745
1.02k
                txn->remove(key);
4746
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
4747
1.02k
            }
4748
3
            txn->remove(file_keys.first);
4749
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
4750
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
4751
0
                continue;
4752
0
            }
4753
3
        }
4754
3
    }
4755
    std::shared_ptr<StorageVaultAccessor> accessor_;
4756
    // the path of the s3 files to be deleted
4757
    std::vector<std::string> paths_;
4758
    struct CopyFiles {
4759
        std::string log_trace;
4760
        std::vector<std::string> keys;
4761
    };
4762
    // pair<std::string, std::vector<std::string>>
4763
    // first: instance_id_ stage_id table_id query_id
4764
    // second: keys to be deleted
4765
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
4766
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
4767
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
4768
    // which can together uniquely identifies different tasks for tracing log
4769
    uint64_t& batch_count_;
4770
    TxnKv* txn_kv_;
4771
};
4772
4773
13
int InstanceRecycler::recycle_copy_jobs() {
4774
13
    int64_t num_scanned = 0;
4775
13
    int64_t num_finished = 0;
4776
13
    int64_t num_expired = 0;
4777
13
    int64_t num_recycled = 0;
4778
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
4779
13
    uint64_t batch_count = 0;
4780
13
    const std::string task_name = "recycle_copy_jobs";
4781
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4782
4783
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
4784
4785
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4786
13
    register_recycle_task(task_name, start_time);
4787
4788
13
    DORIS_CLOUD_DEFER {
4789
13
        unregister_recycle_task(task_name);
4790
13
        int64_t cost =
4791
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4792
13
        metrics_context.finish_report();
4793
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
4794
13
                .tag("instance_id", instance_id_)
4795
13
                .tag("num_scanned", num_scanned)
4796
13
                .tag("num_finished", num_finished)
4797
13
                .tag("num_expired", num_expired)
4798
13
                .tag("num_recycled", num_recycled);
4799
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
4788
13
    DORIS_CLOUD_DEFER {
4789
13
        unregister_recycle_task(task_name);
4790
13
        int64_t cost =
4791
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4792
13
        metrics_context.finish_report();
4793
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
4794
13
                .tag("instance_id", instance_id_)
4795
13
                .tag("num_scanned", num_scanned)
4796
13
                .tag("num_finished", num_finished)
4797
13
                .tag("num_expired", num_expired)
4798
13
                .tag("num_recycled", num_recycled);
4799
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
4800
4801
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
4802
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
4803
13
    std::string key0;
4804
13
    std::string key1;
4805
13
    copy_job_key(key_info0, &key0);
4806
13
    copy_job_key(key_info1, &key1);
4807
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
4808
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
4809
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
4810
16
                         this](std::string_view k, std::string_view v) -> int {
4811
16
        ++num_scanned;
4812
16
        CopyJobPB copy_job;
4813
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
4814
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
4815
0
            return -1;
4816
0
        }
4817
4818
        // decode copy job key
4819
16
        auto k1 = k;
4820
16
        k1.remove_prefix(1);
4821
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4822
16
        decode_key(&k1, &out);
4823
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
4824
        // -> CopyJobPB
4825
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
4826
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
4827
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
4828
4829
16
        bool check_storage = true;
4830
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
4831
12
            ++num_finished;
4832
4833
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
4834
7
                auto it = stage_accessor_map.find(stage_id);
4835
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
4836
7
                std::string_view path;
4837
7
                if (it != stage_accessor_map.end()) {
4838
2
                    accessor = it->second;
4839
5
                } else {
4840
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
4841
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
4842
5
                                                      &inner_accessor);
4843
5
                    if (ret < 0) { // error
4844
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
4845
0
                        return -1;
4846
5
                    } else if (ret == 0) {
4847
3
                        path = inner_accessor->uri();
4848
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
4849
3
                                inner_accessor, batch_count, txn_kv_.get());
4850
3
                        stage_accessor_map.emplace(stage_id, accessor);
4851
3
                    } else { // stage not found, skip check storage
4852
2
                        check_storage = false;
4853
2
                    }
4854
5
                }
4855
7
                if (check_storage) {
4856
                    // TODO delete objects with key and etag is not supported
4857
5
                    accessor->add(std::move(copy_job), std::string(k),
4858
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
4859
5
                    return 0;
4860
5
                }
4861
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
4862
5
                int64_t current_time =
4863
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4864
5
                if (copy_job.finish_time_ms() > 0) {
4865
2
                    if (!config::force_immediate_recycle &&
4866
2
                        current_time < copy_job.finish_time_ms() +
4867
2
                                               config::copy_job_max_retention_second * 1000) {
4868
1
                        return 0;
4869
1
                    }
4870
3
                } else {
4871
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
4872
3
                    if (!config::force_immediate_recycle &&
4873
3
                        current_time < copy_job.start_time_ms() +
4874
3
                                               config::copy_job_max_retention_second * 1000) {
4875
1
                        return 0;
4876
1
                    }
4877
3
                }
4878
5
            }
4879
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
4880
4
            int64_t current_time =
4881
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4882
            // if copy job is timeout: delete all copy file kvs and copy job kv
4883
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
4884
2
                return 0;
4885
2
            }
4886
2
            ++num_expired;
4887
2
        }
4888
4889
        // delete all copy files
4890
7
        std::vector<std::string> copy_file_keys;
4891
70
        for (auto& file : copy_job.object_files()) {
4892
70
            copy_file_keys.push_back(copy_file_key(
4893
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
4894
70
        }
4895
7
        std::unique_ptr<Transaction> txn;
4896
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4897
0
            LOG(WARNING) << "failed to create txn";
4898
0
            return -1;
4899
0
        }
4900
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
4901
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
4902
        // limited, should not cause the txn commit failed.
4903
70
        for (const auto& key : copy_file_keys) {
4904
70
            txn->remove(key);
4905
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
4906
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
4907
70
                      << ", query_id=" << copy_id;
4908
70
        }
4909
7
        txn->remove(k);
4910
7
        TxnErrorCode err = txn->commit();
4911
7
        if (err != TxnErrorCode::TXN_OK) {
4912
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
4913
0
            return -1;
4914
0
        }
4915
4916
7
        metrics_context.total_recycled_num = ++num_recycled;
4917
7
        metrics_context.report();
4918
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4919
7
        return 0;
4920
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4810
16
                         this](std::string_view k, std::string_view v) -> int {
4811
16
        ++num_scanned;
4812
16
        CopyJobPB copy_job;
4813
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
4814
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
4815
0
            return -1;
4816
0
        }
4817
4818
        // decode copy job key
4819
16
        auto k1 = k;
4820
16
        k1.remove_prefix(1);
4821
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4822
16
        decode_key(&k1, &out);
4823
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
4824
        // -> CopyJobPB
4825
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
4826
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
4827
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
4828
4829
16
        bool check_storage = true;
4830
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
4831
12
            ++num_finished;
4832
4833
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
4834
7
                auto it = stage_accessor_map.find(stage_id);
4835
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
4836
7
                std::string_view path;
4837
7
                if (it != stage_accessor_map.end()) {
4838
2
                    accessor = it->second;
4839
5
                } else {
4840
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
4841
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
4842
5
                                                      &inner_accessor);
4843
5
                    if (ret < 0) { // error
4844
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
4845
0
                        return -1;
4846
5
                    } else if (ret == 0) {
4847
3
                        path = inner_accessor->uri();
4848
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
4849
3
                                inner_accessor, batch_count, txn_kv_.get());
4850
3
                        stage_accessor_map.emplace(stage_id, accessor);
4851
3
                    } else { // stage not found, skip check storage
4852
2
                        check_storage = false;
4853
2
                    }
4854
5
                }
4855
7
                if (check_storage) {
4856
                    // TODO delete objects with key and etag is not supported
4857
5
                    accessor->add(std::move(copy_job), std::string(k),
4858
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
4859
5
                    return 0;
4860
5
                }
4861
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
4862
5
                int64_t current_time =
4863
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4864
5
                if (copy_job.finish_time_ms() > 0) {
4865
2
                    if (!config::force_immediate_recycle &&
4866
2
                        current_time < copy_job.finish_time_ms() +
4867
2
                                               config::copy_job_max_retention_second * 1000) {
4868
1
                        return 0;
4869
1
                    }
4870
3
                } else {
4871
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
4872
3
                    if (!config::force_immediate_recycle &&
4873
3
                        current_time < copy_job.start_time_ms() +
4874
3
                                               config::copy_job_max_retention_second * 1000) {
4875
1
                        return 0;
4876
1
                    }
4877
3
                }
4878
5
            }
4879
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
4880
4
            int64_t current_time =
4881
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4882
            // if copy job is timeout: delete all copy file kvs and copy job kv
4883
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
4884
2
                return 0;
4885
2
            }
4886
2
            ++num_expired;
4887
2
        }
4888
4889
        // delete all copy files
4890
7
        std::vector<std::string> copy_file_keys;
4891
70
        for (auto& file : copy_job.object_files()) {
4892
70
            copy_file_keys.push_back(copy_file_key(
4893
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
4894
70
        }
4895
7
        std::unique_ptr<Transaction> txn;
4896
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4897
0
            LOG(WARNING) << "failed to create txn";
4898
0
            return -1;
4899
0
        }
4900
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
4901
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
4902
        // limited, should not cause the txn commit failed.
4903
70
        for (const auto& key : copy_file_keys) {
4904
70
            txn->remove(key);
4905
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
4906
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
4907
70
                      << ", query_id=" << copy_id;
4908
70
        }
4909
7
        txn->remove(k);
4910
7
        TxnErrorCode err = txn->commit();
4911
7
        if (err != TxnErrorCode::TXN_OK) {
4912
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
4913
0
            return -1;
4914
0
        }
4915
4916
7
        metrics_context.total_recycled_num = ++num_recycled;
4917
7
        metrics_context.report();
4918
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4919
7
        return 0;
4920
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
4921
4922
13
    if (config::enable_recycler_stats_metrics) {
4923
0
        scan_and_statistics_copy_jobs();
4924
0
    }
4925
    // recycle_func and loop_done for scan and recycle
4926
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
4927
13
}
4928
4929
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
4930
                                             const StagePB::StageType& stage_type,
4931
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
4932
5
#ifdef UNIT_TEST
4933
    // In unit test, external use the same accessor as the internal stage
4934
5
    auto it = accessor_map_.find(stage_id);
4935
5
    if (it != accessor_map_.end()) {
4936
3
        *accessor = it->second;
4937
3
    } else {
4938
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
4939
2
        return 1;
4940
2
    }
4941
#else
4942
    // init s3 accessor and add to accessor map
4943
    auto stage_it =
4944
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
4945
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
4946
4947
    if (stage_it == instance_info_.stages().end()) {
4948
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
4949
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
4950
        return 1;
4951
    }
4952
4953
    const auto& object_store_info = stage_it->obj_info();
4954
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
4955
4956
    S3Conf s3_conf;
4957
    if (stage_type == StagePB::EXTERNAL) {
4958
        if (stage_access_type == StagePB::AKSK) {
4959
            auto conf = S3Conf::from_obj_store_info(object_store_info);
4960
            if (!conf) {
4961
                return -1;
4962
            }
4963
4964
            s3_conf = std::move(*conf);
4965
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
4966
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
4967
            if (!conf) {
4968
                return -1;
4969
            }
4970
4971
            s3_conf = std::move(*conf);
4972
            if (instance_info_.ram_user().has_encryption_info()) {
4973
                AkSkPair plain_ak_sk_pair;
4974
                int ret = decrypt_ak_sk_helper(
4975
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
4976
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
4977
                if (ret != 0) {
4978
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
4979
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
4980
                    return -1;
4981
                }
4982
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
4983
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
4984
            } else {
4985
                s3_conf.ak = instance_info_.ram_user().ak();
4986
                s3_conf.sk = instance_info_.ram_user().sk();
4987
            }
4988
        } else {
4989
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
4990
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
4991
            return -1;
4992
        }
4993
    } else if (stage_type == StagePB::INTERNAL) {
4994
        int idx = stoi(object_store_info.id());
4995
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4996
            LOG(WARNING) << "invalid idx: " << idx;
4997
            return -1;
4998
        }
4999
5000
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
5001
        auto conf = S3Conf::from_obj_store_info(old_obj);
5002
        if (!conf) {
5003
            return -1;
5004
        }
5005
5006
        s3_conf = std::move(*conf);
5007
        s3_conf.prefix = object_store_info.prefix();
5008
    } else {
5009
        LOG(WARNING) << "unknown stage type " << stage_type;
5010
        return -1;
5011
    }
5012
5013
    std::shared_ptr<S3Accessor> s3_accessor;
5014
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
5015
    if (ret != 0) {
5016
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
5017
        return -1;
5018
    }
5019
5020
    *accessor = std::move(s3_accessor);
5021
#endif
5022
3
    return 0;
5023
5
}
5024
5025
11
int InstanceRecycler::recycle_stage() {
5026
11
    int64_t num_scanned = 0;
5027
11
    int64_t num_recycled = 0;
5028
11
    const std::string task_name = "recycle_stage";
5029
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5030
5031
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
5032
5033
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5034
11
    register_recycle_task(task_name, start_time);
5035
5036
11
    DORIS_CLOUD_DEFER {
5037
11
        unregister_recycle_task(task_name);
5038
11
        int64_t cost =
5039
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5040
11
        metrics_context.finish_report();
5041
11
        LOG_WARNING("recycle stage, cost={}s", cost)
5042
11
                .tag("instance_id", instance_id_)
5043
11
                .tag("num_scanned", num_scanned)
5044
11
                .tag("num_recycled", num_recycled);
5045
11
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
5036
11
    DORIS_CLOUD_DEFER {
5037
11
        unregister_recycle_task(task_name);
5038
11
        int64_t cost =
5039
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5040
11
        metrics_context.finish_report();
5041
11
        LOG_WARNING("recycle stage, cost={}s", cost)
5042
11
                .tag("instance_id", instance_id_)
5043
11
                .tag("num_scanned", num_scanned)
5044
11
                .tag("num_recycled", num_recycled);
5045
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
5046
5047
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
5048
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
5049
11
    std::string key0 = recycle_stage_key(key_info0);
5050
11
    std::string key1 = recycle_stage_key(key_info1);
5051
5052
11
    std::vector<std::string_view> stage_keys;
5053
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
5054
11
                         this](std::string_view k, std::string_view v) -> int {
5055
1
        ++num_scanned;
5056
1
        RecycleStagePB recycle_stage;
5057
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
5058
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
5059
0
            return -1;
5060
0
        }
5061
5062
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
5063
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
5064
0
            LOG(WARNING) << "invalid idx: " << idx;
5065
0
            return -1;
5066
0
        }
5067
5068
1
        std::shared_ptr<StorageVaultAccessor> accessor;
5069
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
5070
1
                [&] {
5071
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
5072
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5073
1
                    if (!s3_conf) {
5074
1
                        return -1;
5075
1
                    }
5076
5077
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
5078
1
                    std::shared_ptr<S3Accessor> s3_accessor;
5079
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
5080
1
                    if (ret != 0) {
5081
1
                        return -1;
5082
1
                    }
5083
5084
1
                    accessor = std::move(s3_accessor);
5085
1
                    return 0;
5086
1
                }(),
5087
1
                "recycle_stage:get_accessor", &accessor);
5088
5089
1
        if (ret != 0) {
5090
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
5091
0
            return ret;
5092
0
        }
5093
5094
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
5095
1
                .tag("instance_id", instance_id_)
5096
1
                .tag("stage_id", recycle_stage.stage().stage_id())
5097
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
5098
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
5099
1
                .tag("obj_info_id", idx)
5100
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
5101
1
        ret = accessor->delete_all();
5102
1
        if (ret != 0) {
5103
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
5104
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
5105
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
5106
0
                         << ", ret=" << ret;
5107
0
            return -1;
5108
0
        }
5109
1
        metrics_context.total_recycled_num = ++num_recycled;
5110
1
        metrics_context.report();
5111
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
5112
1
        stage_keys.push_back(k);
5113
1
        return 0;
5114
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5054
1
                         this](std::string_view k, std::string_view v) -> int {
5055
1
        ++num_scanned;
5056
1
        RecycleStagePB recycle_stage;
5057
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
5058
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
5059
0
            return -1;
5060
0
        }
5061
5062
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
5063
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
5064
0
            LOG(WARNING) << "invalid idx: " << idx;
5065
0
            return -1;
5066
0
        }
5067
5068
1
        std::shared_ptr<StorageVaultAccessor> accessor;
5069
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
5070
1
                [&] {
5071
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
5072
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5073
1
                    if (!s3_conf) {
5074
1
                        return -1;
5075
1
                    }
5076
5077
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
5078
1
                    std::shared_ptr<S3Accessor> s3_accessor;
5079
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
5080
1
                    if (ret != 0) {
5081
1
                        return -1;
5082
1
                    }
5083
5084
1
                    accessor = std::move(s3_accessor);
5085
1
                    return 0;
5086
1
                }(),
5087
1
                "recycle_stage:get_accessor", &accessor);
5088
5089
1
        if (ret != 0) {
5090
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
5091
0
            return ret;
5092
0
        }
5093
5094
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
5095
1
                .tag("instance_id", instance_id_)
5096
1
                .tag("stage_id", recycle_stage.stage().stage_id())
5097
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
5098
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
5099
1
                .tag("obj_info_id", idx)
5100
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
5101
1
        ret = accessor->delete_all();
5102
1
        if (ret != 0) {
5103
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
5104
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
5105
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
5106
0
                         << ", ret=" << ret;
5107
0
            return -1;
5108
0
        }
5109
1
        metrics_context.total_recycled_num = ++num_recycled;
5110
1
        metrics_context.report();
5111
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
5112
1
        stage_keys.push_back(k);
5113
1
        return 0;
5114
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
5115
5116
11
    auto loop_done = [&stage_keys, this]() -> int {
5117
1
        if (stage_keys.empty()) return 0;
5118
1
        DORIS_CLOUD_DEFER {
5119
1
            stage_keys.clear();
5120
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5118
1
        DORIS_CLOUD_DEFER {
5119
1
            stage_keys.clear();
5120
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
5121
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
5122
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
5123
0
            return -1;
5124
0
        }
5125
1
        return 0;
5126
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
5116
1
    auto loop_done = [&stage_keys, this]() -> int {
5117
1
        if (stage_keys.empty()) return 0;
5118
1
        DORIS_CLOUD_DEFER {
5119
1
            stage_keys.clear();
5120
1
        };
5121
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
5122
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
5123
0
            return -1;
5124
0
        }
5125
1
        return 0;
5126
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
5127
11
    if (config::enable_recycler_stats_metrics) {
5128
0
        scan_and_statistics_stage();
5129
0
    }
5130
    // recycle_func and loop_done for scan and recycle
5131
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
5132
11
}
5133
5134
10
int InstanceRecycler::recycle_expired_stage_objects() {
5135
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
5136
5137
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5138
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
5139
5140
10
    DORIS_CLOUD_DEFER {
5141
10
        int64_t cost =
5142
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5143
10
        metrics_context.finish_report();
5144
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
5145
10
                .tag("instance_id", instance_id_);
5146
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
5140
10
    DORIS_CLOUD_DEFER {
5141
10
        int64_t cost =
5142
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5143
10
        metrics_context.finish_report();
5144
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
5145
10
                .tag("instance_id", instance_id_);
5146
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
5147
5148
10
    int ret = 0;
5149
5150
10
    if (config::enable_recycler_stats_metrics) {
5151
0
        scan_and_statistics_expired_stage_objects();
5152
0
    }
5153
5154
10
    for (const auto& stage : instance_info_.stages()) {
5155
0
        std::stringstream ss;
5156
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
5157
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
5158
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
5159
0
           << ", prefix=" << stage.obj_info().prefix();
5160
5161
0
        if (stopped()) {
5162
0
            break;
5163
0
        }
5164
0
        if (stage.type() == StagePB::EXTERNAL) {
5165
0
            continue;
5166
0
        }
5167
0
        int idx = stoi(stage.obj_info().id());
5168
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
5169
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
5170
0
            continue;
5171
0
        }
5172
5173
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
5174
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5175
0
        if (!s3_conf) {
5176
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
5177
0
            continue;
5178
0
        }
5179
5180
0
        s3_conf->prefix = stage.obj_info().prefix();
5181
0
        std::shared_ptr<S3Accessor> accessor;
5182
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
5183
0
        if (ret1 != 0) {
5184
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
5185
0
            ret = -1;
5186
0
            continue;
5187
0
        }
5188
5189
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
5190
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
5191
0
            ret = -1;
5192
0
            continue;
5193
0
        }
5194
5195
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
5196
0
        int64_t expiration_time =
5197
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
5198
0
                config::internal_stage_objects_expire_time_second;
5199
0
        if (config::force_immediate_recycle) {
5200
0
            expiration_time = INT64_MAX;
5201
0
        }
5202
0
        ret1 = accessor->delete_all(expiration_time);
5203
0
        if (ret1 != 0) {
5204
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
5205
0
                         << ss.str();
5206
0
            ret = -1;
5207
0
            continue;
5208
0
        }
5209
0
        metrics_context.total_recycled_num++;
5210
0
        metrics_context.report();
5211
0
    }
5212
10
    return ret;
5213
10
}
5214
5215
149
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
5216
149
    std::lock_guard lock(recycle_tasks_mutex);
5217
149
    running_recycle_tasks[task_name] = start_time;
5218
149
}
5219
5220
149
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
5221
149
    std::lock_guard lock(recycle_tasks_mutex);
5222
149
    DCHECK(running_recycle_tasks[task_name] > 0);
5223
149
    running_recycle_tasks.erase(task_name);
5224
149
}
5225
5226
21
bool InstanceRecycler::check_recycle_tasks() {
5227
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
5228
21
    {
5229
21
        std::lock_guard lock(recycle_tasks_mutex);
5230
21
        tmp_running_recycle_tasks = running_recycle_tasks;
5231
21
    }
5232
5233
21
    bool found = false;
5234
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5235
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
5236
20
        int64_t cost = now - start_time;
5237
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
5238
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
5239
20
                    .tag("instance_id", instance_id_)
5240
20
                    .tag("task", task_name);
5241
20
            found = true;
5242
20
        }
5243
20
    }
5244
5245
21
    return found;
5246
21
}
5247
5248
// Scan and statistics indexes that need to be recycled
5249
0
int InstanceRecycler::scan_and_statistics_indexes() {
5250
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
5251
5252
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
5253
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
5254
0
    std::string index_key0;
5255
0
    std::string index_key1;
5256
0
    recycle_index_key(index_key_info0, &index_key0);
5257
0
    recycle_index_key(index_key_info1, &index_key1);
5258
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5259
5260
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
5261
0
        RecycleIndexPB index_pb;
5262
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
5263
0
            return 0;
5264
0
        }
5265
0
        int64_t current_time = ::time(nullptr);
5266
0
        if (current_time <
5267
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
5268
0
            return 0;
5269
0
        }
5270
        // decode index_id
5271
0
        auto k1 = k;
5272
0
        k1.remove_prefix(1);
5273
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5274
0
        decode_key(&k1, &out);
5275
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
5276
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
5277
0
        std::unique_ptr<Transaction> txn;
5278
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5279
0
        if (err != TxnErrorCode::TXN_OK) {
5280
0
            return 0;
5281
0
        }
5282
0
        std::string val;
5283
0
        err = txn->get(k, &val);
5284
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5285
0
            return 0;
5286
0
        }
5287
0
        if (err != TxnErrorCode::TXN_OK) {
5288
0
            return 0;
5289
0
        }
5290
0
        index_pb.Clear();
5291
0
        if (!index_pb.ParseFromString(val)) {
5292
0
            return 0;
5293
0
        }
5294
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
5295
0
            return 0;
5296
0
        }
5297
0
        metrics_context.total_need_recycle_num++;
5298
0
        return 0;
5299
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5300
5301
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
5302
0
    metrics_context.report(true);
5303
0
    segment_metrics_context_.report(true);
5304
0
    tablet_metrics_context_.report(true);
5305
0
    return ret;
5306
0
}
5307
5308
// Scan and statistics partitions that need to be recycled
5309
0
int InstanceRecycler::scan_and_statistics_partitions() {
5310
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
5311
5312
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
5313
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
5314
0
    std::string part_key0;
5315
0
    std::string part_key1;
5316
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5317
5318
0
    recycle_partition_key(part_key_info0, &part_key0);
5319
0
    recycle_partition_key(part_key_info1, &part_key1);
5320
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
5321
0
        RecyclePartitionPB part_pb;
5322
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
5323
0
            return 0;
5324
0
        }
5325
0
        int64_t current_time = ::time(nullptr);
5326
0
        if (current_time <
5327
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
5328
0
            return 0;
5329
0
        }
5330
        // decode partition_id
5331
0
        auto k1 = k;
5332
0
        k1.remove_prefix(1);
5333
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5334
0
        decode_key(&k1, &out);
5335
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
5336
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
5337
        // Change state to RECYCLING
5338
0
        std::unique_ptr<Transaction> txn;
5339
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5340
0
        if (err != TxnErrorCode::TXN_OK) {
5341
0
            return 0;
5342
0
        }
5343
0
        std::string val;
5344
0
        err = txn->get(k, &val);
5345
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5346
0
            return 0;
5347
0
        }
5348
0
        if (err != TxnErrorCode::TXN_OK) {
5349
0
            return 0;
5350
0
        }
5351
0
        part_pb.Clear();
5352
0
        if (!part_pb.ParseFromString(val)) {
5353
0
            return 0;
5354
0
        }
5355
        // Partitions with PREPARED state MUST have no data
5356
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
5357
0
        int ret = 0;
5358
0
        for (int64_t index_id : part_pb.index_id()) {
5359
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
5360
0
                                            partition_id, is_empty_tablet) != 0) {
5361
0
                ret = 0;
5362
0
            }
5363
0
        }
5364
0
        metrics_context.total_need_recycle_num++;
5365
0
        return ret;
5366
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5367
5368
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
5369
0
    metrics_context.report(true);
5370
0
    segment_metrics_context_.report(true);
5371
0
    tablet_metrics_context_.report(true);
5372
0
    return ret;
5373
0
}
5374
5375
// Scan and statistics rowsets that need to be recycled
5376
0
int InstanceRecycler::scan_and_statistics_rowsets() {
5377
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
5378
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
5379
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
5380
0
    std::string recyc_rs_key0;
5381
0
    std::string recyc_rs_key1;
5382
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
5383
0
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
5384
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5385
5386
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5387
0
        RecycleRowsetPB rowset;
5388
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5389
0
            return 0;
5390
0
        }
5391
0
        int64_t current_time = ::time(nullptr);
5392
0
        if (current_time <
5393
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
5394
0
            return 0;
5395
0
        }
5396
0
        if (!rowset.has_type()) {
5397
0
            if (!rowset.has_resource_id()) [[unlikely]] {
5398
0
                return 0;
5399
0
            }
5400
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5401
0
                return 0;
5402
0
            }
5403
0
            metrics_context.total_need_recycle_num++;
5404
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
5405
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
5406
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
5407
0
            return 0;
5408
0
        }
5409
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
5410
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
5411
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
5412
0
                return 0;
5413
0
            }
5414
0
        }
5415
0
        metrics_context.total_need_recycle_num++;
5416
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
5417
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
5418
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
5419
0
        return 0;
5420
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5421
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
5422
0
    metrics_context.report(true);
5423
0
    segment_metrics_context_.report(true);
5424
0
    return ret;
5425
0
}
5426
5427
// Scan and statistics tmp_rowsets that need to be recycled
5428
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
5429
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
5430
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
5431
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
5432
0
    std::string tmp_rs_key0;
5433
0
    std::string tmp_rs_key1;
5434
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
5435
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
5436
5437
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5438
5439
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
5440
0
        doris::RowsetMetaCloudPB rowset;
5441
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5442
0
            return 0;
5443
0
        }
5444
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5445
0
        int64_t current_time = ::time(nullptr);
5446
0
        if (current_time < expiration) {
5447
0
            return 0;
5448
0
        }
5449
5450
0
        DCHECK_GT(rowset.txn_id(), 0)
5451
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
5452
0
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
5453
0
            return 0;
5454
0
        }
5455
5456
0
        if (!rowset.has_resource_id()) {
5457
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5458
0
                return 0;
5459
0
            }
5460
0
            return 0;
5461
0
        }
5462
5463
0
        metrics_context.total_need_recycle_num++;
5464
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
5465
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
5466
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
5467
0
        return 0;
5468
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5469
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
5470
0
    metrics_context.report(true);
5471
0
    segment_metrics_context_.report(true);
5472
0
    return ret;
5473
0
}
5474
5475
// Scan and statistics abort_timeout_txn that need to be recycled
5476
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
5477
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
5478
5479
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
5480
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5481
0
    std::string begin_txn_running_key;
5482
0
    std::string end_txn_running_key;
5483
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
5484
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
5485
5486
0
    int64_t current_time =
5487
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5488
5489
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
5490
0
                                               std::string_view k, std::string_view v) -> int {
5491
0
        std::unique_ptr<Transaction> txn;
5492
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5493
0
        if (err != TxnErrorCode::TXN_OK) {
5494
0
            return 0;
5495
0
        }
5496
0
        std::string_view k1 = k;
5497
0
        k1.remove_prefix(1);
5498
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5499
0
        if (decode_key(&k1, &out) != 0) {
5500
0
            return 0;
5501
0
        }
5502
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5503
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5504
        // Update txn_info
5505
0
        std::string txn_inf_key, txn_inf_val;
5506
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5507
0
        err = txn->get(txn_inf_key, &txn_inf_val);
5508
0
        if (err != TxnErrorCode::TXN_OK) {
5509
0
            return 0;
5510
0
        }
5511
0
        TxnInfoPB txn_info;
5512
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
5513
0
            return 0;
5514
0
        }
5515
5516
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
5517
0
            TxnRunningPB txn_running_pb;
5518
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5519
0
                return 0;
5520
0
            }
5521
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5522
0
                return 0;
5523
0
            }
5524
0
            metrics_context.total_need_recycle_num++;
5525
0
        }
5526
0
        return 0;
5527
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5528
5529
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
5530
0
    metrics_context.report(true);
5531
0
    return ret;
5532
0
}
5533
5534
// Scan and statistics expired_txn_label that need to be recycled
5535
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
5536
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
5537
5538
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
5539
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5540
0
    std::string begin_recycle_txn_key;
5541
0
    std::string end_recycle_txn_key;
5542
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
5543
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
5544
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5545
0
    int64_t current_time_ms =
5546
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5547
5548
    // for calculate the total num or bytes of recyled objects
5549
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
5550
0
        RecycleTxnPB recycle_txn_pb;
5551
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5552
0
            return 0;
5553
0
        }
5554
0
        if ((config::force_immediate_recycle) ||
5555
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5556
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5557
0
             current_time_ms)) {
5558
0
            metrics_context.total_need_recycle_num++;
5559
0
        }
5560
0
        return 0;
5561
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5562
5563
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
5564
0
    metrics_context.report(true);
5565
0
    return ret;
5566
0
}
5567
5568
// Scan and statistics copy_jobs that need to be recycled
5569
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
5570
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
5571
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
5572
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
5573
0
    std::string key0;
5574
0
    std::string key1;
5575
0
    copy_job_key(key_info0, &key0);
5576
0
    copy_job_key(key_info1, &key1);
5577
5578
    // for calculate the total num or bytes of recyled objects
5579
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
5580
0
        CopyJobPB copy_job;
5581
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
5582
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
5583
0
            return 0;
5584
0
        }
5585
5586
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
5587
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
5588
0
                int64_t current_time =
5589
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5590
0
                if (copy_job.finish_time_ms() > 0) {
5591
0
                    if (!config::force_immediate_recycle &&
5592
0
                        current_time < copy_job.finish_time_ms() +
5593
0
                                               config::copy_job_max_retention_second * 1000) {
5594
0
                        return 0;
5595
0
                    }
5596
0
                } else {
5597
0
                    if (!config::force_immediate_recycle &&
5598
0
                        current_time < copy_job.start_time_ms() +
5599
0
                                               config::copy_job_max_retention_second * 1000) {
5600
0
                        return 0;
5601
0
                    }
5602
0
                }
5603
0
            }
5604
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
5605
0
            int64_t current_time =
5606
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5607
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
5608
0
                return 0;
5609
0
            }
5610
0
        }
5611
0
        metrics_context.total_need_recycle_num++;
5612
0
        return 0;
5613
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5614
5615
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
5616
0
    metrics_context.report(true);
5617
0
    return ret;
5618
0
}
5619
5620
// Scan and statistics stage that need to be recycled
5621
0
int InstanceRecycler::scan_and_statistics_stage() {
5622
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
5623
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
5624
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
5625
0
    std::string key0 = recycle_stage_key(key_info0);
5626
0
    std::string key1 = recycle_stage_key(key_info1);
5627
5628
    // for calculate the total num or bytes of recyled objects
5629
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
5630
0
                                                        std::string_view v) -> int {
5631
0
        RecycleStagePB recycle_stage;
5632
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
5633
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
5634
0
            return 0;
5635
0
        }
5636
5637
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
5638
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
5639
0
            LOG(WARNING) << "invalid idx: " << idx;
5640
0
            return 0;
5641
0
        }
5642
5643
0
        std::shared_ptr<StorageVaultAccessor> accessor;
5644
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
5645
0
                [&] {
5646
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
5647
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5648
0
                    if (!s3_conf) {
5649
0
                        return 0;
5650
0
                    }
5651
5652
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
5653
0
                    std::shared_ptr<S3Accessor> s3_accessor;
5654
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
5655
0
                    if (ret != 0) {
5656
0
                        return 0;
5657
0
                    }
5658
5659
0
                    accessor = std::move(s3_accessor);
5660
0
                    return 0;
5661
0
                }(),
5662
0
                "recycle_stage:get_accessor", &accessor);
5663
5664
0
        if (ret != 0) {
5665
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
5666
0
            return 0;
5667
0
        }
5668
5669
0
        metrics_context.total_need_recycle_num++;
5670
0
        return 0;
5671
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5672
5673
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
5674
0
    metrics_context.report(true);
5675
0
    return ret;
5676
0
}
5677
5678
// Scan and statistics expired_stage_objects that need to be recycled
5679
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
5680
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
5681
5682
    // for calculate the total num or bytes of recyled objects
5683
0
    auto scan_and_statistics = [&metrics_context, this]() {
5684
0
        for (const auto& stage : instance_info_.stages()) {
5685
0
            if (stopped()) {
5686
0
                break;
5687
0
            }
5688
0
            if (stage.type() == StagePB::EXTERNAL) {
5689
0
                continue;
5690
0
            }
5691
0
            int idx = stoi(stage.obj_info().id());
5692
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
5693
0
                continue;
5694
0
            }
5695
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
5696
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5697
0
            if (!s3_conf) {
5698
0
                continue;
5699
0
            }
5700
0
            s3_conf->prefix = stage.obj_info().prefix();
5701
0
            std::shared_ptr<S3Accessor> accessor;
5702
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
5703
0
            if (ret1 != 0) {
5704
0
                continue;
5705
0
            }
5706
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
5707
0
                continue;
5708
0
            }
5709
0
            metrics_context.total_need_recycle_num++;
5710
0
        }
5711
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
5712
5713
0
    scan_and_statistics();
5714
0
    metrics_context.report(true);
5715
0
    return 0;
5716
0
}
5717
5718
// Scan and statistics versions that need to be recycled
5719
0
int InstanceRecycler::scan_and_statistics_versions() {
5720
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
5721
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
5722
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
5723
5724
0
    int64_t last_scanned_table_id = 0;
5725
0
    bool is_recycled = false; // Is last scanned kv recycled
5726
    // for calculate the total num or bytes of recyled objects
5727
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
5728
0
                                       std::string_view k, std::string_view) {
5729
0
        auto k1 = k;
5730
0
        k1.remove_prefix(1);
5731
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
5732
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5733
0
        decode_key(&k1, &out);
5734
0
        DCHECK_EQ(out.size(), 6) << k;
5735
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
5736
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
5737
0
            metrics_context.total_need_recycle_num +=
5738
0
                    is_recycled; // Version kv of this table has been recycled
5739
0
            return 0;
5740
0
        }
5741
0
        last_scanned_table_id = table_id;
5742
0
        is_recycled = false;
5743
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
5744
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
5745
0
        std::unique_ptr<Transaction> txn;
5746
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5747
0
        if (err != TxnErrorCode::TXN_OK) {
5748
0
            return 0;
5749
0
        }
5750
0
        std::unique_ptr<RangeGetIterator> iter;
5751
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
5752
0
        if (err != TxnErrorCode::TXN_OK) {
5753
0
            return 0;
5754
0
        }
5755
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
5756
0
            return 0;
5757
0
        }
5758
0
        metrics_context.total_need_recycle_num++;
5759
0
        is_recycled = true;
5760
0
        return 0;
5761
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5762
5763
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
5764
0
    metrics_context.report(true);
5765
0
    return ret;
5766
0
}
5767
5768
// Scan and statistics restore jobs that need to be recycled
5769
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
5770
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
5771
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
5772
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
5773
0
    std::string restore_job_key0;
5774
0
    std::string restore_job_key1;
5775
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
5776
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
5777
5778
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5779
5780
    // for calculate the total num or bytes of recyled objects
5781
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
5782
0
        RestoreJobCloudPB restore_job_pb;
5783
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5784
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5785
0
            return 0;
5786
0
        }
5787
0
        int64_t expiration =
5788
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5789
0
        int64_t current_time = ::time(nullptr);
5790
0
        if (current_time < expiration) { // not expired
5791
0
            return 0;
5792
0
        }
5793
0
        metrics_context.total_need_recycle_num++;
5794
0
        if(restore_job_pb.need_recycle_data()) {
5795
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
5796
0
        }
5797
0
        return 0;
5798
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5799
5800
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
5801
0
    metrics_context.report(true);
5802
0
    return ret;
5803
0
}
5804
5805
int InstanceRecycler::classify_rowset_task_by_ref_count(
5806
60
        RowsetDeleteTask& task, std::vector<RowsetDeleteTask>& batch_delete_tasks) {
5807
60
    constexpr int MAX_RETRY = 10;
5808
60
    const auto& rowset_meta = task.rowset_meta;
5809
60
    int64_t tablet_id = rowset_meta.tablet_id();
5810
60
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
5811
60
    std::string_view reference_instance_id = instance_id_;
5812
60
    if (rowset_meta.has_reference_instance_id()) {
5813
5
        reference_instance_id = rowset_meta.reference_instance_id();
5814
5
    }
5815
5816
61
    for (int i = 0; i < MAX_RETRY; ++i) {
5817
61
        std::unique_ptr<Transaction> txn;
5818
61
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5819
61
        if (err != TxnErrorCode::TXN_OK) {
5820
0
            LOG_WARNING("failed to create txn when classifying rowset task")
5821
0
                    .tag("instance_id", instance_id_)
5822
0
                    .tag("tablet_id", tablet_id)
5823
0
                    .tag("rowset_id", rowset_id)
5824
0
                    .tag("err", err);
5825
0
            return -1;
5826
0
        }
5827
5828
61
        std::string rowset_ref_count_key =
5829
61
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
5830
61
        task.rowset_ref_count_key = rowset_ref_count_key;
5831
5832
61
        int64_t ref_count = 0;
5833
61
        {
5834
61
            std::string value;
5835
61
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
5836
61
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5837
0
                ref_count = 1;
5838
61
            } else if (err != TxnErrorCode::TXN_OK) {
5839
0
                LOG_WARNING("failed to get rowset ref count key when classifying")
5840
0
                        .tag("instance_id", instance_id_)
5841
0
                        .tag("tablet_id", tablet_id)
5842
0
                        .tag("rowset_id", rowset_id)
5843
0
                        .tag("err", err);
5844
0
                return -1;
5845
61
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
5846
0
                LOG_WARNING("failed to decode rowset data ref count when classifying")
5847
0
                        .tag("instance_id", instance_id_)
5848
0
                        .tag("tablet_id", tablet_id)
5849
0
                        .tag("rowset_id", rowset_id)
5850
0
                        .tag("value", hex(value));
5851
0
                return -1;
5852
0
            }
5853
61
        }
5854
5855
61
        if (ref_count > 1) {
5856
            // ref_count > 1: decrement count, remove recycle keys, don't add to batch delete
5857
12
            txn->atomic_add(rowset_ref_count_key, -1);
5858
12
            LOG_INFO("decrease rowset data ref count in classification phase")
5859
12
                    .tag("instance_id", instance_id_)
5860
12
                    .tag("tablet_id", tablet_id)
5861
12
                    .tag("rowset_id", rowset_id)
5862
12
                    .tag("ref_count", ref_count - 1)
5863
12
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5864
5865
12
            if (!task.recycle_rowset_key.empty()) {
5866
12
                txn->remove(task.recycle_rowset_key);
5867
12
                LOG_INFO("remove recycle rowset key in classification phase")
5868
12
                        .tag("key", hex(task.recycle_rowset_key));
5869
12
            }
5870
12
            if (!task.non_versioned_rowset_key.empty()) {
5871
12
                txn->remove(task.non_versioned_rowset_key);
5872
12
                LOG_INFO("remove non versioned rowset key in classification phase")
5873
12
                        .tag("key", hex(task.non_versioned_rowset_key));
5874
12
            }
5875
5876
12
            err = txn->commit();
5877
12
            if (err == TxnErrorCode::TXN_CONFLICT) {
5878
1
                VLOG_DEBUG << "decrease rowset ref count but txn conflict in classification, retry"
5879
0
                           << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
5880
0
                           << ", ref_count=" << ref_count << ", retry=" << i;
5881
1
                std::this_thread::sleep_for(std::chrono::milliseconds(500));
5882
1
                continue;
5883
11
            } else if (err != TxnErrorCode::TXN_OK) {
5884
0
                LOG_WARNING("failed to commit txn when classifying rowset task")
5885
0
                        .tag("instance_id", instance_id_)
5886
0
                        .tag("tablet_id", tablet_id)
5887
0
                        .tag("rowset_id", rowset_id)
5888
0
                        .tag("err", err);
5889
0
                return -1;
5890
0
            }
5891
11
            return 1; // handled, not added to batch delete
5892
49
        } else {
5893
            // ref_count == 1: Add to batch delete plan without modifying any KV.
5894
            // Keep recycle_rowset_key as "pending recycle" marker until data is actually deleted.
5895
49
            LOG_INFO("add rowset to batch delete plan")
5896
49
                    .tag("instance_id", instance_id_)
5897
49
                    .tag("tablet_id", tablet_id)
5898
49
                    .tag("rowset_id", rowset_id)
5899
49
                    .tag("resource_id", rowset_meta.resource_id())
5900
49
                    .tag("ref_count", ref_count);
5901
5902
49
            batch_delete_tasks.push_back(std::move(task));
5903
49
            return 0; // added to batch delete
5904
49
        }
5905
61
    }
5906
5907
0
    LOG_WARNING("failed to classify rowset task after retry")
5908
0
            .tag("instance_id", instance_id_)
5909
0
            .tag("tablet_id", tablet_id)
5910
0
            .tag("rowset_id", rowset_id)
5911
0
            .tag("retry", MAX_RETRY);
5912
0
    return -1;
5913
60
}
5914
5915
10
int InstanceRecycler::cleanup_rowset_metadata(const std::vector<RowsetDeleteTask>& tasks) {
5916
10
    int ret = 0;
5917
49
    for (const auto& task : tasks) {
5918
49
        int64_t tablet_id = task.rowset_meta.tablet_id();
5919
49
        const std::string& rowset_id = task.rowset_meta.rowset_id_v2();
5920
5921
        // Note: decrement_packed_file_ref_counts is already called in delete_rowset_data,
5922
        // so we don't need to call it again here.
5923
5924
        // Remove all metadata keys in one transaction
5925
49
        std::unique_ptr<Transaction> txn;
5926
49
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5927
49
        if (err != TxnErrorCode::TXN_OK) {
5928
0
            LOG_WARNING("failed to create txn when cleaning up metadata")
5929
0
                    .tag("instance_id", instance_id_)
5930
0
                    .tag("tablet_id", tablet_id)
5931
0
                    .tag("rowset_id", rowset_id)
5932
0
                    .tag("err", err);
5933
0
            ret = -1;
5934
0
            continue;
5935
0
        }
5936
5937
49
        std::string_view reference_instance_id = instance_id_;
5938
49
        if (task.rowset_meta.has_reference_instance_id()) {
5939
0
            reference_instance_id = task.rowset_meta.reference_instance_id();
5940
0
        }
5941
5942
49
        txn->remove(task.rowset_ref_count_key);
5943
49
        LOG_INFO("delete rowset data ref count key in cleanup phase")
5944
49
                .tag("instance_id", instance_id_)
5945
49
                .tag("tablet_id", tablet_id)
5946
49
                .tag("rowset_id", rowset_id)
5947
49
                .tag("ref_count_key", hex(task.rowset_ref_count_key));
5948
5949
49
        std::string dbm_start_key =
5950
49
                meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
5951
49
        std::string dbm_end_key = meta_delete_bitmap_key(
5952
49
                {reference_instance_id, tablet_id, rowset_id,
5953
49
                 std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
5954
49
        txn->remove(dbm_start_key, dbm_end_key);
5955
49
        LOG_INFO("remove delete bitmap kv in cleanup phase")
5956
49
                .tag("instance_id", instance_id_)
5957
49
                .tag("tablet_id", tablet_id)
5958
49
                .tag("rowset_id", rowset_id)
5959
49
                .tag("begin", hex(dbm_start_key))
5960
49
                .tag("end", hex(dbm_end_key));
5961
5962
49
        std::string versioned_dbm_start_key =
5963
49
                versioned::meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id});
5964
49
        std::string versioned_dbm_end_key = versioned_dbm_start_key;
5965
49
        encode_int64(INT64_MAX, &versioned_dbm_end_key);
5966
49
        txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
5967
49
        LOG_INFO("remove versioned delete bitmap kv in cleanup phase")
5968
49
                .tag("instance_id", instance_id_)
5969
49
                .tag("tablet_id", tablet_id)
5970
49
                .tag("rowset_id", rowset_id)
5971
49
                .tag("begin", hex(versioned_dbm_start_key))
5972
49
                .tag("end", hex(versioned_dbm_end_key));
5973
5974
        // Remove versioned meta rowset key
5975
49
        if (!task.versioned_rowset_key.empty()) {
5976
49
            std::string versioned_rowset_key_end = task.versioned_rowset_key;
5977
49
            encode_int64(INT64_MAX, &versioned_rowset_key_end);
5978
49
            txn->remove(task.versioned_rowset_key, versioned_rowset_key_end);
5979
49
            LOG_INFO("remove versioned meta rowset key in cleanup phase")
5980
49
                    .tag("instance_id", instance_id_)
5981
49
                    .tag("tablet_id", tablet_id)
5982
49
                    .tag("rowset_id", rowset_id)
5983
49
                    .tag("begin", hex(task.versioned_rowset_key))
5984
49
                    .tag("end", hex(versioned_rowset_key_end));
5985
49
        }
5986
5987
49
        if (!task.non_versioned_rowset_key.empty()) {
5988
49
            txn->remove(task.non_versioned_rowset_key);
5989
49
            LOG_INFO("remove non versioned rowset key in cleanup phase")
5990
49
                    .tag("instance_id", instance_id_)
5991
49
                    .tag("tablet_id", tablet_id)
5992
49
                    .tag("rowset_id", rowset_id)
5993
49
                    .tag("key", hex(task.non_versioned_rowset_key));
5994
49
        }
5995
5996
        // Remove recycle_rowset_key last to ensure retry safety:
5997
        // if cleanup fails, this key remains and triggers next round retry.
5998
49
        if (!task.recycle_rowset_key.empty()) {
5999
49
            txn->remove(task.recycle_rowset_key);
6000
49
            LOG_INFO("remove recycle rowset key in cleanup phase")
6001
49
                    .tag("instance_id", instance_id_)
6002
49
                    .tag("tablet_id", tablet_id)
6003
49
                    .tag("rowset_id", rowset_id)
6004
49
                    .tag("key", hex(task.recycle_rowset_key));
6005
49
        }
6006
6007
49
        err = txn->commit();
6008
49
        if (err != TxnErrorCode::TXN_OK) {
6009
            // Metadata cleanup failed. recycle_rowset_key remains, next round will retry.
6010
0
            LOG_WARNING("failed to commit cleanup metadata txn, will retry next round")
6011
0
                    .tag("instance_id", instance_id_)
6012
0
                    .tag("tablet_id", tablet_id)
6013
0
                    .tag("rowset_id", rowset_id)
6014
0
                    .tag("err", err);
6015
0
            ret = -1;
6016
0
            continue;
6017
0
        }
6018
6019
49
        LOG_INFO("cleanup rowset metadata success")
6020
49
                .tag("instance_id", instance_id_)
6021
49
                .tag("tablet_id", tablet_id)
6022
49
                .tag("rowset_id", rowset_id);
6023
49
    }
6024
10
    return ret;
6025
10
}
6026
6027
} // namespace doris::cloud