Coverage Report

Created: 2025-12-23 12:43

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <functional>
36
#include <initializer_list>
37
#include <memory>
38
#include <numeric>
39
#include <random>
40
#include <string>
41
#include <string_view>
42
#include <thread>
43
#include <unordered_map>
44
#include <utility>
45
#include <variant>
46
47
#include "common/defer.h"
48
#include "common/stopwatch.h"
49
#include "meta-service/meta_service.h"
50
#include "meta-service/meta_service_helper.h"
51
#include "meta-service/meta_service_schema.h"
52
#include "meta-store/blob_message.h"
53
#include "meta-store/meta_reader.h"
54
#include "meta-store/txn_kv.h"
55
#include "meta-store/txn_kv_error.h"
56
#include "meta-store/versioned_value.h"
57
#include "recycler/checker.h"
58
#ifdef ENABLE_HDFS_STORAGE_VAULT
59
#include "recycler/hdfs_accessor.h"
60
#endif
61
#include "recycler/s3_accessor.h"
62
#include "recycler/storage_vault_accessor.h"
63
#ifdef UNIT_TEST
64
#include "../test/mock_accessor.h"
65
#endif
66
#include "common/bvars.h"
67
#include "common/config.h"
68
#include "common/encryption_util.h"
69
#include "common/logging.h"
70
#include "common/simple_thread_pool.h"
71
#include "common/util.h"
72
#include "cpp/sync_point.h"
73
#include "meta-store/codec.h"
74
#include "meta-store/document_message.h"
75
#include "meta-store/keys.h"
76
#include "recycler/recycler_service.h"
77
#include "recycler/sync_executor.h"
78
#include "recycler/util.h"
79
80
namespace doris::cloud {
81
82
using namespace std::chrono;
83
84
namespace {
85
86
0
int64_t packed_file_retry_sleep_ms() {
87
0
    const int64_t min_ms = std::max<int64_t>(0, config::packed_file_txn_retry_sleep_min_ms);
88
0
    const int64_t max_ms = std::max<int64_t>(min_ms, config::packed_file_txn_retry_sleep_max_ms);
89
0
    thread_local std::mt19937_64 gen(std::random_device {}());
90
0
    std::uniform_int_distribution<int64_t> dist(min_ms, max_ms);
91
0
    return dist(gen);
92
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
93
94
0
void sleep_for_packed_file_retry() {
95
0
    std::this_thread::sleep_for(std::chrono::milliseconds(packed_file_retry_sleep_ms()));
96
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
97
98
} // namespace
99
100
// return 0 for success get a key, 1 for key not found, negative for error
101
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
102
0
    std::unique_ptr<Transaction> txn;
103
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
104
0
    if (err != TxnErrorCode::TXN_OK) {
105
0
        return -1;
106
0
    }
107
0
    switch (txn->get(key, &val, true)) {
108
0
    case TxnErrorCode::TXN_OK:
109
0
        return 0;
110
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
111
0
        return 1;
112
0
    default:
113
0
        return -1;
114
0
    };
115
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
116
117
// 0 for success, negative for error
118
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
119
280
                   std::unique_ptr<RangeGetIterator>& it) {
120
280
    std::unique_ptr<Transaction> txn;
121
280
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
280
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
280
    switch (txn->get(begin, end, &it, true)) {
126
280
    case TxnErrorCode::TXN_OK:
127
280
        return 0;
128
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
129
0
        return 1;
130
0
    default:
131
0
        return -1;
132
280
    };
133
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
119
19
                   std::unique_ptr<RangeGetIterator>& it) {
120
19
    std::unique_ptr<Transaction> txn;
121
19
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
19
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
19
    switch (txn->get(begin, end, &it, true)) {
126
19
    case TxnErrorCode::TXN_OK:
127
19
        return 0;
128
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
129
0
        return 1;
130
0
    default:
131
0
        return -1;
132
19
    };
133
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
119
261
                   std::unique_ptr<RangeGetIterator>& it) {
120
261
    std::unique_ptr<Transaction> txn;
121
261
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
261
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
261
    switch (txn->get(begin, end, &it, true)) {
126
261
    case TxnErrorCode::TXN_OK:
127
261
        return 0;
128
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
129
0
        return 1;
130
0
    default:
131
0
        return -1;
132
261
    };
133
0
}
134
135
// return 0 for success otherwise error
136
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
137
6
    std::unique_ptr<Transaction> txn;
138
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
139
6
    if (err != TxnErrorCode::TXN_OK) {
140
0
        return -1;
141
0
    }
142
10
    for (auto k : keys) {
143
10
        txn->remove(k);
144
10
    }
145
6
    switch (txn->commit()) {
146
6
    case TxnErrorCode::TXN_OK:
147
6
        return 0;
148
0
    case TxnErrorCode::TXN_CONFLICT:
149
0
        return -1;
150
0
    default:
151
0
        return -1;
152
6
    }
153
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
136
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
137
1
    std::unique_ptr<Transaction> txn;
138
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
139
1
    if (err != TxnErrorCode::TXN_OK) {
140
0
        return -1;
141
0
    }
142
1
    for (auto k : keys) {
143
1
        txn->remove(k);
144
1
    }
145
1
    switch (txn->commit()) {
146
1
    case TxnErrorCode::TXN_OK:
147
1
        return 0;
148
0
    case TxnErrorCode::TXN_CONFLICT:
149
0
        return -1;
150
0
    default:
151
0
        return -1;
152
1
    }
153
1
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
136
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
137
5
    std::unique_ptr<Transaction> txn;
138
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
139
5
    if (err != TxnErrorCode::TXN_OK) {
140
0
        return -1;
141
0
    }
142
9
    for (auto k : keys) {
143
9
        txn->remove(k);
144
9
    }
145
5
    switch (txn->commit()) {
146
5
    case TxnErrorCode::TXN_OK:
147
5
        return 0;
148
0
    case TxnErrorCode::TXN_CONFLICT:
149
0
        return -1;
150
0
    default:
151
0
        return -1;
152
5
    }
153
5
}
154
155
// return 0 for success otherwise error
156
55
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
157
55
    std::unique_ptr<Transaction> txn;
158
55
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
55
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
110k
    for (auto& k : keys) {
163
110k
        txn->remove(k);
164
110k
    }
165
55
    switch (txn->commit()) {
166
55
    case TxnErrorCode::TXN_OK:
167
55
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
55
    }
173
55
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
156
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
157
6
    std::unique_ptr<Transaction> txn;
158
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
6
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
4.00k
    for (auto& k : keys) {
163
4.00k
        txn->remove(k);
164
4.00k
    }
165
6
    switch (txn->commit()) {
166
6
    case TxnErrorCode::TXN_OK:
167
6
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
6
    }
173
6
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
156
49
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
157
49
    std::unique_ptr<Transaction> txn;
158
49
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
49
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
106k
    for (auto& k : keys) {
163
106k
        txn->remove(k);
164
106k
    }
165
49
    switch (txn->commit()) {
166
49
    case TxnErrorCode::TXN_OK:
167
49
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
49
    }
173
49
}
174
175
// return 0 for success otherwise error
176
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
177
106k
                                       std::string_view end) {
178
106k
    std::unique_ptr<Transaction> txn;
179
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
180
106k
    if (err != TxnErrorCode::TXN_OK) {
181
0
        return -1;
182
0
    }
183
106k
    txn->remove(begin, end);
184
106k
    switch (txn->commit()) {
185
106k
    case TxnErrorCode::TXN_OK:
186
106k
        return 0;
187
0
    case TxnErrorCode::TXN_CONFLICT:
188
0
        return -1;
189
0
    default:
190
0
        return -1;
191
106k
    }
192
106k
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
177
106k
                                       std::string_view end) {
178
106k
    std::unique_ptr<Transaction> txn;
179
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
180
106k
    if (err != TxnErrorCode::TXN_OK) {
181
0
        return -1;
182
0
    }
183
106k
    txn->remove(begin, end);
184
106k
    switch (txn->commit()) {
185
106k
    case TxnErrorCode::TXN_OK:
186
106k
        return 0;
187
0
    case TxnErrorCode::TXN_CONFLICT:
188
0
        return -1;
189
0
    default:
190
0
        return -1;
191
106k
    }
192
106k
}
193
194
void scan_restore_job_rowset(
195
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
196
        std::string& msg,
197
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
198
199
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
200
                                      int64_t num_scanned, int64_t num_recycled,
201
52
                                      int64_t start_time) {
202
52
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
203
0
        int64_t cost =
204
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
205
0
        if (cost > config::recycle_task_threshold_seconds) {
206
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
207
0
                    .tag("instance_id", instance_id)
208
0
                    .tag("task", task_name)
209
0
                    .tag("num_scanned", num_scanned)
210
0
                    .tag("num_recycled", num_recycled);
211
0
        }
212
0
    }
213
52
    return;
214
52
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
201
2
                                      int64_t start_time) {
202
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
203
0
        int64_t cost =
204
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
205
0
        if (cost > config::recycle_task_threshold_seconds) {
206
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
207
0
                    .tag("instance_id", instance_id)
208
0
                    .tag("task", task_name)
209
0
                    .tag("num_scanned", num_scanned)
210
0
                    .tag("num_recycled", num_recycled);
211
0
        }
212
0
    }
213
2
    return;
214
2
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
201
50
                                      int64_t start_time) {
202
50
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
203
0
        int64_t cost =
204
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
205
0
        if (cost > config::recycle_task_threshold_seconds) {
206
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
207
0
                    .tag("instance_id", instance_id)
208
0
                    .tag("task", task_name)
209
0
                    .tag("num_scanned", num_scanned)
210
0
                    .tag("num_recycled", num_recycled);
211
0
        }
212
0
    }
213
50
    return;
214
50
}
215
216
4
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
217
4
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);
218
219
4
    auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
220
4
                                                               "s3_producer_pool");
221
4
    s3_producer_pool->start();
222
4
    auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
223
4
                                                                  "recycle_tablet_pool");
224
4
    recycle_tablet_pool->start();
225
4
    auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>(
226
4
            config::recycle_pool_parallelism, "group_recycle_function_pool");
227
4
    group_recycle_function_pool->start();
228
4
    _thread_pool_group =
229
4
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
230
4
                                    std::move(group_recycle_function_pool));
231
232
4
    auto resource_mgr = std::make_shared<ResourceManager>(txn_kv_);
233
4
    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_, std::move(resource_mgr));
234
4
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);
235
4
}
236
237
4
Recycler::~Recycler() {
238
4
    if (!stopped()) {
239
0
        stop();
240
0
    }
241
4
}
242
243
4
void Recycler::instance_scanner_callback() {
244
    // sleep 60 seconds before scheduling for the launch procedure to complete:
245
    // some bad hdfs connection may cause some log to stdout stderr
246
    // which may pollute .out file and affect the script to check success
247
4
    std::this_thread::sleep_for(
248
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
249
7
    while (!stopped()) {
250
3
        std::vector<InstanceInfoPB> instances;
251
3
        get_all_instances(txn_kv_.get(), instances);
252
        // TODO(plat1ko): delete job recycle kv of non-existent instances
253
3
        LOG(INFO) << "Recycler get instances: " << [&instances] {
254
3
            std::stringstream ss;
255
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
256
3
            return ss.str();
257
3
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
253
3
        LOG(INFO) << "Recycler get instances: " << [&instances] {
254
3
            std::stringstream ss;
255
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
256
3
            return ss.str();
257
3
        }();
258
3
        if (!instances.empty()) {
259
            // enqueue instances
260
3
            std::lock_guard lock(mtx_);
261
30
            for (auto& instance : instances) {
262
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
263
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
264
                // skip instance already in pending queue
265
30
                if (success) {
266
30
                    pending_instance_queue_.push_back(std::move(instance));
267
30
                }
268
30
            }
269
3
            pending_instance_cond_.notify_all();
270
3
        }
271
3
        {
272
3
            std::unique_lock lock(mtx_);
273
3
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
274
6
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
274
6
                               [&]() { return stopped(); });
275
3
        }
276
3
    }
277
4
}
278
279
8
void Recycler::recycle_callback() {
280
38
    while (!stopped()) {
281
38
        InstanceInfoPB instance;
282
38
        {
283
38
            std::unique_lock lock(mtx_);
284
38
            pending_instance_cond_.wait(
285
51
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
285
51
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
286
38
            if (stopped()) {
287
8
                return;
288
8
            }
289
30
            instance = std::move(pending_instance_queue_.front());
290
30
            pending_instance_queue_.pop_front();
291
30
            pending_instance_set_.erase(instance.instance_id());
292
30
        }
293
0
        auto& instance_id = instance.instance_id();
294
30
        {
295
30
            std::lock_guard lock(mtx_);
296
            // skip instance in recycling
297
30
            if (recycling_instance_map_.count(instance_id)) continue;
298
30
        }
299
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
300
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
301
302
30
        if (int r = instance_recycler->init(); r != 0) {
303
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
304
0
                         << " ret=" << r;
305
0
            continue;
306
0
        }
307
30
        std::string recycle_job_key;
308
30
        job_recycle_key({instance_id}, &recycle_job_key);
309
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
310
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
311
30
        if (ret != 0) { // Prepare failed
312
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
313
20
                         << " ret=" << ret;
314
20
            continue;
315
20
        } else {
316
10
            std::lock_guard lock(mtx_);
317
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
318
10
        }
319
10
        if (stopped()) return;
320
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
321
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
322
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
323
10
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
324
10
        ret = instance_recycler->do_recycle();
325
        // If instance recycler has been aborted, don't finish this job
326
327
10
        if (!instance_recycler->stopped()) {
328
9
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
329
9
                                        ret == 0, ctime_ms);
330
9
        }
331
10
        if (instance_recycler->stopped() || ret != 0) {
332
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
333
0
        }
334
10
        {
335
10
            std::lock_guard lock(mtx_);
336
10
            recycling_instance_map_.erase(instance_id);
337
10
        }
338
339
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
340
10
        auto elpased_ms = now - ctime_ms;
341
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
342
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
343
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
344
10
                                             now + config::recycle_interval_seconds * 1000);
345
10
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
346
10
        LOG(INFO) << "recycle instance done, "
347
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
348
10
                  << " now: " << now;
349
350
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
351
352
10
        LOG_WARNING("finish recycle instance")
353
10
                .tag("instance_id", instance_id)
354
10
                .tag("cost_ms", elpased_ms);
355
10
    }
356
8
}
357
358
4
void Recycler::lease_recycle_jobs() {
359
54
    while (!stopped()) {
360
50
        std::vector<std::string> instances;
361
50
        instances.reserve(recycling_instance_map_.size());
362
50
        {
363
50
            std::lock_guard lock(mtx_);
364
50
            for (auto& [id, _] : recycling_instance_map_) {
365
30
                instances.push_back(id);
366
30
            }
367
50
        }
368
50
        for (auto& i : instances) {
369
30
            std::string recycle_job_key;
370
30
            job_recycle_key({i}, &recycle_job_key);
371
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
372
30
            if (ret == 1) {
373
0
                std::lock_guard lock(mtx_);
374
0
                if (auto it = recycling_instance_map_.find(i);
375
0
                    it != recycling_instance_map_.end()) {
376
0
                    it->second->stop();
377
0
                }
378
0
            }
379
30
        }
380
50
        {
381
50
            std::unique_lock lock(mtx_);
382
50
            notifier_.wait_for(lock,
383
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
384
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
384
100
                               [&]() { return stopped(); });
385
50
        }
386
50
    }
387
4
}
388
389
4
void Recycler::check_recycle_tasks() {
390
7
    while (!stopped()) {
391
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
392
3
        {
393
3
            std::lock_guard lock(mtx_);
394
3
            recycling_instance_map = recycling_instance_map_;
395
3
        }
396
3
        for (auto& entry : recycling_instance_map) {
397
0
            entry.second->check_recycle_tasks();
398
0
        }
399
400
3
        std::unique_lock lock(mtx_);
401
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
402
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
402
6
                           [&]() { return stopped(); });
403
3
    }
404
4
}
405
406
4
int Recycler::start(brpc::Server* server) {
407
4
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
408
4
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
409
4
    S3Environment::getInstance();
410
411
4
    if (config::enable_checker) {
412
0
        checker_ = std::make_unique<Checker>(txn_kv_);
413
0
        int ret = checker_->start();
414
0
        std::string msg;
415
0
        if (ret != 0) {
416
0
            msg = "failed to start checker";
417
0
            LOG(ERROR) << msg;
418
0
            std::cerr << msg << std::endl;
419
0
            return ret;
420
0
        }
421
0
        msg = "checker started";
422
0
        LOG(INFO) << msg;
423
0
        std::cout << msg << std::endl;
424
0
    }
425
426
4
    if (server) {
427
        // Add service
428
1
        auto recycler_service =
429
1
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
430
1
        server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE);
431
1
    }
432
433
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
Line
Count
Source
433
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
434
12
    for (int i = 0; i < config::recycle_concurrency; ++i) {
435
8
        workers_.emplace_back([this] { recycle_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
Line
Count
Source
435
8
        workers_.emplace_back([this] { recycle_callback(); });
436
8
    }
437
438
4
    workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this);
439
4
    workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this);
440
441
4
    if (config::enable_snapshot_data_migrator) {
442
0
        snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_);
443
0
        int ret = snapshot_data_migrator_->start();
444
0
        if (ret != 0) {
445
0
            LOG(ERROR) << "failed to start snapshot data migrator";
446
0
            return ret;
447
0
        }
448
0
        LOG(INFO) << "snapshot data migrator started";
449
0
    }
450
451
4
    if (config::enable_snapshot_chain_compactor) {
452
0
        snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_);
453
0
        int ret = snapshot_chain_compactor_->start();
454
0
        if (ret != 0) {
455
0
            LOG(ERROR) << "failed to start snapshot chain compactor";
456
0
            return ret;
457
0
        }
458
0
        LOG(INFO) << "snapshot chain compactor started";
459
0
    }
460
461
4
    return 0;
462
4
}
463
464
4
void Recycler::stop() {
465
4
    stopped_ = true;
466
4
    notifier_.notify_all();
467
4
    pending_instance_cond_.notify_all();
468
4
    {
469
4
        std::lock_guard lock(mtx_);
470
4
        for (auto& [_, recycler] : recycling_instance_map_) {
471
0
            recycler->stop();
472
0
        }
473
4
    }
474
20
    for (auto& w : workers_) {
475
20
        if (w.joinable()) w.join();
476
20
    }
477
4
    if (checker_) {
478
0
        checker_->stop();
479
0
    }
480
4
    if (snapshot_data_migrator_) {
481
0
        snapshot_data_migrator_->stop();
482
0
    }
483
4
    if (snapshot_chain_compactor_) {
484
0
        snapshot_chain_compactor_->stop();
485
0
    }
486
4
}
487
488
// Cache that maps <index_id, schema_version> to the inverted index info of that
// schema. On a miss the schema is read from the KV store through `txn_kv_` and
// the result is remembered, either in `inverted_index_id_map_` or, when the
// schema has no inverted index, in `schemas_without_inverted_index_`.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Look up the inverted index info of <index_id, schema_version>.
    //
    // Returns 0 on success. `res` is filled from the cache or from the schema;
    // it is left untouched when the schema is known to have no inverted index.
    // Returns -1 when the transaction cannot be created or the schema value is
    // malformed, and the TxnErrorCode of `blob_get` cast to int when reading the
    // schema fails (the original note states this is 1 when the schema kv is
    // not found).
    //
    // The cache lookup and the later insert are not one atomic step: two threads
    // that miss for the same key at the same time may both read the schema from
    // KV and both try to insert it. This trades an occasional redundant read for
    // a smaller lock scope; the repeated insert does not affect correctness.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // V1 is used when the schema does not record a storage format.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            // Only indexes of type INVERTED are collected.
            for (auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.emplace_back(i.index_id(), i.index_suffix_name());
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Remember the index info of <index_id, schema_version>.
    // An empty `index_info.second` means this schema has no inverted index; the
    // key is then recorded in `schemas_without_inverted_index_` instead of the map.
    // An entry that already exists in the map is kept as is (`try_emplace`).
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    // Guards both containers below.
    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        size_t operator()(const Key& key) const {
            // Mix both components (boost::hash_combine style). Using only the
            // hash of `schema_version` would place every index_id that shares
            // a schema version into the same bucket.
            size_t seed = std::hash<int64_t> {}(key.first);
            seed ^= std::hash<int32_t> {}(key.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
580
581
// Build the recycler of a single instance.
//
// txn_kv:             KV store handle, shared with the cache and the snapshot manager.
// instance:           instance description; a copy is kept in `instance_info_`.
// thread_pool_group:  thread pools used by this recycler.
// txn_lazy_committer: lazy committer whose resource manager is refreshed with
//                     `instance`. It is dereferenced here, so it must not be null.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          // Uses the members, not the parameters: `txn_kv` has already been moved from.
          // NOTE(review): relies on `txn_kv_` and `instance_id_` being declared
          // before `inverted_index_id_cache_` in the class -- confirm in the header.
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)) {
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);

    // Since the recycler's resource manager could not be notified when instance info changes,
    // we need to refresh the instance info here to ensure the resource manager has the latest info.
    txn_lazy_committer_->resource_manager()->refresh_instance(instance_id_, instance);
}

// Defined in this file, where InvertedIndexIdCache is a complete type.
InstanceRecycler::~InstanceRecycler() = default;
598
599
99
int InstanceRecycler::init_obj_store_accessors() {
600
99
    for (const auto& obj_info : instance_info_.obj_info()) {
601
68
#ifdef UNIT_TEST
602
68
        auto accessor = std::make_shared<MockAccessor>();
603
#else
604
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
605
        if (!s3_conf) {
606
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
607
            return -1;
608
        }
609
610
        std::shared_ptr<S3Accessor> accessor;
611
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
612
        if (ret != 0) {
613
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
614
                         << " resource_id=" << obj_info.id();
615
            return ret;
616
        }
617
#endif
618
68
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
619
68
    }
620
621
99
    return 0;
622
99
}
623
624
99
int InstanceRecycler::init_storage_vault_accessors() {
625
99
    if (instance_info_.resource_ids().empty()) {
626
92
        return 0;
627
92
    }
628
629
7
    FullRangeGetOptions opts(txn_kv_);
630
7
    opts.prefetch = true;
631
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
632
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
633
634
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
635
18
        auto [k, v] = *kv;
636
18
        StorageVaultPB vault;
637
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
638
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
639
0
            return -1;
640
0
        }
641
18
        std::string recycler_storage_vault_white_list = accumulate(
642
18
                config::recycler_storage_vault_white_list.begin(),
643
18
                config::recycler_storage_vault_white_list.end(), std::string(),
644
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
644
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
645
18
        LOG_INFO("config::recycler_storage_vault_white_list")
646
18
                .tag("", recycler_storage_vault_white_list);
647
18
        if (!config::recycler_storage_vault_white_list.empty()) {
648
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
649
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
650
8
                it == config::recycler_storage_vault_white_list.end()) {
651
2
                LOG_WARNING(
652
2
                        "failed to init accessor for vault because this vault is not in "
653
2
                        "config::recycler_storage_vault_white_list. ")
654
2
                        .tag(" vault name:", vault.name())
655
2
                        .tag(" config::recycler_storage_vault_white_list:",
656
2
                             recycler_storage_vault_white_list);
657
2
                continue;
658
2
            }
659
8
        }
660
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
661
16
                                 &accessor_map_, &vault);
662
16
        if (vault.has_hdfs_info()) {
663
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
664
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
665
9
            int ret = accessor->init();
666
9
            if (ret != 0) {
667
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
668
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
669
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
670
4
                continue;
671
4
            }
672
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
673
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
674
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
675
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
676
#else
677
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
678
                       << "but HDFS storage vaults were detected";
679
#endif
680
7
        } else if (vault.has_obj_info()) {
681
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
682
7
            if (!s3_conf) {
683
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
684
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
685
1
                continue;
686
1
            }
687
688
6
            std::shared_ptr<S3Accessor> accessor;
689
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
690
6
            if (ret != 0) {
691
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
692
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
693
0
                             << " ret=" << ret
694
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
695
0
                continue;
696
0
            }
697
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
698
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
699
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
700
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
701
6
        }
702
16
    }
703
704
7
    if (!it->is_valid()) {
705
0
        LOG_WARNING("failed to get storage vault kv");
706
0
        return -1;
707
0
    }
708
709
7
    if (accessor_map_.empty()) {
710
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
711
1
        return -2;
712
1
    }
713
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
714
6
             instance_id_);
715
716
6
    return 0;
717
7
}
718
719
99
int InstanceRecycler::init() {
720
99
    int ret = init_obj_store_accessors();
721
99
    if (ret != 0) {
722
0
        return ret;
723
0
    }
724
725
99
    return init_storage_vault_accessors();
726
99
}
727
728
// Bundle several callables that return int into one `std::function<int()>`.
//
// Invoking the returned task calls every callable exactly once, in argument
// order (elements of a braced-init-list are evaluated left to right), and
// yields the last non-zero return value, or 0 when every callable returned 0.
// The callables are captured by copy.
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() {
        // Run all tasks up front; a failing task does not prevent later ones from running.
        const std::initializer_list<int> codes = {funcs()...};

        int result = 0;
        for (const int code : codes) {
            result = (code != 0) ? code : result;
        }
        return result;
    };
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
Line
Count
Source
729
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
730
10
    return [funcs...]() {
731
10
        return [](std::initializer_list<int> ret_vals) {
732
10
            int i = 0;
733
10
            for (int ret : ret_vals) {
734
10
                if (ret != 0) {
735
10
                    i = ret;
736
10
                }
737
10
            }
738
10
            return i;
739
10
        }({funcs()...});
740
10
    };
741
10
}
742
743
10
int InstanceRecycler::do_recycle() {
744
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
745
10
    tablet_metrics_context_.reset();
746
10
    segment_metrics_context_.reset();
747
10
    DORIS_CLOUD_DEFER {
748
10
        tablet_metrics_context_.finish_report();
749
10
        segment_metrics_context_.finish_report();
750
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
747
10
    DORIS_CLOUD_DEFER {
748
10
        tablet_metrics_context_.finish_report();
749
10
        segment_metrics_context_.finish_report();
750
10
    };
751
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
752
0
        int res = recycle_cluster_snapshots();
753
0
        if (res != 0) {
754
0
            return -1;
755
0
        }
756
0
        return recycle_deleted_instance();
757
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
758
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
759
10
                                        fmt::format("instance id {}", instance_id_),
760
120
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
760
120
                                        [](int r) { return r != 0; });
761
10
        sync_executor
762
10
                .add(task_wrapper(
763
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
763
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
764
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
764
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
765
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
766
                                   // becase they may both recycle the same set of tablets
767
                        // recycle dropped table or idexes(mv, rollup)
768
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
768
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
769
                        // recycle dropped partitions
770
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
770
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
771
10
                .add(task_wrapper(
772
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
772
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
773
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
773
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
774
10
                .add(task_wrapper(
775
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
775
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
776
10
                .add(task_wrapper(
777
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
777
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
778
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
778
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
779
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
779
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
780
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
780
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
781
10
                .add(task_wrapper(
782
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
782
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
783
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
783
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
784
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
Line
Count
Source
784
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
785
10
        bool finished = true;
786
10
        std::vector<int> rets = sync_executor.when_all(&finished);
787
120
        for (int ret : rets) {
788
120
            if (ret != 0) {
789
0
                return ret;
790
0
            }
791
120
        }
792
10
        return finished ? 0 : -1;
793
10
    } else {
794
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
795
0
                     << " instance_id=" << instance_id_;
796
0
        return -1;
797
0
    }
798
10
}
799
800
/**
801
* 1. delete all remote data
802
* 2. delete all kv
803
* 3. remove instance kv
804
*/
805
4
int InstanceRecycler::recycle_deleted_instance() {
806
4
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
807
808
4
    int ret = 0;
809
4
    auto start_time = steady_clock::now();
810
811
4
    DORIS_CLOUD_DEFER {
812
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
813
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
814
4
                     << " recycle deleted instance, cost=" << cost
815
4
                     << "s, instance_id=" << instance_id_;
816
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
811
4
    DORIS_CLOUD_DEFER {
812
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
813
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
814
4
                     << " recycle deleted instance, cost=" << cost
815
4
                     << "s, instance_id=" << instance_id_;
816
4
    };
817
818
4
    bool has_snapshots = false;
819
4
    if (has_cluster_snapshots(&has_snapshots) != 0) {
820
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
821
0
        return -1;
822
4
    } else if (has_snapshots) {
823
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
824
1
        return 0;
825
1
    }
826
827
3
    if (recycle_operation_logs() != 0) {
828
0
        LOG_WARNING("failed to recycle operation logs").tag("instance_id", instance_id_);
829
0
        return -1;
830
0
    }
831
832
3
    if (recycle_versioned_rowsets() != 0) {
833
0
        LOG_WARNING("failed to recycle versioned rowsets").tag("instance_id", instance_id_);
834
0
        return -1;
835
0
    }
836
837
3
    bool snapshot_enabled = instance_info().has_snapshot_switch_status() &&
838
3
                            instance_info().snapshot_switch_status() !=
839
0
                                    SnapshotSwitchStatus::SNAPSHOT_SWITCH_DISABLED;
840
3
    if (snapshot_enabled) {
841
0
        bool has_unrecycled_rowsets = false;
842
0
        if (recycle_ref_rowsets(&has_unrecycled_rowsets) != 0) {
843
0
            LOG_WARNING("failed to recycle ref rowsets").tag("instance_id", instance_id_);
844
0
            return -1;
845
0
        } else if (has_unrecycled_rowsets) {
846
0
            LOG_INFO("instance has referenced rowsets, skip recycling")
847
0
                    .tag("instance_id", instance_id_);
848
0
            return ret;
849
0
        }
850
3
    } else { // delete all remote data if snapshot is disabled
851
3
        for (auto& [_, accessor] : accessor_map_) {
852
3
            if (stopped()) {
853
0
                return ret;
854
0
            }
855
856
3
            LOG(INFO) << "begin to delete all objects in " << accessor->uri();
857
3
            int del_ret = accessor->delete_all();
858
3
            if (del_ret == 0) {
859
3
                LOG(INFO) << "successfully delete all objects in " << accessor->uri();
860
3
            } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
861
                // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
862
                // so the recycling has been successful.
863
0
                ret = -1;
864
0
            }
865
3
        }
866
867
3
        if (ret != 0) {
868
0
            LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
869
0
            return ret;
870
0
        }
871
3
    }
872
873
    // delete all kv
874
3
    std::unique_ptr<Transaction> txn;
875
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
876
3
    if (err != TxnErrorCode::TXN_OK) {
877
0
        LOG(WARNING) << "failed to create txn";
878
0
        ret = -1;
879
0
        return -1;
880
0
    }
881
3
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
882
    // delete kv before deleting objects to prevent the checker from misjudging data loss
883
3
    std::string start_txn_key = txn_key_prefix(instance_id_);
884
3
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
885
3
    txn->remove(start_txn_key, end_txn_key);
886
3
    std::string start_version_key = version_key_prefix(instance_id_);
887
3
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
888
3
    txn->remove(start_version_key, end_version_key);
889
3
    std::string start_meta_key = meta_key_prefix(instance_id_);
890
3
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
891
3
    txn->remove(start_meta_key, end_meta_key);
892
3
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
893
3
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
894
3
    txn->remove(start_recycle_key, end_recycle_key);
895
3
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
896
3
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
897
3
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
898
3
    std::string start_copy_key = copy_key_prefix(instance_id_);
899
3
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
900
3
    txn->remove(start_copy_key, end_copy_key);
901
    // should not remove job key range, because we need to reserve job recycle kv
902
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
903
3
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
904
3
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
905
3
    txn->remove(start_job_tablet_key, end_job_tablet_key);
906
3
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
907
3
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
908
3
    std::string start_vault_key = storage_vault_key(key_info0);
909
3
    std::string end_vault_key = storage_vault_key(key_info1);
910
3
    txn->remove(start_vault_key, end_vault_key);
911
3
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
912
3
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
913
3
    txn->remove(versioned_version_key_start, versioned_version_key_end);
914
3
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
915
3
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
916
3
    txn->remove(versioned_index_key_start, versioned_index_key_end);
917
3
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
918
3
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
919
3
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
920
3
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
921
3
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
922
3
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
923
3
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
924
3
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
925
3
    txn->remove(versioned_data_key_start, versioned_data_key_end);
926
3
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
927
3
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
928
3
    txn->remove(versioned_log_key_start, versioned_log_key_end);
929
3
    err = txn->commit();
930
3
    if (err != TxnErrorCode::TXN_OK) {
931
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
932
0
        ret = -1;
933
0
    }
934
935
3
    if (ret == 0) {
936
        // remove instance kv
937
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
938
3
        err = txn_kv_->create_txn(&txn);
939
3
        if (err != TxnErrorCode::TXN_OK) {
940
0
            LOG(WARNING) << "failed to create txn";
941
0
            ret = -1;
942
0
            return ret;
943
0
        }
944
3
        std::string key;
945
3
        instance_key({instance_id_}, &key);
946
3
        txn->atomic_add(system_meta_service_instance_update_key(), 1);
947
3
        txn->remove(key);
948
3
        err = txn->commit();
949
3
        if (err != TxnErrorCode::TXN_OK) {
950
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
951
0
                         << " err=" << err;
952
0
            ret = -1;
953
0
        }
954
3
    }
955
3
    return ret;
956
3
}
957
958
int InstanceRecycler::check_rowset_exists(int64_t tablet_id, const std::string& rowset_id,
959
9
                                          bool* exists, PackedFileRecycleStats* stats) {
960
9
    if (exists == nullptr) {
961
0
        return -1;
962
0
    }
963
9
    *exists = false;
964
965
9
    std::string begin = meta_rowset_key({instance_id_, tablet_id, 0});
966
9
    std::string end = meta_rowset_key({instance_id_, tablet_id + 1, 0});
967
9
    std::string scan_begin = begin;
968
969
9
    while (true) {
970
9
        std::unique_ptr<RangeGetIterator> it_range;
971
9
        int get_ret = txn_get(txn_kv_.get(), scan_begin, end, it_range);
972
9
        if (get_ret < 0) {
973
0
            LOG_WARNING("failed to scan rowset metas when recycling packed file")
974
0
                    .tag("instance_id", instance_id_)
975
0
                    .tag("tablet_id", tablet_id)
976
0
                    .tag("ret", get_ret);
977
0
            return -1;
978
0
        }
979
9
        if (get_ret == 1 || it_range == nullptr || !it_range->has_next()) {
980
6
            return 0;
981
6
        }
982
983
3
        std::string last_key;
984
3
        while (it_range->has_next()) {
985
3
            auto [k, v] = it_range->next();
986
3
            last_key.assign(k.data(), k.size());
987
3
            doris::RowsetMetaCloudPB rowset_meta;
988
3
            if (!rowset_meta.ParseFromArray(v.data(), v.size())) {
989
0
                LOG_WARNING("malformed rowset meta when checking packed file rowset existence")
990
0
                        .tag("instance_id", instance_id_)
991
0
                        .tag("tablet_id", tablet_id)
992
0
                        .tag("key", hex(k));
993
0
                continue;
994
0
            }
995
3
            if (stats) {
996
3
                ++stats->rowset_scan_count;
997
3
            }
998
3
            if (rowset_meta.rowset_id_v2() == rowset_id) {
999
3
                *exists = true;
1000
3
                return 0;
1001
3
            }
1002
3
        }
1003
1004
0
        if (!it_range->more()) {
1005
0
            return 0;
1006
0
        }
1007
1008
        // Continue scanning from the next key to keep each transaction short.
1009
0
        scan_begin = std::move(last_key);
1010
0
        scan_begin.push_back('\x00');
1011
0
    }
1012
9
}
1013
1014
int InstanceRecycler::check_recycle_and_tmp_rowset_exists(int64_t tablet_id,
1015
                                                          const std::string& rowset_id,
1016
                                                          int64_t txn_id, bool* recycle_exists,
1017
11
                                                          bool* tmp_exists) {
1018
11
    if (recycle_exists == nullptr || tmp_exists == nullptr) {
1019
0
        return -1;
1020
0
    }
1021
11
    *recycle_exists = false;
1022
11
    *tmp_exists = false;
1023
1024
11
    if (txn_id <= 0) {
1025
0
        LOG_WARNING("invalid txn id when checking recycle/tmp rowset existence")
1026
0
                .tag("instance_id", instance_id_)
1027
0
                .tag("tablet_id", tablet_id)
1028
0
                .tag("rowset_id", rowset_id)
1029
0
                .tag("txn_id", txn_id);
1030
0
        return -1;
1031
0
    }
1032
1033
11
    std::unique_ptr<Transaction> txn;
1034
11
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1035
11
    if (err != TxnErrorCode::TXN_OK) {
1036
0
        LOG_WARNING("failed to create txn when checking recycle/tmp rowset existence")
1037
0
                .tag("instance_id", instance_id_)
1038
0
                .tag("tablet_id", tablet_id)
1039
0
                .tag("rowset_id", rowset_id)
1040
0
                .tag("txn_id", txn_id)
1041
0
                .tag("err", err);
1042
0
        return -1;
1043
0
    }
1044
1045
11
    std::string recycle_key = recycle_rowset_key({instance_id_, tablet_id, rowset_id});
1046
11
    auto ret = key_exists(txn.get(), recycle_key, true);
1047
11
    if (ret == TxnErrorCode::TXN_OK) {
1048
1
        *recycle_exists = true;
1049
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1050
0
        LOG_WARNING("failed to check recycle rowset existence")
1051
0
                .tag("instance_id", instance_id_)
1052
0
                .tag("tablet_id", tablet_id)
1053
0
                .tag("rowset_id", rowset_id)
1054
0
                .tag("key", hex(recycle_key))
1055
0
                .tag("err", ret);
1056
0
        return -1;
1057
0
    }
1058
1059
11
    std::string tmp_key = meta_rowset_tmp_key({instance_id_, txn_id, tablet_id});
1060
11
    ret = key_exists(txn.get(), tmp_key, true);
1061
11
    if (ret == TxnErrorCode::TXN_OK) {
1062
1
        *tmp_exists = true;
1063
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1064
0
        LOG_WARNING("failed to check tmp rowset existence")
1065
0
                .tag("instance_id", instance_id_)
1066
0
                .tag("tablet_id", tablet_id)
1067
0
                .tag("txn_id", txn_id)
1068
0
                .tag("key", hex(tmp_key))
1069
0
                .tag("err", ret);
1070
0
        return -1;
1071
0
    }
1072
1073
11
    return 0;
1074
11
}
1075
1076
std::pair<std::string, std::shared_ptr<StorageVaultAccessor>>
1077
8
InstanceRecycler::resolve_packed_file_accessor(const std::string& hint) {
1078
8
    if (!hint.empty()) {
1079
8
        if (auto it = accessor_map_.find(hint); it != accessor_map_.end()) {
1080
8
            return {hint, it->second};
1081
8
        }
1082
8
    }
1083
1084
0
    return {"", nullptr};
1085
8
}
1086
1087
int InstanceRecycler::correct_packed_file_info(cloud::PackedFileInfoPB* packed_info, bool* changed,
1088
                                               const std::string& packed_file_path,
1089
3
                                               PackedFileRecycleStats* stats) {
1090
3
    bool local_changed = false;
1091
3
    int64_t left_num = 0;
1092
3
    int64_t left_bytes = 0;
1093
3
    bool all_small_files_confirmed = true;
1094
3
    LOG(INFO) << "begin to correct file: " << packed_file_path;
1095
1096
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1097
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1098
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1099
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1100
14
        LOG_INFO("packed slice correction status")
1101
14
                .tag("instance_id", instance_id_)
1102
14
                .tag("packed_file_path", packed_file_path)
1103
14
                .tag("small_file_path", file.path())
1104
14
                .tag("tablet_id", tablet_id)
1105
14
                .tag("rowset_id", rowset_id)
1106
14
                .tag("txn_id", txn_id)
1107
14
                .tag("size", file.size())
1108
14
                .tag("deleted", file.deleted())
1109
14
                .tag("corrected", file.corrected())
1110
14
                .tag("confirmed_this_round", confirmed_this_round);
1111
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
Line
Count
Source
1096
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1097
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1098
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1099
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1100
14
        LOG_INFO("packed slice correction status")
1101
14
                .tag("instance_id", instance_id_)
1102
14
                .tag("packed_file_path", packed_file_path)
1103
14
                .tag("small_file_path", file.path())
1104
14
                .tag("tablet_id", tablet_id)
1105
14
                .tag("rowset_id", rowset_id)
1106
14
                .tag("txn_id", txn_id)
1107
14
                .tag("size", file.size())
1108
14
                .tag("deleted", file.deleted())
1109
14
                .tag("corrected", file.corrected())
1110
14
                .tag("confirmed_this_round", confirmed_this_round);
1111
14
    };
1112
1113
17
    for (int i = 0; i < packed_info->slices_size(); ++i) {
1114
14
        auto* small_file = packed_info->mutable_slices(i);
1115
14
        if (small_file->deleted()) {
1116
3
            log_small_file_status(*small_file, small_file->corrected());
1117
3
            continue;
1118
3
        }
1119
1120
11
        if (small_file->corrected()) {
1121
0
            left_num++;
1122
0
            left_bytes += small_file->size();
1123
0
            log_small_file_status(*small_file, true);
1124
0
            continue;
1125
0
        }
1126
1127
11
        if (!small_file->has_tablet_id() || !small_file->has_rowset_id()) {
1128
0
            LOG_WARNING("packed file small file missing identifiers during correction")
1129
0
                    .tag("instance_id", instance_id_)
1130
0
                    .tag("small_file_path", small_file->path())
1131
0
                    .tag("index", i);
1132
0
            return -1;
1133
0
        }
1134
1135
11
        int64_t tablet_id = small_file->tablet_id();
1136
11
        const std::string& rowset_id = small_file->rowset_id();
1137
11
        if (!small_file->has_txn_id() || small_file->txn_id() <= 0) {
1138
0
            LOG_WARNING("packed file small file missing valid txn id during correction")
1139
0
                    .tag("instance_id", instance_id_)
1140
0
                    .tag("small_file_path", small_file->path())
1141
0
                    .tag("index", i)
1142
0
                    .tag("tablet_id", tablet_id)
1143
0
                    .tag("rowset_id", rowset_id)
1144
0
                    .tag("has_txn_id", small_file->has_txn_id())
1145
0
                    .tag("txn_id", small_file->has_txn_id() ? small_file->txn_id() : 0);
1146
0
            return -1;
1147
0
        }
1148
11
        int64_t txn_id = small_file->txn_id();
1149
11
        bool recycle_exists = false;
1150
11
        bool tmp_exists = false;
1151
11
        if (check_recycle_and_tmp_rowset_exists(tablet_id, rowset_id, txn_id, &recycle_exists,
1152
11
                                                &tmp_exists) != 0) {
1153
0
            return -1;
1154
0
        }
1155
1156
11
        bool small_file_confirmed = false;
1157
11
        if (tmp_exists) {
1158
1
            left_num++;
1159
1
            left_bytes += small_file->size();
1160
1
            small_file_confirmed = true;
1161
10
        } else if (recycle_exists) {
1162
1
            left_num++;
1163
1
            left_bytes += small_file->size();
1164
            // keep small_file_confirmed=false so the packed file remains uncorrected
1165
9
        } else {
1166
9
            bool rowset_exists = false;
1167
9
            if (check_rowset_exists(tablet_id, rowset_id, &rowset_exists, stats) != 0) {
1168
0
                return -1;
1169
0
            }
1170
1171
9
            if (!rowset_exists) {
1172
6
                if (!small_file->deleted()) {
1173
6
                    small_file->set_deleted(true);
1174
6
                    local_changed = true;
1175
6
                }
1176
6
                if (!small_file->corrected()) {
1177
6
                    small_file->set_corrected(true);
1178
6
                    local_changed = true;
1179
6
                }
1180
6
                small_file_confirmed = true;
1181
6
            } else {
1182
3
                left_num++;
1183
3
                left_bytes += small_file->size();
1184
3
                small_file_confirmed = true;
1185
3
            }
1186
9
        }
1187
1188
11
        if (!small_file_confirmed) {
1189
1
            all_small_files_confirmed = false;
1190
1
        }
1191
1192
11
        if (small_file->corrected() != small_file_confirmed) {
1193
4
            small_file->set_corrected(small_file_confirmed);
1194
4
            local_changed = true;
1195
4
        }
1196
1197
11
        log_small_file_status(*small_file, small_file_confirmed);
1198
11
    }
1199
1200
3
    if (packed_info->remaining_slice_bytes() != left_bytes) {
1201
3
        packed_info->set_remaining_slice_bytes(left_bytes);
1202
3
        local_changed = true;
1203
3
    }
1204
3
    if (packed_info->ref_cnt() != left_num) {
1205
3
        auto old_ref_cnt = packed_info->ref_cnt();
1206
3
        packed_info->set_ref_cnt(left_num);
1207
3
        LOG_INFO("corrected packed file ref count")
1208
3
                .tag("instance_id", instance_id_)
1209
3
                .tag("resource_id", packed_info->resource_id())
1210
3
                .tag("packed_file_path", packed_file_path)
1211
3
                .tag("old_ref_cnt", old_ref_cnt)
1212
3
                .tag("new_ref_cnt", left_num);
1213
3
        local_changed = true;
1214
3
    }
1215
3
    if (packed_info->corrected() != all_small_files_confirmed) {
1216
2
        packed_info->set_corrected(all_small_files_confirmed);
1217
2
        local_changed = true;
1218
2
    }
1219
3
    if (left_num == 0 && packed_info->state() != cloud::PackedFileInfoPB::RECYCLING) {
1220
1
        packed_info->set_state(cloud::PackedFileInfoPB::RECYCLING);
1221
1
        local_changed = true;
1222
1
    }
1223
1224
3
    if (changed != nullptr) {
1225
3
        *changed = local_changed;
1226
3
    }
1227
3
    return 0;
1228
3
}
1229
1230
int InstanceRecycler::process_single_packed_file(const std::string& packed_key,
1231
                                                 const std::string& packed_file_path,
1232
4
                                                 PackedFileRecycleStats* stats) {
1233
4
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
1234
4
    bool correction_ok = false;
1235
4
    cloud::PackedFileInfoPB packed_info;
1236
1237
4
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
1238
4
        if (stopped()) {
1239
0
            LOG_WARNING("recycler stopped before processing packed file")
1240
0
                    .tag("instance_id", instance_id_)
1241
0
                    .tag("packed_file_path", packed_file_path)
1242
0
                    .tag("attempt", attempt);
1243
0
            return -1;
1244
0
        }
1245
1246
4
        std::unique_ptr<Transaction> txn;
1247
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1248
4
        if (err != TxnErrorCode::TXN_OK) {
1249
0
            LOG_WARNING("failed to create txn when processing packed file")
1250
0
                    .tag("instance_id", instance_id_)
1251
0
                    .tag("packed_file_path", packed_file_path)
1252
0
                    .tag("attempt", attempt)
1253
0
                    .tag("err", err);
1254
0
            return -1;
1255
0
        }
1256
1257
4
        std::string packed_val;
1258
4
        err = txn->get(packed_key, &packed_val);
1259
4
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1260
0
            return 0;
1261
0
        }
1262
4
        if (err != TxnErrorCode::TXN_OK) {
1263
0
            LOG_WARNING("failed to get packed file kv")
1264
0
                    .tag("instance_id", instance_id_)
1265
0
                    .tag("packed_file_path", packed_file_path)
1266
0
                    .tag("attempt", attempt)
1267
0
                    .tag("err", err);
1268
0
            return -1;
1269
0
        }
1270
1271
4
        if (!packed_info.ParseFromString(packed_val)) {
1272
0
            LOG_WARNING("failed to parse packed file info")
1273
0
                    .tag("instance_id", instance_id_)
1274
0
                    .tag("packed_file_path", packed_file_path)
1275
0
                    .tag("attempt", attempt);
1276
0
            return -1;
1277
0
        }
1278
1279
4
        int64_t now_sec = ::time(nullptr);
1280
4
        bool corrected = packed_info.corrected();
1281
4
        bool due = config::force_immediate_recycle ||
1282
4
                   now_sec - packed_info.created_at_sec() >=
1283
4
                           config::packed_file_correction_delay_seconds;
1284
1285
4
        if (!corrected && due) {
1286
3
            bool changed = false;
1287
3
            if (correct_packed_file_info(&packed_info, &changed, packed_file_path, stats) != 0) {
1288
0
                LOG_WARNING("correct_packed_file_info failed")
1289
0
                        .tag("instance_id", instance_id_)
1290
0
                        .tag("packed_file_path", packed_file_path)
1291
0
                        .tag("attempt", attempt);
1292
0
                return -1;
1293
0
            }
1294
3
            if (changed) {
1295
3
                std::string updated;
1296
3
                if (!packed_info.SerializeToString(&updated)) {
1297
0
                    LOG_WARNING("failed to serialize packed file info after correction")
1298
0
                            .tag("instance_id", instance_id_)
1299
0
                            .tag("packed_file_path", packed_file_path)
1300
0
                            .tag("attempt", attempt);
1301
0
                    return -1;
1302
0
                }
1303
3
                txn->put(packed_key, updated);
1304
3
                err = txn->commit();
1305
3
                if (err == TxnErrorCode::TXN_OK) {
1306
3
                    if (stats) {
1307
3
                        ++stats->num_corrected;
1308
3
                    }
1309
3
                } else {
1310
0
                    if (err == TxnErrorCode::TXN_CONFLICT && attempt < max_retry_times) {
1311
0
                        LOG_WARNING(
1312
0
                                "failed to commit correction for packed file due to conflict, "
1313
0
                                "retrying")
1314
0
                                .tag("instance_id", instance_id_)
1315
0
                                .tag("packed_file_path", packed_file_path)
1316
0
                                .tag("attempt", attempt);
1317
0
                        sleep_for_packed_file_retry();
1318
0
                        packed_info.Clear();
1319
0
                        continue;
1320
0
                    }
1321
0
                    LOG_WARNING("failed to commit correction for packed file")
1322
0
                            .tag("instance_id", instance_id_)
1323
0
                            .tag("packed_file_path", packed_file_path)
1324
0
                            .tag("attempt", attempt)
1325
0
                            .tag("err", err);
1326
0
                    return -1;
1327
0
                }
1328
3
            }
1329
3
        }
1330
1331
4
        correction_ok = true;
1332
4
        break;
1333
4
    }
1334
1335
4
    if (!correction_ok) {
1336
0
        return -1;
1337
0
    }
1338
1339
4
    if (!(packed_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1340
4
          packed_info.ref_cnt() == 0)) {
1341
3
        return 0;
1342
3
    }
1343
1344
1
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
1345
0
        LOG_WARNING("packed file missing resource id when recycling")
1346
0
                .tag("instance_id", instance_id_)
1347
0
                .tag("packed_file_path", packed_file_path);
1348
0
        return -1;
1349
0
    }
1350
1
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
1351
1
    if (!accessor) {
1352
0
        LOG_WARNING("no accessor available to delete packed file")
1353
0
                .tag("instance_id", instance_id_)
1354
0
                .tag("packed_file_path", packed_file_path)
1355
0
                .tag("resource_id", packed_info.resource_id());
1356
0
        return -1;
1357
0
    }
1358
1
    int del_ret = accessor->delete_file(packed_file_path);
1359
1
    if (del_ret != 0 && del_ret != 1) {
1360
0
        LOG_WARNING("failed to delete packed file")
1361
0
                .tag("instance_id", instance_id_)
1362
0
                .tag("packed_file_path", packed_file_path)
1363
0
                .tag("resource_id", resource_id)
1364
0
                .tag("ret", del_ret);
1365
0
        return -1;
1366
0
    }
1367
1
    if (del_ret == 1) {
1368
0
        LOG_INFO("packed file already removed")
1369
0
                .tag("instance_id", instance_id_)
1370
0
                .tag("packed_file_path", packed_file_path)
1371
0
                .tag("resource_id", resource_id);
1372
1
    } else {
1373
1
        LOG_INFO("deleted packed file")
1374
1
                .tag("instance_id", instance_id_)
1375
1
                .tag("packed_file_path", packed_file_path)
1376
1
                .tag("resource_id", resource_id);
1377
1
    }
1378
1379
1
    for (int del_attempt = 1; del_attempt <= max_retry_times; ++del_attempt) {
1380
1
        std::unique_ptr<Transaction> del_txn;
1381
1
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
1382
1
        if (err != TxnErrorCode::TXN_OK) {
1383
0
            LOG_WARNING("failed to create txn when removing packed file kv")
1384
0
                    .tag("instance_id", instance_id_)
1385
0
                    .tag("packed_file_path", packed_file_path)
1386
0
                    .tag("del_attempt", del_attempt)
1387
0
                    .tag("err", err);
1388
0
            return -1;
1389
0
        }
1390
1391
1
        std::string latest_val;
1392
1
        err = del_txn->get(packed_key, &latest_val);
1393
1
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1394
0
            return 0;
1395
0
        }
1396
1
        if (err != TxnErrorCode::TXN_OK) {
1397
0
            LOG_WARNING("failed to re-read packed file kv before removal")
1398
0
                    .tag("instance_id", instance_id_)
1399
0
                    .tag("packed_file_path", packed_file_path)
1400
0
                    .tag("del_attempt", del_attempt)
1401
0
                    .tag("err", err);
1402
0
            return -1;
1403
0
        }
1404
1405
1
        cloud::PackedFileInfoPB latest_info;
1406
1
        if (!latest_info.ParseFromString(latest_val)) {
1407
0
            LOG_WARNING("failed to parse packed file info before removal")
1408
0
                    .tag("instance_id", instance_id_)
1409
0
                    .tag("packed_file_path", packed_file_path)
1410
0
                    .tag("del_attempt", del_attempt);
1411
0
            return -1;
1412
0
        }
1413
1414
1
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1415
1
              latest_info.ref_cnt() == 0)) {
1416
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
1417
0
                    .tag("instance_id", instance_id_)
1418
0
                    .tag("packed_file_path", packed_file_path)
1419
0
                    .tag("del_attempt", del_attempt);
1420
0
            return 0;
1421
0
        }
1422
1423
1
        del_txn->remove(packed_key);
1424
1
        err = del_txn->commit();
1425
1
        if (err == TxnErrorCode::TXN_OK) {
1426
1
            if (stats) {
1427
1
                ++stats->num_deleted;
1428
1
                stats->bytes_deleted += static_cast<int64_t>(packed_key.size()) +
1429
1
                                        static_cast<int64_t>(latest_val.size());
1430
1
                if (del_ret == 0 || del_ret == 1) {
1431
1
                    ++stats->num_object_deleted;
1432
1
                    int64_t object_size = latest_info.total_slice_bytes();
1433
1
                    if (object_size <= 0) {
1434
0
                        object_size = packed_info.total_slice_bytes();
1435
0
                    }
1436
1
                    stats->bytes_object_deleted += object_size;
1437
1
                }
1438
1
            }
1439
1
            LOG_INFO("removed packed file metadata")
1440
1
                    .tag("instance_id", instance_id_)
1441
1
                    .tag("packed_file_path", packed_file_path);
1442
1
            return 0;
1443
1
        }
1444
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
1445
0
            if (del_attempt >= max_retry_times) {
1446
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
1447
0
                        .tag("instance_id", instance_id_)
1448
0
                        .tag("packed_file_path", packed_file_path)
1449
0
                        .tag("del_attempt", del_attempt);
1450
0
                return -1;
1451
0
            }
1452
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
1453
0
                    .tag("instance_id", instance_id_)
1454
0
                    .tag("packed_file_path", packed_file_path)
1455
0
                    .tag("del_attempt", del_attempt);
1456
0
            sleep_for_packed_file_retry();
1457
0
            continue;
1458
0
        }
1459
0
        LOG_WARNING("failed to remove packed file kv")
1460
0
                .tag("instance_id", instance_id_)
1461
0
                .tag("packed_file_path", packed_file_path)
1462
0
                .tag("del_attempt", del_attempt)
1463
0
                .tag("err", err);
1464
0
        return -1;
1465
0
    }
1466
1467
0
    return -1;
1468
1
}
1469
1470
int InstanceRecycler::handle_packed_file_kv(std::string_view key, std::string_view /*value*/,
1471
4
                                            PackedFileRecycleStats* stats, int* ret) {
1472
4
    if (stats) {
1473
4
        ++stats->num_scanned;
1474
4
    }
1475
4
    std::string packed_file_path;
1476
4
    if (!decode_packed_file_key(key, &packed_file_path)) {
1477
0
        LOG_WARNING("failed to decode packed file key")
1478
0
                .tag("instance_id", instance_id_)
1479
0
                .tag("key", hex(key));
1480
0
        if (stats) {
1481
0
            ++stats->num_failed;
1482
0
        }
1483
0
        if (ret) {
1484
0
            *ret = -1;
1485
0
        }
1486
0
        return 0;
1487
0
    }
1488
1489
4
    std::string packed_key(key);
1490
4
    int process_ret = process_single_packed_file(packed_key, packed_file_path, stats);
1491
4
    if (process_ret != 0) {
1492
0
        if (stats) {
1493
0
            ++stats->num_failed;
1494
0
        }
1495
0
        if (ret) {
1496
0
            *ret = -1;
1497
0
        }
1498
0
    }
1499
4
    return 0;
1500
4
}
1501
bool is_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id,
1502
57.0k
                     int64_t txn_id) {
1503
57.0k
    std::unique_ptr<Transaction> txn;
1504
57.0k
    TxnErrorCode err = txn_kv->create_txn(&txn);
1505
57.0k
    if (err != TxnErrorCode::TXN_OK) {
1506
0
        LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id;
1507
0
        return false;
1508
0
    }
1509
1510
57.0k
    std::string index_val;
1511
57.0k
    const std::string index_key = txn_index_key({instance_id, txn_id});
1512
57.0k
    err = txn->get(index_key, &index_val);
1513
57.0k
    if (err != TxnErrorCode::TXN_OK) {
1514
53.0k
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
1515
53.0k
            TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_recycled");
1516
            // txn has been recycled;
1517
53.0k
            LOG(INFO) << "txn index key has been recycled, txn_id=" << txn_id
1518
53.0k
                      << " instance_id=" << instance_id;
1519
53.0k
            return true;
1520
53.0k
        }
1521
0
        LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id
1522
0
                     << " instance_id=" << instance_id << " key=" << hex(index_key)
1523
0
                     << " err=" << err;
1524
0
        return false;
1525
53.0k
    }
1526
1527
4.00k
    TxnIndexPB index_pb;
1528
4.00k
    if (!index_pb.ParseFromString(index_val)) {
1529
0
        LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id
1530
0
                     << " instance_id=" << instance_id;
1531
0
        return false;
1532
0
    }
1533
1534
4.00k
    DCHECK(index_pb.has_tablet_index() == true);
1535
4.00k
    if (!index_pb.tablet_index().has_db_id()) {
1536
        // In the previous version, the db_id was not set in the index_pb.
1537
        // If updating to the version which enable txn lazy commit, the db_id will be set.
1538
0
        LOG(INFO) << "txn index has no db_id, txn_id=" << txn_id << " instance_id=" << instance_id
1539
0
                  << " index=" << index_pb.ShortDebugString();
1540
0
        return true;
1541
0
    }
1542
1543
4.00k
    int64_t db_id = index_pb.tablet_index().db_id();
1544
4.00k
    DCHECK_GT(db_id, 0) << "db_id=" << db_id << " txn_id=" << txn_id
1545
0
                        << " instance_id=" << instance_id;
1546
1547
4.00k
    std::string info_val;
1548
4.00k
    const std::string info_key = txn_info_key({instance_id, db_id, txn_id});
1549
4.00k
    err = txn->get(info_key, &info_val);
1550
4.00k
    if (err != TxnErrorCode::TXN_OK) {
1551
0
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
1552
            // txn info has been recycled;
1553
0
            LOG(INFO) << "txn info key has been recycled, db_id=" << db_id << " txn_id=" << txn_id
1554
0
                      << " instance_id=" << instance_id;
1555
0
            return true;
1556
0
        }
1557
1558
0
        DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND);
1559
0
        LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id
1560
0
                     << " instance_id=" << instance_id << " key=" << hex(info_key)
1561
0
                     << " err=" << err;
1562
0
        return false;
1563
0
    }
1564
1565
4.00k
    TxnInfoPB txn_info;
1566
4.00k
    if (!txn_info.ParseFromString(info_val)) {
1567
0
        LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id
1568
0
                     << " instance_id=" << instance_id;
1569
0
        return false;
1570
0
    }
1571
1572
4.00k
    DCHECK(txn_info.txn_id() == txn_id) << "txn_id=" << txn_id << " instance_id=" << instance_id
1573
0
                                        << " txn_info=" << txn_info.ShortDebugString();
1574
1575
4.00k
    if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status() ||
1576
4.00k
        TxnStatusPB::TXN_STATUS_VISIBLE == txn_info.status()) {
1577
2.00k
        TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_aborted", &txn_info);
1578
2.00k
        return true;
1579
2.00k
    }
1580
1581
2.00k
    TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_not_finished", &txn_info);
1582
2.00k
    return false;
1583
4.00k
}
1584
1585
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
1586
5.01k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
1587
5.01k
    if (config::force_immediate_recycle) {
1588
8
        return 0L;
1589
8
    }
1590
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1591
5.00k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1592
5.00k
    int64_t retention_seconds = config::retention_seconds;
1593
5.00k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1594
3.90k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1595
3.90k
    }
1596
5.00k
    int64_t final_expiration = expiration + retention_seconds;
1597
5.00k
    if (*earlest_ts > final_expiration) {
1598
4
        *earlest_ts = final_expiration;
1599
4
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1600
4
    }
1601
5.00k
    return final_expiration;
1602
5.01k
}
1603
1604
int64_t calculate_partition_expired_time(
1605
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
1606
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
1607
9
    if (config::force_immediate_recycle) {
1608
3
        return 0L;
1609
3
    }
1610
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
1611
6
                                                            : partition_meta_pb.creation_time();
1612
6
    int64_t retention_seconds = config::retention_seconds;
1613
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
1614
6
        retention_seconds =
1615
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1616
6
    }
1617
6
    int64_t final_expiration = expiration + retention_seconds;
1618
6
    if (*earlest_ts > final_expiration) {
1619
2
        *earlest_ts = final_expiration;
1620
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1621
2
    }
1622
6
    return final_expiration;
1623
9
}
1624
1625
int64_t calculate_index_expired_time(const std::string& instance_id_,
1626
                                     const RecycleIndexPB& index_meta_pb,
1627
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1628
10
    if (config::force_immediate_recycle) {
1629
4
        return 0L;
1630
4
    }
1631
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1632
6
                                                        : index_meta_pb.creation_time();
1633
6
    int64_t retention_seconds = config::retention_seconds;
1634
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1635
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1636
6
    }
1637
6
    int64_t final_expiration = expiration + retention_seconds;
1638
6
    if (*earlest_ts > final_expiration) {
1639
2
        *earlest_ts = final_expiration;
1640
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1641
2
    }
1642
6
    return final_expiration;
1643
10
}
1644
1645
int64_t calculate_tmp_rowset_expired_time(
1646
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1647
57.0k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1648
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1649
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1650
    //  duration or timeout always < `retention_time` in practice.
1651
57.0k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1652
57.0k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1653
57.0k
                                 : tmp_rowset_meta_pb.creation_time();
1654
57.0k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1655
57.0k
    int64_t final_expiration = expiration + config::retention_seconds;
1656
57.0k
    if (*earlest_ts > final_expiration) {
1657
6
        *earlest_ts = final_expiration;
1658
6
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1659
6
    }
1660
57.0k
    return final_expiration;
1661
57.0k
}
1662
1663
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1664
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1665
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1666
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1667
8
        *earlest_ts = final_expiration / 1000;
1668
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1669
8
    }
1670
30.0k
    return final_expiration;
1671
30.0k
}
1672
1673
int64_t calculate_restore_job_expired_time(
1674
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1675
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1676
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1677
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1678
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1679
        // final state, recycle immediately
1680
41
        return 0L;
1681
41
    }
1682
    // not final state, wait much longer than the FE's timeout(1 day)
1683
0
    int64_t last_modified_s =
1684
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1685
0
    int64_t expiration = restore_job.expired_at_s() > 0
1686
0
                                 ? last_modified_s + restore_job.expired_at_s()
1687
0
                                 : last_modified_s;
1688
0
    int64_t final_expiration = expiration + config::retention_seconds;
1689
0
    if (*earlest_ts > final_expiration) {
1690
0
        *earlest_ts = final_expiration;
1691
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1692
0
    }
1693
0
    return final_expiration;
1694
41
}
1695
1696
/// Checks whether the versioned load/compact rowset meta at `end_version` of a tablet
/// belongs to `rowset_id`.
///
/// The key is built from `{instance_id, tablet_id, end_version}`: the load key when
/// `load_key` is true, the compact key otherwise. `start_version` is used for logging
/// only.
///
/// `*exist` is set to true only when a document is found and its `rowset_id_v2()`
/// equals `rowset_id`; in every other case it is left untouched, so the caller must
/// initialise it.
///
/// @return 0 when the lookup completed (found, not found, or rowset id mismatch),
///         -1 when reading the document failed for any other reason.
int get_meta_rowset_key(Transaction* txn, const std::string& instance_id, int64_t tablet_id,
                        const std::string& rowset_id, int64_t start_version, int64_t end_version,
                        bool load_key, bool* exist) {
    std::string key;
    if (load_key) {
        key = versioned::meta_rowset_load_key({instance_id, tablet_id, end_version});
    } else {
        key = versioned::meta_rowset_compact_key({instance_id, tablet_id, end_version});
    }

    RowsetMetaCloudPB found_meta;
    Versionstamp versionstamp;
    const TxnErrorCode err = versioned::document_get(txn, key, &found_meta, &versionstamp);

    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
        // A missing key is not an error: the caller simply sees `*exist` unchanged.
        VLOG_DEBUG << "not found load or compact meta_rowset_key."
                   << " rowset_id=" << rowset_id << " start_version=" << start_version
                   << " end_version=" << end_version << " key=" << hex(key);
        return 0;
    }

    if (err != TxnErrorCode::TXN_OK) {
        LOG_INFO("failed to get load or compact meta_rowset_key.")
                .tag("rowset_id", rowset_id)
                .tag("start_version", start_version)
                .tag("end_version", end_version)
                .tag("key", hex(key))
                .tag("error_code", err);
        return -1;
    }

    if (found_meta.rowset_id_v2() != rowset_id) {
        // Some other rowset occupies this version slot.
        VLOG_DEBUG << "rowset_id does not match when find load or compact meta_rowset_key."
                   << " rowset_id=" << rowset_id << " start_version=" << start_version
                   << " end_version=" << end_version << " key=" << hex(key)
                   << " found_rowset_id=" << found_meta.rowset_id_v2();
        return 0;
    }

    *exist = true;
    VLOG_DEBUG << "found load or compact meta_rowset_key."
               << " rowset_id=" << rowset_id << " start_version=" << start_version
               << " end_version=" << end_version << " key=" << hex(key);
    return 0;
}
1730
1731
0
int InstanceRecycler::recycle_ref_rowsets(bool* has_unrecycled_rowsets) {
1732
0
    const std::string task_name = "recycle_ref_rowsets";
1733
0
    int64_t num_scanned = 0;
1734
0
    int64_t num_recycled = 0;
1735
0
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1736
1737
0
    std::string data_rowset_ref_count_key_start =
1738
0
            versioned::data_rowset_ref_count_key({instance_id_, 0, ""});
1739
0
    std::string data_rowset_ref_count_key_end =
1740
0
            versioned::data_rowset_ref_count_key({instance_id_, INT64_MAX, ""});
1741
1742
0
    LOG_WARNING("begin to recycle ref rowsets").tag("instance_id", instance_id_);
1743
1744
0
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1745
0
    register_recycle_task(task_name, start_time);
1746
1747
0
    DORIS_CLOUD_DEFER {
1748
0
        unregister_recycle_task(task_name);
1749
0
        int64_t cost =
1750
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1751
0
        metrics_context.finish_report();
1752
0
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
1753
0
                .tag("instance_id", instance_id_)
1754
0
                .tag("num_scanned", num_scanned)
1755
0
                .tag("num_recycled", num_recycled);
1756
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
1757
1758
0
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1759
0
        ++num_scanned;
1760
1761
0
        int64_t tablet_id;
1762
0
        std::string rowset_id;
1763
0
        std::string_view key(k);
1764
0
        if (!versioned::decode_data_rowset_ref_count_key(&key, &tablet_id, &rowset_id)) {
1765
0
            LOG_WARNING("failed to decode data rowset ref count key").tag("key", hex(k));
1766
0
            return -1;
1767
0
        }
1768
1769
0
        std::unique_ptr<Transaction> txn;
1770
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1771
0
        if (err != TxnErrorCode::TXN_OK) {
1772
0
            return -1;
1773
0
        }
1774
1775
0
        int64_t ref_count;
1776
0
        if (!txn->decode_atomic_int(v, &ref_count)) {
1777
0
            LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(v));
1778
0
            return -1;
1779
0
        }
1780
0
        if (ref_count > 1) {
1781
0
            *has_unrecycled_rowsets = true;
1782
0
            LOG_INFO("skip recycle ref_count > 1 rowset")
1783
0
                    .tag("instance_id", instance_id_)
1784
0
                    .tag("tablet_id", tablet_id)
1785
0
                    .tag("rowset_id", rowset_id)
1786
0
                    .tag("ref_count", ref_count);
1787
0
            return 0;
1788
0
        }
1789
1790
0
        std::string meta_rowset_key =
1791
0
                versioned::meta_rowset_key({instance_id_, tablet_id, rowset_id});
1792
0
        ValueBuf val_buf;
1793
0
        err = blob_get(txn.get(), meta_rowset_key, &val_buf);
1794
0
        if (err != TxnErrorCode::TXN_OK) {
1795
0
            LOG_WARNING("failed to get meta_rowset_key")
1796
0
                    .tag("instance_id", instance_id_)
1797
0
                    .tag("tablet_id", tablet_id)
1798
0
                    .tag("rowset_id", rowset_id)
1799
0
                    .tag("key", hex(meta_rowset_key))
1800
0
                    .tag("err", err);
1801
0
            return -1;
1802
0
        }
1803
0
        doris::RowsetMetaCloudPB rowset_meta;
1804
0
        if (!val_buf.to_pb(&rowset_meta)) {
1805
0
            LOG_WARNING("failed to parse RowsetMetaCloudPB")
1806
0
                    .tag("instance_id", instance_id_)
1807
0
                    .tag("tablet_id", tablet_id)
1808
0
                    .tag("rowset_id", rowset_id)
1809
0
                    .tag("key", hex(meta_rowset_key));
1810
0
            return -1;
1811
0
        }
1812
0
        int64_t start_version = rowset_meta.start_version();
1813
0
        int64_t end_version = rowset_meta.end_version();
1814
1815
        // Check if the meta_rowset_compact_key or meta_rowset_load_key exists:
1816
        // exists: means it's referenced by current instance, can recycle rowset;
1817
        // not exists: means it's referenced by other instances, cannot recycle;
1818
        //
1819
        // end_version = 1: the first rowset;
1820
        // end_version = 0: the rowset is committed by load, but not commit_txn;
1821
        // can recycle in these 2 situations
1822
0
        bool exist = false;
1823
0
        if (end_version > 1) {
1824
0
            if (start_version != end_version) {
1825
0
                if (get_meta_rowset_key(txn.get(), instance_id_, tablet_id, rowset_id,
1826
0
                                        start_version, end_version, false, &exist) != 0) {
1827
0
                    return -1;
1828
0
                }
1829
0
            } else {
1830
0
                if (get_meta_rowset_key(txn.get(), instance_id_, tablet_id, rowset_id,
1831
0
                                        start_version, end_version, true, &exist) != 0) {
1832
0
                    return -1;
1833
0
                }
1834
0
                if (!exist && get_meta_rowset_key(txn.get(), instance_id_, tablet_id, rowset_id,
1835
0
                                                  start_version, end_version, false, &exist) != 0) {
1836
0
                    return -1;
1837
0
                }
1838
0
            }
1839
0
        }
1840
1841
0
        if (end_version > 1 && !exist) {
1842
0
            *has_unrecycled_rowsets = true;
1843
0
            LOG_INFO("skip recycle ref_count = 1 rowset")
1844
0
                    .tag("instance_id", instance_id_)
1845
0
                    .tag("tablet_id", tablet_id)
1846
0
                    .tag("rowset_id", rowset_id)
1847
0
                    .tag("start_version", start_version)
1848
0
                    .tag("end_version", end_version)
1849
0
                    .tag("ref_count", ref_count);
1850
0
            return 0;
1851
0
        }
1852
1853
0
        if (recycle_rowset_meta_and_data("", rowset_meta) != 0) {
1854
0
            LOG_WARNING("failed to recycle_rowset_meta_and_data")
1855
0
                    .tag("instance_id", instance_id_)
1856
0
                    .tag("tablet_id", tablet_id)
1857
0
                    .tag("rowset_id", rowset_id);
1858
0
            return -1;
1859
0
        }
1860
1861
0
        ++num_recycled;
1862
0
        return 0;
1863
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
1864
1865
    // recycle_func and loop_done for scan and recycle
1866
0
    return scan_and_recycle(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end,
1867
0
                            std::move(recycle_func));
1868
0
}
1869
1870
17
int InstanceRecycler::recycle_indexes() {
1871
17
    const std::string task_name = "recycle_indexes";
1872
17
    int64_t num_scanned = 0;
1873
17
    int64_t num_expired = 0;
1874
17
    int64_t num_recycled = 0;
1875
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1876
1877
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
1878
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
1879
17
    std::string index_key0;
1880
17
    std::string index_key1;
1881
17
    recycle_index_key(index_key_info0, &index_key0);
1882
17
    recycle_index_key(index_key_info1, &index_key1);
1883
1884
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
1885
1886
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1887
17
    register_recycle_task(task_name, start_time);
1888
1889
17
    DORIS_CLOUD_DEFER {
1890
17
        unregister_recycle_task(task_name);
1891
17
        int64_t cost =
1892
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1893
17
        metrics_context.finish_report();
1894
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1895
17
                .tag("instance_id", instance_id_)
1896
17
                .tag("num_scanned", num_scanned)
1897
17
                .tag("num_expired", num_expired)
1898
17
                .tag("num_recycled", num_recycled);
1899
17
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1889
2
    DORIS_CLOUD_DEFER {
1890
2
        unregister_recycle_task(task_name);
1891
2
        int64_t cost =
1892
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1893
2
        metrics_context.finish_report();
1894
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1895
2
                .tag("instance_id", instance_id_)
1896
2
                .tag("num_scanned", num_scanned)
1897
2
                .tag("num_expired", num_expired)
1898
2
                .tag("num_recycled", num_recycled);
1899
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1889
15
    DORIS_CLOUD_DEFER {
1890
15
        unregister_recycle_task(task_name);
1891
15
        int64_t cost =
1892
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1893
15
        metrics_context.finish_report();
1894
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1895
15
                .tag("instance_id", instance_id_)
1896
15
                .tag("num_scanned", num_scanned)
1897
15
                .tag("num_expired", num_expired)
1898
15
                .tag("num_recycled", num_recycled);
1899
15
    };
1900
1901
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1902
1903
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
1904
17
    std::vector<std::string_view> index_keys;
1905
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1906
10
        ++num_scanned;
1907
10
        RecycleIndexPB index_pb;
1908
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1909
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1910
0
            return -1;
1911
0
        }
1912
10
        int64_t current_time = ::time(nullptr);
1913
10
        if (current_time <
1914
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1915
0
            return 0;
1916
0
        }
1917
10
        ++num_expired;
1918
        // decode index_id
1919
10
        auto k1 = k;
1920
10
        k1.remove_prefix(1);
1921
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1922
10
        decode_key(&k1, &out);
1923
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1924
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1925
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1926
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1927
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1928
        // Change state to RECYCLING
1929
10
        std::unique_ptr<Transaction> txn;
1930
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1931
10
        if (err != TxnErrorCode::TXN_OK) {
1932
0
            LOG_WARNING("failed to create txn").tag("err", err);
1933
0
            return -1;
1934
0
        }
1935
10
        std::string val;
1936
10
        err = txn->get(k, &val);
1937
10
        if (err ==
1938
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1939
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1940
0
            return 0;
1941
0
        }
1942
10
        if (err != TxnErrorCode::TXN_OK) {
1943
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1944
0
            return -1;
1945
0
        }
1946
10
        index_pb.Clear();
1947
10
        if (!index_pb.ParseFromString(val)) {
1948
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1949
0
            return -1;
1950
0
        }
1951
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1952
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1953
9
            txn->put(k, index_pb.SerializeAsString());
1954
9
            err = txn->commit();
1955
9
            if (err != TxnErrorCode::TXN_OK) {
1956
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1957
0
                return -1;
1958
0
            }
1959
9
        }
1960
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1961
1
            LOG_WARNING("failed to recycle tablets under index")
1962
1
                    .tag("table_id", index_pb.table_id())
1963
1
                    .tag("instance_id", instance_id_)
1964
1
                    .tag("index_id", index_id);
1965
1
            return -1;
1966
1
        }
1967
1968
9
        if (index_pb.has_db_id()) {
1969
            // Recycle the versioned keys
1970
3
            std::unique_ptr<Transaction> txn;
1971
3
            err = txn_kv_->create_txn(&txn);
1972
3
            if (err != TxnErrorCode::TXN_OK) {
1973
0
                LOG_WARNING("failed to create txn").tag("err", err);
1974
0
                return -1;
1975
0
            }
1976
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1977
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1978
3
            std::string index_inverted_key = versioned::index_inverted_key(
1979
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1980
3
            versioned_remove_all(txn.get(), meta_key);
1981
3
            txn->remove(index_key);
1982
3
            txn->remove(index_inverted_key);
1983
3
            err = txn->commit();
1984
3
            if (err != TxnErrorCode::TXN_OK) {
1985
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1986
0
                return -1;
1987
0
            }
1988
3
        }
1989
1990
9
        metrics_context.total_recycled_num = ++num_recycled;
1991
9
        metrics_context.report();
1992
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1993
9
        index_keys.push_back(k);
1994
9
        return 0;
1995
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1905
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1906
2
        ++num_scanned;
1907
2
        RecycleIndexPB index_pb;
1908
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1909
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1910
0
            return -1;
1911
0
        }
1912
2
        int64_t current_time = ::time(nullptr);
1913
2
        if (current_time <
1914
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1915
0
            return 0;
1916
0
        }
1917
2
        ++num_expired;
1918
        // decode index_id
1919
2
        auto k1 = k;
1920
2
        k1.remove_prefix(1);
1921
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1922
2
        decode_key(&k1, &out);
1923
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1924
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1925
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1926
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1927
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1928
        // Change state to RECYCLING
1929
2
        std::unique_ptr<Transaction> txn;
1930
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1931
2
        if (err != TxnErrorCode::TXN_OK) {
1932
0
            LOG_WARNING("failed to create txn").tag("err", err);
1933
0
            return -1;
1934
0
        }
1935
2
        std::string val;
1936
2
        err = txn->get(k, &val);
1937
2
        if (err ==
1938
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1939
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1940
0
            return 0;
1941
0
        }
1942
2
        if (err != TxnErrorCode::TXN_OK) {
1943
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1944
0
            return -1;
1945
0
        }
1946
2
        index_pb.Clear();
1947
2
        if (!index_pb.ParseFromString(val)) {
1948
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1949
0
            return -1;
1950
0
        }
1951
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1952
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1953
1
            txn->put(k, index_pb.SerializeAsString());
1954
1
            err = txn->commit();
1955
1
            if (err != TxnErrorCode::TXN_OK) {
1956
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1957
0
                return -1;
1958
0
            }
1959
1
        }
1960
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1961
1
            LOG_WARNING("failed to recycle tablets under index")
1962
1
                    .tag("table_id", index_pb.table_id())
1963
1
                    .tag("instance_id", instance_id_)
1964
1
                    .tag("index_id", index_id);
1965
1
            return -1;
1966
1
        }
1967
1968
1
        if (index_pb.has_db_id()) {
1969
            // Recycle the versioned keys
1970
1
            std::unique_ptr<Transaction> txn;
1971
1
            err = txn_kv_->create_txn(&txn);
1972
1
            if (err != TxnErrorCode::TXN_OK) {
1973
0
                LOG_WARNING("failed to create txn").tag("err", err);
1974
0
                return -1;
1975
0
            }
1976
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1977
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1978
1
            std::string index_inverted_key = versioned::index_inverted_key(
1979
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1980
1
            versioned_remove_all(txn.get(), meta_key);
1981
1
            txn->remove(index_key);
1982
1
            txn->remove(index_inverted_key);
1983
1
            err = txn->commit();
1984
1
            if (err != TxnErrorCode::TXN_OK) {
1985
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1986
0
                return -1;
1987
0
            }
1988
1
        }
1989
1990
1
        metrics_context.total_recycled_num = ++num_recycled;
1991
1
        metrics_context.report();
1992
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1993
1
        index_keys.push_back(k);
1994
1
        return 0;
1995
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1905
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1906
8
        ++num_scanned;
1907
8
        RecycleIndexPB index_pb;
1908
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1909
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1910
0
            return -1;
1911
0
        }
1912
8
        int64_t current_time = ::time(nullptr);
1913
8
        if (current_time <
1914
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1915
0
            return 0;
1916
0
        }
1917
8
        ++num_expired;
1918
        // decode index_id
1919
8
        auto k1 = k;
1920
8
        k1.remove_prefix(1);
1921
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1922
8
        decode_key(&k1, &out);
1923
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1924
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1925
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1926
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1927
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1928
        // Change state to RECYCLING
1929
8
        std::unique_ptr<Transaction> txn;
1930
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1931
8
        if (err != TxnErrorCode::TXN_OK) {
1932
0
            LOG_WARNING("failed to create txn").tag("err", err);
1933
0
            return -1;
1934
0
        }
1935
8
        std::string val;
1936
8
        err = txn->get(k, &val);
1937
8
        if (err ==
1938
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1939
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1940
0
            return 0;
1941
0
        }
1942
8
        if (err != TxnErrorCode::TXN_OK) {
1943
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1944
0
            return -1;
1945
0
        }
1946
8
        index_pb.Clear();
1947
8
        if (!index_pb.ParseFromString(val)) {
1948
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1949
0
            return -1;
1950
0
        }
1951
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1952
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1953
8
            txn->put(k, index_pb.SerializeAsString());
1954
8
            err = txn->commit();
1955
8
            if (err != TxnErrorCode::TXN_OK) {
1956
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1957
0
                return -1;
1958
0
            }
1959
8
        }
1960
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1961
0
            LOG_WARNING("failed to recycle tablets under index")
1962
0
                    .tag("table_id", index_pb.table_id())
1963
0
                    .tag("instance_id", instance_id_)
1964
0
                    .tag("index_id", index_id);
1965
0
            return -1;
1966
0
        }
1967
1968
8
        if (index_pb.has_db_id()) {
1969
            // Recycle the versioned keys
1970
2
            std::unique_ptr<Transaction> txn;
1971
2
            err = txn_kv_->create_txn(&txn);
1972
2
            if (err != TxnErrorCode::TXN_OK) {
1973
0
                LOG_WARNING("failed to create txn").tag("err", err);
1974
0
                return -1;
1975
0
            }
1976
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1977
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1978
2
            std::string index_inverted_key = versioned::index_inverted_key(
1979
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1980
2
            versioned_remove_all(txn.get(), meta_key);
1981
2
            txn->remove(index_key);
1982
2
            txn->remove(index_inverted_key);
1983
2
            err = txn->commit();
1984
2
            if (err != TxnErrorCode::TXN_OK) {
1985
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1986
0
                return -1;
1987
0
            }
1988
2
        }
1989
1990
8
        metrics_context.total_recycled_num = ++num_recycled;
1991
8
        metrics_context.report();
1992
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1993
8
        index_keys.push_back(k);
1994
8
        return 0;
1995
8
    };
1996
1997
17
    auto loop_done = [&index_keys, this]() -> int {
1998
6
        if (index_keys.empty()) return 0;
1999
5
        DORIS_CLOUD_DEFER {
2000
5
            index_keys.clear();
2001
5
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1999
1
        DORIS_CLOUD_DEFER {
2000
1
            index_keys.clear();
2001
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1999
4
        DORIS_CLOUD_DEFER {
2000
4
            index_keys.clear();
2001
4
        };
2002
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2003
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2004
0
            return -1;
2005
0
        }
2006
5
        return 0;
2007
5
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1997
2
    auto loop_done = [&index_keys, this]() -> int {
1998
2
        if (index_keys.empty()) return 0;
1999
1
        DORIS_CLOUD_DEFER {
2000
1
            index_keys.clear();
2001
1
        };
2002
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2003
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2004
0
            return -1;
2005
0
        }
2006
1
        return 0;
2007
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1997
4
    auto loop_done = [&index_keys, this]() -> int {
1998
4
        if (index_keys.empty()) return 0;
1999
4
        DORIS_CLOUD_DEFER {
2000
4
            index_keys.clear();
2001
4
        };
2002
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2003
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2004
0
            return -1;
2005
0
        }
2006
4
        return 0;
2007
4
    };
2008
2009
17
    if (config::enable_recycler_stats_metrics) {
2010
0
        scan_and_statistics_indexes();
2011
0
    }
2012
    // recycle_func and loop_done for scan and recycle
2013
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
2014
17
}
2015
2016
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
2017
8.24k
                             int64_t tablet_id) {
2018
8.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
2019
2020
8.23k
    std::unique_ptr<Transaction> txn;
2021
8.23k
    TxnErrorCode err = txn_kv->create_txn(&txn);
2022
8.23k
    if (err != TxnErrorCode::TXN_OK) {
2023
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
2024
0
                     << " tablet_id=" << tablet_id << " err=" << err;
2025
0
        return false;
2026
0
    }
2027
2028
8.23k
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
2029
8.23k
    std::string tablet_idx_val;
2030
8.23k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
2031
8.23k
    if (TxnErrorCode::TXN_OK != err) {
2032
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
2033
0
                     << " tablet_id=" << tablet_id << " err=" << err
2034
0
                     << " key=" << hex(tablet_idx_key);
2035
0
        return false;
2036
0
    }
2037
2038
8.23k
    TabletIndexPB tablet_idx_pb;
2039
8.23k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
2040
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
2041
0
                     << " tablet_id=" << tablet_id;
2042
0
        return false;
2043
0
    }
2044
2045
8.23k
    if (!tablet_idx_pb.has_db_id()) {
2046
        // In the previous version, the db_id was not set in the index_pb.
2047
        // If updating to the version which enable txn lazy commit, the db_id will be set.
2048
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
2049
0
                  << " instance_id=" << instance_id
2050
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
2051
0
        return true;
2052
0
    }
2053
2054
8.23k
    std::string ver_val;
2055
8.23k
    std::string ver_key =
2056
8.23k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
2057
8.23k
                                   tablet_idx_pb.partition_id()});
2058
8.23k
    err = txn->get(ver_key, &ver_val);
2059
2060
8.23k
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2061
202
        LOG(INFO) << ""
2062
202
                     "partition version not found, instance_id="
2063
202
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
2064
202
                  << " table_id=" << tablet_idx_pb.table_id()
2065
202
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
2066
202
                  << " key=" << hex(ver_key);
2067
202
        return true;
2068
202
    }
2069
2070
8.03k
    if (TxnErrorCode::TXN_OK != err) {
2071
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
2072
0
                     << " db_id=" << tablet_idx_pb.db_id()
2073
0
                     << " table_id=" << tablet_idx_pb.table_id()
2074
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2075
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
2076
0
        return false;
2077
0
    }
2078
2079
8.03k
    VersionPB version_pb;
2080
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
2081
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
2082
0
                     << " db_id=" << tablet_idx_pb.db_id()
2083
0
                     << " table_id=" << tablet_idx_pb.table_id()
2084
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2085
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
2086
0
        return false;
2087
0
    }
2088
2089
8.03k
    if (version_pb.pending_txn_ids_size() > 0) {
2090
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
2091
4.00k
        DCHECK(version_pb.pending_txn_ids_size() == 1);
2092
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
2093
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
2094
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
2095
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
2096
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
2097
4.00k
                     << " key=" << hex(ver_key);
2098
4.00k
        return false;
2099
4.00k
    }
2100
4.03k
    return true;
2101
8.03k
}
2102
2103
15
int InstanceRecycler::recycle_partitions() {
2104
15
    const std::string task_name = "recycle_partitions";
2105
15
    int64_t num_scanned = 0;
2106
15
    int64_t num_expired = 0;
2107
15
    int64_t num_recycled = 0;
2108
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2109
2110
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
2111
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
2112
15
    std::string part_key0;
2113
15
    std::string part_key1;
2114
15
    recycle_partition_key(part_key_info0, &part_key0);
2115
15
    recycle_partition_key(part_key_info1, &part_key1);
2116
2117
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
2118
2119
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2120
15
    register_recycle_task(task_name, start_time);
2121
2122
15
    DORIS_CLOUD_DEFER {
2123
15
        unregister_recycle_task(task_name);
2124
15
        int64_t cost =
2125
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2126
15
        metrics_context.finish_report();
2127
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2128
15
                .tag("instance_id", instance_id_)
2129
15
                .tag("num_scanned", num_scanned)
2130
15
                .tag("num_expired", num_expired)
2131
15
                .tag("num_recycled", num_recycled);
2132
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2122
2
    DORIS_CLOUD_DEFER {
2123
2
        unregister_recycle_task(task_name);
2124
2
        int64_t cost =
2125
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2126
2
        metrics_context.finish_report();
2127
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2128
2
                .tag("instance_id", instance_id_)
2129
2
                .tag("num_scanned", num_scanned)
2130
2
                .tag("num_expired", num_expired)
2131
2
                .tag("num_recycled", num_recycled);
2132
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2122
13
    DORIS_CLOUD_DEFER {
2123
13
        unregister_recycle_task(task_name);
2124
13
        int64_t cost =
2125
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2126
13
        metrics_context.finish_report();
2127
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2128
13
                .tag("instance_id", instance_id_)
2129
13
                .tag("num_scanned", num_scanned)
2130
13
                .tag("num_expired", num_expired)
2131
13
                .tag("num_recycled", num_recycled);
2132
13
    };
2133
2134
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2135
2136
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
2137
15
    std::vector<std::string_view> partition_keys;
2138
15
    std::vector<std::string> partition_version_keys;
2139
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2140
9
        ++num_scanned;
2141
9
        RecyclePartitionPB part_pb;
2142
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2143
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2144
0
            return -1;
2145
0
        }
2146
9
        int64_t current_time = ::time(nullptr);
2147
9
        if (current_time <
2148
9
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
2149
0
            return 0;
2150
0
        }
2151
9
        ++num_expired;
2152
        // decode partition_id
2153
9
        auto k1 = k;
2154
9
        k1.remove_prefix(1);
2155
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2156
9
        decode_key(&k1, &out);
2157
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2158
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2159
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2160
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2161
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2162
        // Change state to RECYCLING
2163
9
        std::unique_ptr<Transaction> txn;
2164
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2165
9
        if (err != TxnErrorCode::TXN_OK) {
2166
0
            LOG_WARNING("failed to create txn").tag("err", err);
2167
0
            return -1;
2168
0
        }
2169
9
        std::string val;
2170
9
        err = txn->get(k, &val);
2171
9
        if (err ==
2172
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2173
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2174
0
            return 0;
2175
0
        }
2176
9
        if (err != TxnErrorCode::TXN_OK) {
2177
0
            LOG_WARNING("failed to get kv");
2178
0
            return -1;
2179
0
        }
2180
9
        part_pb.Clear();
2181
9
        if (!part_pb.ParseFromString(val)) {
2182
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2183
0
            return -1;
2184
0
        }
2185
        // Partitions with PREPARED state MUST have no data
2186
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2187
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2188
8
            txn->put(k, part_pb.SerializeAsString());
2189
8
            err = txn->commit();
2190
8
            if (err != TxnErrorCode::TXN_OK) {
2191
0
                LOG_WARNING("failed to commit txn: {}", err);
2192
0
                return -1;
2193
0
            }
2194
8
        }
2195
2196
9
        int ret = 0;
2197
33
        for (int64_t index_id : part_pb.index_id()) {
2198
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2199
1
                LOG_WARNING("failed to recycle tablets under partition")
2200
1
                        .tag("table_id", part_pb.table_id())
2201
1
                        .tag("instance_id", instance_id_)
2202
1
                        .tag("index_id", index_id)
2203
1
                        .tag("partition_id", partition_id);
2204
1
                ret = -1;
2205
1
            }
2206
33
        }
2207
9
        if (ret == 0 && part_pb.has_db_id()) {
2208
            // Recycle the versioned keys
2209
8
            std::unique_ptr<Transaction> txn;
2210
8
            err = txn_kv_->create_txn(&txn);
2211
8
            if (err != TxnErrorCode::TXN_OK) {
2212
0
                LOG_WARNING("failed to create txn").tag("err", err);
2213
0
                return -1;
2214
0
            }
2215
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2216
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2217
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2218
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2219
8
            std::string partition_version_key =
2220
8
                    versioned::partition_version_key({instance_id_, partition_id});
2221
8
            versioned_remove_all(txn.get(), meta_key);
2222
8
            txn->remove(index_key);
2223
8
            txn->remove(inverted_index_key);
2224
8
            versioned_remove_all(txn.get(), partition_version_key);
2225
8
            err = txn->commit();
2226
8
            if (err != TxnErrorCode::TXN_OK) {
2227
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2228
0
                return -1;
2229
0
            }
2230
8
        }
2231
2232
9
        if (ret == 0) {
2233
8
            ++num_recycled;
2234
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2235
8
            partition_keys.push_back(k);
2236
8
            if (part_pb.db_id() > 0) {
2237
8
                partition_version_keys.push_back(partition_version_key(
2238
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2239
8
            }
2240
8
            metrics_context.total_recycled_num = num_recycled;
2241
8
            metrics_context.report();
2242
8
        }
2243
9
        return ret;
2244
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2139
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2140
2
        ++num_scanned;
2141
2
        RecyclePartitionPB part_pb;
2142
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2143
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2144
0
            return -1;
2145
0
        }
2146
2
        int64_t current_time = ::time(nullptr);
2147
2
        if (current_time <
2148
2
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
2149
0
            return 0;
2150
0
        }
2151
2
        ++num_expired;
2152
        // decode partition_id
2153
2
        auto k1 = k;
2154
2
        k1.remove_prefix(1);
2155
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2156
2
        decode_key(&k1, &out);
2157
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2158
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2159
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2160
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2161
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2162
        // Change state to RECYCLING
2163
2
        std::unique_ptr<Transaction> txn;
2164
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2165
2
        if (err != TxnErrorCode::TXN_OK) {
2166
0
            LOG_WARNING("failed to create txn").tag("err", err);
2167
0
            return -1;
2168
0
        }
2169
2
        std::string val;
2170
2
        err = txn->get(k, &val);
2171
2
        if (err ==
2172
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2173
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2174
0
            return 0;
2175
0
        }
2176
2
        if (err != TxnErrorCode::TXN_OK) {
2177
0
            LOG_WARNING("failed to get kv");
2178
0
            return -1;
2179
0
        }
2180
2
        part_pb.Clear();
2181
2
        if (!part_pb.ParseFromString(val)) {
2182
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2183
0
            return -1;
2184
0
        }
2185
        // Partitions with PREPARED state MUST have no data
2186
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2187
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2188
1
            txn->put(k, part_pb.SerializeAsString());
2189
1
            err = txn->commit();
2190
1
            if (err != TxnErrorCode::TXN_OK) {
2191
0
                LOG_WARNING("failed to commit txn: {}", err);
2192
0
                return -1;
2193
0
            }
2194
1
        }
2195
2196
2
        int ret = 0;
2197
2
        for (int64_t index_id : part_pb.index_id()) {
2198
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2199
1
                LOG_WARNING("failed to recycle tablets under partition")
2200
1
                        .tag("table_id", part_pb.table_id())
2201
1
                        .tag("instance_id", instance_id_)
2202
1
                        .tag("index_id", index_id)
2203
1
                        .tag("partition_id", partition_id);
2204
1
                ret = -1;
2205
1
            }
2206
2
        }
2207
2
        if (ret == 0 && part_pb.has_db_id()) {
2208
            // Recycle the versioned keys
2209
1
            std::unique_ptr<Transaction> txn;
2210
1
            err = txn_kv_->create_txn(&txn);
2211
1
            if (err != TxnErrorCode::TXN_OK) {
2212
0
                LOG_WARNING("failed to create txn").tag("err", err);
2213
0
                return -1;
2214
0
            }
2215
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2216
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2217
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2218
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2219
1
            std::string partition_version_key =
2220
1
                    versioned::partition_version_key({instance_id_, partition_id});
2221
1
            versioned_remove_all(txn.get(), meta_key);
2222
1
            txn->remove(index_key);
2223
1
            txn->remove(inverted_index_key);
2224
1
            versioned_remove_all(txn.get(), partition_version_key);
2225
1
            err = txn->commit();
2226
1
            if (err != TxnErrorCode::TXN_OK) {
2227
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2228
0
                return -1;
2229
0
            }
2230
1
        }
2231
2232
2
        if (ret == 0) {
2233
1
            ++num_recycled;
2234
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2235
1
            partition_keys.push_back(k);
2236
1
            if (part_pb.db_id() > 0) {
2237
1
                partition_version_keys.push_back(partition_version_key(
2238
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2239
1
            }
2240
1
            metrics_context.total_recycled_num = num_recycled;
2241
1
            metrics_context.report();
2242
1
        }
2243
2
        return ret;
2244
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2139
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2140
7
        ++num_scanned;
2141
7
        RecyclePartitionPB part_pb;
2142
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2143
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2144
0
            return -1;
2145
0
        }
2146
7
        int64_t current_time = ::time(nullptr);
2147
7
        if (current_time <
2148
7
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
2149
0
            return 0;
2150
0
        }
2151
7
        ++num_expired;
2152
        // decode partition_id
2153
7
        auto k1 = k;
2154
7
        k1.remove_prefix(1);
2155
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2156
7
        decode_key(&k1, &out);
2157
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2158
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2159
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2160
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2161
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2162
        // Change state to RECYCLING
2163
7
        std::unique_ptr<Transaction> txn;
2164
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2165
7
        if (err != TxnErrorCode::TXN_OK) {
2166
0
            LOG_WARNING("failed to create txn").tag("err", err);
2167
0
            return -1;
2168
0
        }
2169
7
        std::string val;
2170
7
        err = txn->get(k, &val);
2171
7
        if (err ==
2172
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2173
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2174
0
            return 0;
2175
0
        }
2176
7
        if (err != TxnErrorCode::TXN_OK) {
2177
0
            LOG_WARNING("failed to get kv");
2178
0
            return -1;
2179
0
        }
2180
7
        part_pb.Clear();
2181
7
        if (!part_pb.ParseFromString(val)) {
2182
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2183
0
            return -1;
2184
0
        }
2185
        // Partitions with PREPARED state MUST have no data
2186
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2187
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2188
7
            txn->put(k, part_pb.SerializeAsString());
2189
7
            err = txn->commit();
2190
7
            if (err != TxnErrorCode::TXN_OK) {
2191
0
                LOG_WARNING("failed to commit txn: {}", err);
2192
0
                return -1;
2193
0
            }
2194
7
        }
2195
2196
7
        int ret = 0;
2197
31
        for (int64_t index_id : part_pb.index_id()) {
2198
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2199
0
                LOG_WARNING("failed to recycle tablets under partition")
2200
0
                        .tag("table_id", part_pb.table_id())
2201
0
                        .tag("instance_id", instance_id_)
2202
0
                        .tag("index_id", index_id)
2203
0
                        .tag("partition_id", partition_id);
2204
0
                ret = -1;
2205
0
            }
2206
31
        }
2207
7
        if (ret == 0 && part_pb.has_db_id()) {
2208
            // Recycle the versioned keys
2209
7
            std::unique_ptr<Transaction> txn;
2210
7
            err = txn_kv_->create_txn(&txn);
2211
7
            if (err != TxnErrorCode::TXN_OK) {
2212
0
                LOG_WARNING("failed to create txn").tag("err", err);
2213
0
                return -1;
2214
0
            }
2215
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2216
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2217
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2218
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2219
7
            std::string partition_version_key =
2220
7
                    versioned::partition_version_key({instance_id_, partition_id});
2221
7
            versioned_remove_all(txn.get(), meta_key);
2222
7
            txn->remove(index_key);
2223
7
            txn->remove(inverted_index_key);
2224
7
            versioned_remove_all(txn.get(), partition_version_key);
2225
7
            err = txn->commit();
2226
7
            if (err != TxnErrorCode::TXN_OK) {
2227
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2228
0
                return -1;
2229
0
            }
2230
7
        }
2231
2232
7
        if (ret == 0) {
2233
7
            ++num_recycled;
2234
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2235
7
            partition_keys.push_back(k);
2236
7
            if (part_pb.db_id() > 0) {
2237
7
                partition_version_keys.push_back(partition_version_key(
2238
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2239
7
            }
2240
7
            metrics_context.total_recycled_num = num_recycled;
2241
7
            metrics_context.report();
2242
7
        }
2243
7
        return ret;
2244
7
    };
2245
2246
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2247
5
        if (partition_keys.empty()) return 0;
2248
4
        DORIS_CLOUD_DEFER {
2249
4
            partition_keys.clear();
2250
4
            partition_version_keys.clear();
2251
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2248
1
        DORIS_CLOUD_DEFER {
2249
1
            partition_keys.clear();
2250
1
            partition_version_keys.clear();
2251
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2248
3
        DORIS_CLOUD_DEFER {
2249
3
            partition_keys.clear();
2250
3
            partition_version_keys.clear();
2251
3
        };
2252
4
        std::unique_ptr<Transaction> txn;
2253
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2254
4
        if (err != TxnErrorCode::TXN_OK) {
2255
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2256
0
            return -1;
2257
0
        }
2258
8
        for (auto& k : partition_keys) {
2259
8
            txn->remove(k);
2260
8
        }
2261
8
        for (auto& k : partition_version_keys) {
2262
8
            txn->remove(k);
2263
8
        }
2264
4
        err = txn->commit();
2265
4
        if (err != TxnErrorCode::TXN_OK) {
2266
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2267
0
                         << " err=" << err;
2268
0
            return -1;
2269
0
        }
2270
4
        return 0;
2271
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2246
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2247
2
        if (partition_keys.empty()) return 0;
2248
1
        DORIS_CLOUD_DEFER {
2249
1
            partition_keys.clear();
2250
1
            partition_version_keys.clear();
2251
1
        };
2252
1
        std::unique_ptr<Transaction> txn;
2253
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2254
1
        if (err != TxnErrorCode::TXN_OK) {
2255
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2256
0
            return -1;
2257
0
        }
2258
1
        for (auto& k : partition_keys) {
2259
1
            txn->remove(k);
2260
1
        }
2261
1
        for (auto& k : partition_version_keys) {
2262
1
            txn->remove(k);
2263
1
        }
2264
1
        err = txn->commit();
2265
1
        if (err != TxnErrorCode::TXN_OK) {
2266
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2267
0
                         << " err=" << err;
2268
0
            return -1;
2269
0
        }
2270
1
        return 0;
2271
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2246
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2247
3
        if (partition_keys.empty()) return 0;
2248
3
        DORIS_CLOUD_DEFER {
2249
3
            partition_keys.clear();
2250
3
            partition_version_keys.clear();
2251
3
        };
2252
3
        std::unique_ptr<Transaction> txn;
2253
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2254
3
        if (err != TxnErrorCode::TXN_OK) {
2255
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2256
0
            return -1;
2257
0
        }
2258
7
        for (auto& k : partition_keys) {
2259
7
            txn->remove(k);
2260
7
        }
2261
7
        for (auto& k : partition_version_keys) {
2262
7
            txn->remove(k);
2263
7
        }
2264
3
        err = txn->commit();
2265
3
        if (err != TxnErrorCode::TXN_OK) {
2266
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2267
0
                         << " err=" << err;
2268
0
            return -1;
2269
0
        }
2270
3
        return 0;
2271
3
    };
2272
2273
15
    if (config::enable_recycler_stats_metrics) {
2274
0
        scan_and_statistics_partitions();
2275
0
    }
2276
    // recycle_func and loop_done for scan and recycle
2277
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
2278
15
}
2279
2280
14
int InstanceRecycler::recycle_versions() {
2281
14
    if (should_recycle_versioned_keys()) {
2282
2
        return recycle_orphan_partitions();
2283
2
    }
2284
2285
12
    int64_t num_scanned = 0;
2286
12
    int64_t num_recycled = 0;
2287
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
2288
2289
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
2290
2291
12
    auto start_time = steady_clock::now();
2292
2293
12
    DORIS_CLOUD_DEFER {
2294
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2295
12
        metrics_context.finish_report();
2296
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2297
12
                .tag("instance_id", instance_id_)
2298
12
                .tag("num_scanned", num_scanned)
2299
12
                .tag("num_recycled", num_recycled);
2300
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
2293
12
    DORIS_CLOUD_DEFER {
2294
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2295
12
        metrics_context.finish_report();
2296
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2297
12
                .tag("instance_id", instance_id_)
2298
12
                .tag("num_scanned", num_scanned)
2299
12
                .tag("num_recycled", num_recycled);
2300
12
    };
2301
2302
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
2303
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
2304
12
    int64_t last_scanned_table_id = 0;
2305
12
    bool is_recycled = false; // Is last scanned kv recycled
2306
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
2307
12
                         &metrics_context, this](std::string_view k, std::string_view) {
2308
2
        ++num_scanned;
2309
2
        auto k1 = k;
2310
2
        k1.remove_prefix(1);
2311
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2312
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2313
2
        decode_key(&k1, &out);
2314
2
        DCHECK_EQ(out.size(), 6) << k;
2315
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2316
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2317
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2318
0
            return 0;
2319
0
        }
2320
2
        last_scanned_table_id = table_id;
2321
2
        is_recycled = false;
2322
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2323
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2324
2
        std::unique_ptr<Transaction> txn;
2325
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2326
2
        if (err != TxnErrorCode::TXN_OK) {
2327
0
            return -1;
2328
0
        }
2329
2
        std::unique_ptr<RangeGetIterator> iter;
2330
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2331
2
        if (err != TxnErrorCode::TXN_OK) {
2332
0
            return -1;
2333
0
        }
2334
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2335
1
            return 0;
2336
1
        }
2337
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2338
        // 1. Remove all partition version kvs of this table
2339
1
        auto partition_version_key_begin =
2340
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2341
1
        auto partition_version_key_end =
2342
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2343
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2344
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2345
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2346
1
                     << " table_id=" << table_id;
2347
        // 2. Remove the table version kv of this table
2348
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2349
1
        txn->remove(tbl_version_key);
2350
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2351
        // 3. Remove mow delete bitmap update lock and tablet job lock
2352
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2353
1
        txn->remove(lock_key);
2354
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2355
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2356
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2357
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2358
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2359
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2360
1
                     << " table_id=" << table_id;
2361
1
        err = txn->commit();
2362
1
        if (err != TxnErrorCode::TXN_OK) {
2363
0
            return -1;
2364
0
        }
2365
1
        metrics_context.total_recycled_num = ++num_recycled;
2366
1
        metrics_context.report();
2367
1
        is_recycled = true;
2368
1
        return 0;
2369
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2307
2
                         &metrics_context, this](std::string_view k, std::string_view) {
2308
2
        ++num_scanned;
2309
2
        auto k1 = k;
2310
2
        k1.remove_prefix(1);
2311
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2312
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2313
2
        decode_key(&k1, &out);
2314
2
        DCHECK_EQ(out.size(), 6) << k;
2315
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2316
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2317
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2318
0
            return 0;
2319
0
        }
2320
2
        last_scanned_table_id = table_id;
2321
2
        is_recycled = false;
2322
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2323
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2324
2
        std::unique_ptr<Transaction> txn;
2325
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2326
2
        if (err != TxnErrorCode::TXN_OK) {
2327
0
            return -1;
2328
0
        }
2329
2
        std::unique_ptr<RangeGetIterator> iter;
2330
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2331
2
        if (err != TxnErrorCode::TXN_OK) {
2332
0
            return -1;
2333
0
        }
2334
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2335
1
            return 0;
2336
1
        }
2337
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2338
        // 1. Remove all partition version kvs of this table
2339
1
        auto partition_version_key_begin =
2340
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2341
1
        auto partition_version_key_end =
2342
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2343
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2344
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2345
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2346
1
                     << " table_id=" << table_id;
2347
        // 2. Remove the table version kv of this table
2348
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2349
1
        txn->remove(tbl_version_key);
2350
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2351
        // 3. Remove mow delete bitmap update lock and tablet job lock
2352
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2353
1
        txn->remove(lock_key);
2354
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2355
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2356
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2357
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2358
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2359
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2360
1
                     << " table_id=" << table_id;
2361
1
        err = txn->commit();
2362
1
        if (err != TxnErrorCode::TXN_OK) {
2363
0
            return -1;
2364
0
        }
2365
1
        metrics_context.total_recycled_num = ++num_recycled;
2366
1
        metrics_context.report();
2367
1
        is_recycled = true;
2368
1
        return 0;
2369
1
    };
2370
2371
12
    if (config::enable_recycler_stats_metrics) {
2372
0
        scan_and_statistics_versions();
2373
0
    }
2374
    // recycle_func and loop_done for scan and recycle
2375
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
2376
14
}
2377
2378
3
int InstanceRecycler::recycle_orphan_partitions() {
2379
3
    int64_t num_scanned = 0;
2380
3
    int64_t num_recycled = 0;
2381
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
2382
2383
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
2384
3
            .tag("instance_id", instance_id_);
2385
2386
3
    auto start_time = steady_clock::now();
2387
2388
3
    DORIS_CLOUD_DEFER {
2389
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2390
3
        metrics_context.finish_report();
2391
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2392
3
                .tag("instance_id", instance_id_)
2393
3
                .tag("num_scanned", num_scanned)
2394
3
                .tag("num_recycled", num_recycled);
2395
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
2388
3
    DORIS_CLOUD_DEFER {
2389
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2390
3
        metrics_context.finish_report();
2391
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2392
3
                .tag("instance_id", instance_id_)
2393
3
                .tag("num_scanned", num_scanned)
2394
3
                .tag("num_recycled", num_recycled);
2395
3
    };
2396
2397
3
    bool is_empty_table = false;        // whether the table has no indexes
2398
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
2399
3
    int64_t current_table_id = 0;       // current scanning table id
2400
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
2401
3
                         &current_table_id, &is_table_kvs_recycled,
2402
3
                         this](std::string_view k, std::string_view) {
2403
2
        ++num_scanned;
2404
2405
2
        std::string_view k1(k);
2406
2
        int64_t db_id, table_id, partition_id;
2407
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2408
2
                                                            &partition_id)) {
2409
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2410
0
            return -1;
2411
2
        } else if (table_id != current_table_id) {
2412
2
            current_table_id = table_id;
2413
2
            is_table_kvs_recycled = false;
2414
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2415
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2416
2
            if (err != TxnErrorCode::TXN_OK) {
2417
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2418
0
                             << " table_id=" << table_id << " err=" << err;
2419
0
                return -1;
2420
0
            }
2421
2
        }
2422
2423
2
        if (!is_empty_table) {
2424
            // table is not empty, skip recycle
2425
1
            return 0;
2426
1
        }
2427
2428
1
        std::unique_ptr<Transaction> txn;
2429
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2430
1
        if (err != TxnErrorCode::TXN_OK) {
2431
0
            return -1;
2432
0
        }
2433
2434
        // 1. Remove all partition related kvs
2435
1
        std::string partition_meta_key =
2436
1
                versioned::meta_partition_key({instance_id_, partition_id});
2437
1
        std::string partition_index_key =
2438
1
                versioned::partition_index_key({instance_id_, partition_id});
2439
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2440
1
                {instance_id_, db_id, table_id, partition_id});
2441
1
        std::string partition_version_key =
2442
1
                versioned::partition_version_key({instance_id_, partition_id});
2443
1
        txn->remove(partition_index_key);
2444
1
        txn->remove(partition_inverted_key);
2445
1
        versioned_remove_all(txn.get(), partition_meta_key);
2446
1
        versioned_remove_all(txn.get(), partition_version_key);
2447
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2448
1
                     << " table_id=" << table_id << " db_id=" << db_id
2449
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2450
1
                     << " partition_version_key=" << hex(partition_version_key);
2451
2452
1
        if (!is_table_kvs_recycled) {
2453
1
            is_table_kvs_recycled = true;
2454
2455
            // 2. Remove the table version kv of this table
2456
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2457
1
            versioned_remove_all(txn.get(), table_version_key);
2458
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2459
            // 3. Remove mow delete bitmap update lock and tablet job lock
2460
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2461
1
            txn->remove(lock_key);
2462
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2463
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2464
1
            std::string tablet_job_key_end =
2465
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2466
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2467
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2468
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2469
1
                         << " table_id=" << table_id;
2470
1
        }
2471
2472
1
        err = txn->commit();
2473
1
        if (err != TxnErrorCode::TXN_OK) {
2474
0
            return -1;
2475
0
        }
2476
1
        metrics_context.total_recycled_num = ++num_recycled;
2477
1
        metrics_context.report();
2478
1
        return 0;
2479
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2402
2
                         this](std::string_view k, std::string_view) {
2403
2
        ++num_scanned;
2404
2405
2
        std::string_view k1(k);
2406
2
        int64_t db_id, table_id, partition_id;
2407
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2408
2
                                                            &partition_id)) {
2409
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2410
0
            return -1;
2411
2
        } else if (table_id != current_table_id) {
2412
2
            current_table_id = table_id;
2413
2
            is_table_kvs_recycled = false;
2414
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2415
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2416
2
            if (err != TxnErrorCode::TXN_OK) {
2417
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2418
0
                             << " table_id=" << table_id << " err=" << err;
2419
0
                return -1;
2420
0
            }
2421
2
        }
2422
2423
2
        if (!is_empty_table) {
2424
            // table is not empty, skip recycle
2425
1
            return 0;
2426
1
        }
2427
2428
1
        std::unique_ptr<Transaction> txn;
2429
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2430
1
        if (err != TxnErrorCode::TXN_OK) {
2431
0
            return -1;
2432
0
        }
2433
2434
        // 1. Remove all partition related kvs
2435
1
        std::string partition_meta_key =
2436
1
                versioned::meta_partition_key({instance_id_, partition_id});
2437
1
        std::string partition_index_key =
2438
1
                versioned::partition_index_key({instance_id_, partition_id});
2439
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2440
1
                {instance_id_, db_id, table_id, partition_id});
2441
1
        std::string partition_version_key =
2442
1
                versioned::partition_version_key({instance_id_, partition_id});
2443
1
        txn->remove(partition_index_key);
2444
1
        txn->remove(partition_inverted_key);
2445
1
        versioned_remove_all(txn.get(), partition_meta_key);
2446
1
        versioned_remove_all(txn.get(), partition_version_key);
2447
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2448
1
                     << " table_id=" << table_id << " db_id=" << db_id
2449
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2450
1
                     << " partition_version_key=" << hex(partition_version_key);
2451
2452
1
        if (!is_table_kvs_recycled) {
2453
1
            is_table_kvs_recycled = true;
2454
2455
            // 2. Remove the table version kv of this table
2456
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2457
1
            versioned_remove_all(txn.get(), table_version_key);
2458
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2459
            // 3. Remove mow delete bitmap update lock and tablet job lock
2460
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2461
1
            txn->remove(lock_key);
2462
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2463
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2464
1
            std::string tablet_job_key_end =
2465
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2466
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2467
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2468
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2469
1
                         << " table_id=" << table_id;
2470
1
        }
2471
2472
1
        err = txn->commit();
2473
1
        if (err != TxnErrorCode::TXN_OK) {
2474
0
            return -1;
2475
0
        }
2476
1
        metrics_context.total_recycled_num = ++num_recycled;
2477
1
        metrics_context.report();
2478
1
        return 0;
2479
1
    };
2480
2481
    // recycle_func and loop_done for scan and recycle
2482
3
    return scan_and_recycle(
2483
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
2484
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
2485
3
            std::move(recycle_func));
2486
3
}
2487
2488
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
2489
                                      RecyclerMetricsContext& metrics_context,
2490
47
                                      int64_t partition_id) {
2491
47
    bool is_multi_version =
2492
47
            instance_info_.has_multi_version_status() &&
2493
47
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
2494
47
    int64_t num_scanned = 0;
2495
47
    std::atomic_long num_recycled = 0;
2496
2497
47
    std::string tablet_key_begin, tablet_key_end;
2498
47
    std::string stats_key_begin, stats_key_end;
2499
47
    std::string job_key_begin, job_key_end;
2500
2501
47
    std::string tablet_belongs;
2502
47
    if (partition_id > 0) {
2503
        // recycle tablets in a partition belonging to the index
2504
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2505
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2506
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
2507
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
2508
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
2509
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
2510
33
        tablet_belongs = "partition";
2511
33
    } else {
2512
        // recycle tablets in the index
2513
14
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2514
14
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2515
14
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
2516
14
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
2517
14
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
2518
14
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
2519
14
        tablet_belongs = "index";
2520
14
    }
2521
2522
47
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
2523
47
            .tag("table_id", table_id)
2524
47
            .tag("index_id", index_id)
2525
47
            .tag("partition_id", partition_id);
2526
2527
47
    auto start_time = steady_clock::now();
2528
2529
47
    DORIS_CLOUD_DEFER {
2530
47
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2531
47
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2532
47
                .tag("instance_id", instance_id_)
2533
47
                .tag("table_id", table_id)
2534
47
                .tag("index_id", index_id)
2535
47
                .tag("partition_id", partition_id)
2536
47
                .tag("num_scanned", num_scanned)
2537
47
                .tag("num_recycled", num_recycled);
2538
47
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2529
4
    DORIS_CLOUD_DEFER {
2530
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2531
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2532
4
                .tag("instance_id", instance_id_)
2533
4
                .tag("table_id", table_id)
2534
4
                .tag("index_id", index_id)
2535
4
                .tag("partition_id", partition_id)
2536
4
                .tag("num_scanned", num_scanned)
2537
4
                .tag("num_recycled", num_recycled);
2538
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2529
43
    DORIS_CLOUD_DEFER {
2530
43
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2531
43
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2532
43
                .tag("instance_id", instance_id_)
2533
43
                .tag("table_id", table_id)
2534
43
                .tag("index_id", index_id)
2535
43
                .tag("partition_id", partition_id)
2536
43
                .tag("num_scanned", num_scanned)
2537
43
                .tag("num_recycled", num_recycled);
2538
43
    };
2539
2540
    // The first string_view represents the tablet key which has been recycled
2541
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
2542
47
    using TabletKeyPair = std::pair<std::string_view, bool>;
2543
47
    SyncExecutor<TabletKeyPair> sync_executor(
2544
47
            _thread_pool_group.recycle_tablet_pool,
2545
47
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
2546
47
                        index_id, partition_id),
2547
4.23k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2547
4.00k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2547
235
            [](const TabletKeyPair& k) { return k.first.empty(); });
2548
2549
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
2550
47
    std::vector<std::string> tablet_idx_keys;
2551
47
    std::vector<std::string> restore_job_keys;
2552
47
    std::vector<std::string> init_rs_keys;
2553
47
    std::vector<std::string> tablet_compact_stats_keys;
2554
47
    std::vector<std::string> tablet_load_stats_keys;
2555
47
    std::vector<std::string> versioned_meta_tablet_keys;
2556
8.24k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2557
8.24k
        bool use_range_remove = true;
2558
8.24k
        ++num_scanned;
2559
8.24k
        doris::TabletMetaCloudPB tablet_meta_pb;
2560
8.24k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2561
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2562
0
            use_range_remove = false;
2563
0
            return -1;
2564
0
        }
2565
8.24k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2566
2567
8.24k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2568
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2569
4.00k
            return -1;
2570
4.00k
        }
2571
2572
4.24k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2573
4.24k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2574
4.24k
        if (is_multi_version) {
2575
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2576
6
            tablet_compact_stats_keys.push_back(
2577
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2578
6
            tablet_load_stats_keys.push_back(
2579
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2580
6
            versioned_meta_tablet_keys.push_back(
2581
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2582
6
        }
2583
4.24k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2584
4.23k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2585
4.23k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2586
4.23k
            if (recycle_tablet(tid, metrics_context) != 0) {
2587
0
                LOG_WARNING("failed to recycle tablet")
2588
0
                        .tag("instance_id", instance_id_)
2589
0
                        .tag("tablet_id", tid);
2590
0
                range_move = false;
2591
0
                return {std::string_view(), range_move};
2592
0
            }
2593
4.23k
            ++num_recycled;
2594
4.23k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2595
4.23k
            return {k, range_move};
2596
4.23k
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2585
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2586
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2587
0
                LOG_WARNING("failed to recycle tablet")
2588
0
                        .tag("instance_id", instance_id_)
2589
0
                        .tag("tablet_id", tid);
2590
0
                range_move = false;
2591
0
                return {std::string_view(), range_move};
2592
0
            }
2593
4.00k
            ++num_recycled;
2594
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2595
4.00k
            return {k, range_move};
2596
4.00k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2585
235
                           &metrics_context, k]() mutable -> TabletKeyPair {
2586
235
            if (recycle_tablet(tid, metrics_context) != 0) {
2587
0
                LOG_WARNING("failed to recycle tablet")
2588
0
                        .tag("instance_id", instance_id_)
2589
0
                        .tag("tablet_id", tid);
2590
0
                range_move = false;
2591
0
                return {std::string_view(), range_move};
2592
0
            }
2593
235
            ++num_recycled;
2594
235
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2595
235
            return {k, range_move};
2596
235
        });
2597
4.23k
        return 0;
2598
4.24k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2556
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2557
8.00k
        bool use_range_remove = true;
2558
8.00k
        ++num_scanned;
2559
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
2560
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2561
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2562
0
            use_range_remove = false;
2563
0
            return -1;
2564
0
        }
2565
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2566
2567
8.00k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2568
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2569
4.00k
            return -1;
2570
4.00k
        }
2571
2572
4.00k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2573
4.00k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2574
4.00k
        if (is_multi_version) {
2575
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2576
0
            tablet_compact_stats_keys.push_back(
2577
0
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2578
0
            tablet_load_stats_keys.push_back(
2579
0
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2580
0
            versioned_meta_tablet_keys.push_back(
2581
0
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2582
0
        }
2583
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2584
4.00k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2585
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2586
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2587
4.00k
                LOG_WARNING("failed to recycle tablet")
2588
4.00k
                        .tag("instance_id", instance_id_)
2589
4.00k
                        .tag("tablet_id", tid);
2590
4.00k
                range_move = false;
2591
4.00k
                return {std::string_view(), range_move};
2592
4.00k
            }
2593
4.00k
            ++num_recycled;
2594
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2595
4.00k
            return {k, range_move};
2596
4.00k
        });
2597
4.00k
        return 0;
2598
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2556
238
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2557
238
        bool use_range_remove = true;
2558
238
        ++num_scanned;
2559
238
        doris::TabletMetaCloudPB tablet_meta_pb;
2560
238
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2561
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2562
0
            use_range_remove = false;
2563
0
            return -1;
2564
0
        }
2565
238
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2566
2567
238
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2568
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2569
0
            return -1;
2570
0
        }
2571
2572
238
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2573
238
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2574
238
        if (is_multi_version) {
2575
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2576
6
            tablet_compact_stats_keys.push_back(
2577
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2578
6
            tablet_load_stats_keys.push_back(
2579
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2580
6
            versioned_meta_tablet_keys.push_back(
2581
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2582
6
        }
2583
238
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2584
235
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2585
235
                           &metrics_context, k]() mutable -> TabletKeyPair {
2586
235
            if (recycle_tablet(tid, metrics_context) != 0) {
2587
235
                LOG_WARNING("failed to recycle tablet")
2588
235
                        .tag("instance_id", instance_id_)
2589
235
                        .tag("tablet_id", tid);
2590
235
                range_move = false;
2591
235
                return {std::string_view(), range_move};
2592
235
            }
2593
235
            ++num_recycled;
2594
235
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2595
235
            return {k, range_move};
2596
235
        });
2597
235
        return 0;
2598
238
    };
2599
2600
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
2601
47
    auto loop_done = [&, this]() -> int {
2602
47
        bool finished = true;
2603
47
        auto tablet_keys = sync_executor.when_all(&finished);
2604
47
        if (!finished) {
2605
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2606
0
            return -1;
2607
0
        }
2608
47
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2609
        // sort the vector using key's order
2610
45
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2611
49.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2611
48.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2611
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2612
45
        bool use_range_remove = true;
2613
4.23k
        for (auto& [_, remove] : tablet_keys) {
2614
4.23k
            if (!remove) {
2615
0
                use_range_remove = remove;
2616
0
                break;
2617
0
            }
2618
4.23k
        }
2619
45
        DORIS_CLOUD_DEFER {
2620
45
            tablet_idx_keys.clear();
2621
45
            restore_job_keys.clear();
2622
45
            init_rs_keys.clear();
2623
45
            tablet_compact_stats_keys.clear();
2624
45
            tablet_load_stats_keys.clear();
2625
45
            versioned_meta_tablet_keys.clear();
2626
45
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2619
2
        DORIS_CLOUD_DEFER {
2620
2
            tablet_idx_keys.clear();
2621
2
            restore_job_keys.clear();
2622
2
            init_rs_keys.clear();
2623
2
            tablet_compact_stats_keys.clear();
2624
2
            tablet_load_stats_keys.clear();
2625
2
            versioned_meta_tablet_keys.clear();
2626
2
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2619
43
        DORIS_CLOUD_DEFER {
2620
43
            tablet_idx_keys.clear();
2621
43
            restore_job_keys.clear();
2622
43
            init_rs_keys.clear();
2623
43
            tablet_compact_stats_keys.clear();
2624
43
            tablet_load_stats_keys.clear();
2625
43
            versioned_meta_tablet_keys.clear();
2626
43
        };
2627
45
        std::unique_ptr<Transaction> txn;
2628
45
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2629
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2630
0
            return -1;
2631
0
        }
2632
45
        std::string tablet_key_end;
2633
45
        if (!tablet_keys.empty()) {
2634
43
            if (use_range_remove) {
2635
43
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2636
43
                txn->remove(tablet_keys.front().first, tablet_key_end);
2637
43
            } else {
2638
0
                for (auto& [k, _] : tablet_keys) {
2639
0
                    txn->remove(k);
2640
0
                }
2641
0
            }
2642
43
        }
2643
45
        if (is_multi_version) {
2644
6
            for (auto& k : tablet_compact_stats_keys) {
2645
                // Remove all versions of tablet compact stats for recycled tablet
2646
6
                LOG_INFO("remove versioned tablet compact stats key")
2647
6
                        .tag("compact_stats_key", hex(k));
2648
6
                versioned_remove_all(txn.get(), k);
2649
6
            }
2650
6
            for (auto& k : tablet_load_stats_keys) {
2651
                // Remove all versions of tablet load stats for recycled tablet
2652
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2653
6
                versioned_remove_all(txn.get(), k);
2654
6
            }
2655
6
            for (auto& k : versioned_meta_tablet_keys) {
2656
                // Remove all versions of meta tablet for recycled tablet
2657
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2658
6
                versioned_remove_all(txn.get(), k);
2659
6
            }
2660
5
        }
2661
4.24k
        for (auto& k : tablet_idx_keys) {
2662
4.24k
            txn->remove(k);
2663
4.24k
        }
2664
4.24k
        for (auto& k : restore_job_keys) {
2665
4.24k
            txn->remove(k);
2666
4.24k
        }
2667
45
        for (auto& k : init_rs_keys) {
2668
0
            txn->remove(k);
2669
0
        }
2670
45
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2671
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2672
0
                         << ", err=" << err;
2673
0
            return -1;
2674
0
        }
2675
45
        return 0;
2676
45
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2601
4
    auto loop_done = [&, this]() -> int {
2602
4
        bool finished = true;
2603
4
        auto tablet_keys = sync_executor.when_all(&finished);
2604
4
        if (!finished) {
2605
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2606
0
            return -1;
2607
0
        }
2608
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2609
        // sort the vector using key's order
2610
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2611
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2612
2
        bool use_range_remove = true;
2613
4.00k
        for (auto& [_, remove] : tablet_keys) {
2614
4.00k
            if (!remove) {
2615
0
                use_range_remove = remove;
2616
0
                break;
2617
0
            }
2618
4.00k
        }
2619
2
        DORIS_CLOUD_DEFER {
2620
2
            tablet_idx_keys.clear();
2621
2
            restore_job_keys.clear();
2622
2
            init_rs_keys.clear();
2623
2
            tablet_compact_stats_keys.clear();
2624
2
            tablet_load_stats_keys.clear();
2625
2
            versioned_meta_tablet_keys.clear();
2626
2
        };
2627
2
        std::unique_ptr<Transaction> txn;
2628
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2629
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2630
0
            return -1;
2631
0
        }
2632
2
        std::string tablet_key_end;
2633
2
        if (!tablet_keys.empty()) {
2634
2
            if (use_range_remove) {
2635
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2636
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
2637
2
            } else {
2638
0
                for (auto& [k, _] : tablet_keys) {
2639
0
                    txn->remove(k);
2640
0
                }
2641
0
            }
2642
2
        }
2643
2
        if (is_multi_version) {
2644
0
            for (auto& k : tablet_compact_stats_keys) {
2645
                // Remove all versions of tablet compact stats for recycled tablet
2646
0
                LOG_INFO("remove versioned tablet compact stats key")
2647
0
                        .tag("compact_stats_key", hex(k));
2648
0
                versioned_remove_all(txn.get(), k);
2649
0
            }
2650
0
            for (auto& k : tablet_load_stats_keys) {
2651
                // Remove all versions of tablet load stats for recycled tablet
2652
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2653
0
                versioned_remove_all(txn.get(), k);
2654
0
            }
2655
0
            for (auto& k : versioned_meta_tablet_keys) {
2656
                // Remove all versions of meta tablet for recycled tablet
2657
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2658
0
                versioned_remove_all(txn.get(), k);
2659
0
            }
2660
0
        }
2661
4.00k
        for (auto& k : tablet_idx_keys) {
2662
4.00k
            txn->remove(k);
2663
4.00k
        }
2664
4.00k
        for (auto& k : restore_job_keys) {
2665
4.00k
            txn->remove(k);
2666
4.00k
        }
2667
2
        for (auto& k : init_rs_keys) {
2668
0
            txn->remove(k);
2669
0
        }
2670
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2671
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2672
0
                         << ", err=" << err;
2673
0
            return -1;
2674
0
        }
2675
2
        return 0;
2676
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2601
43
    auto loop_done = [&, this]() -> int {
2602
43
        bool finished = true;
2603
43
        auto tablet_keys = sync_executor.when_all(&finished);
2604
43
        if (!finished) {
2605
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2606
0
            return -1;
2607
0
        }
2608
43
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2609
        // sort the vector using key's order
2610
43
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2611
43
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2612
43
        bool use_range_remove = true;
2613
235
        for (auto& [_, remove] : tablet_keys) {
2614
235
            if (!remove) {
2615
0
                use_range_remove = remove;
2616
0
                break;
2617
0
            }
2618
235
        }
2619
43
        DORIS_CLOUD_DEFER {
2620
43
            tablet_idx_keys.clear();
2621
43
            restore_job_keys.clear();
2622
43
            init_rs_keys.clear();
2623
43
            tablet_compact_stats_keys.clear();
2624
43
            tablet_load_stats_keys.clear();
2625
43
            versioned_meta_tablet_keys.clear();
2626
43
        };
2627
43
        std::unique_ptr<Transaction> txn;
2628
43
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2629
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2630
0
            return -1;
2631
0
        }
2632
43
        std::string tablet_key_end;
2633
43
        if (!tablet_keys.empty()) {
2634
41
            if (use_range_remove) {
2635
41
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2636
41
                txn->remove(tablet_keys.front().first, tablet_key_end);
2637
41
            } else {
2638
0
                for (auto& [k, _] : tablet_keys) {
2639
0
                    txn->remove(k);
2640
0
                }
2641
0
            }
2642
41
        }
2643
43
        if (is_multi_version) {
2644
6
            for (auto& k : tablet_compact_stats_keys) {
2645
                // Remove all versions of tablet compact stats for recycled tablet
2646
6
                LOG_INFO("remove versioned tablet compact stats key")
2647
6
                        .tag("compact_stats_key", hex(k));
2648
6
                versioned_remove_all(txn.get(), k);
2649
6
            }
2650
6
            for (auto& k : tablet_load_stats_keys) {
2651
                // Remove all versions of tablet load stats for recycled tablet
2652
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2653
6
                versioned_remove_all(txn.get(), k);
2654
6
            }
2655
6
            for (auto& k : versioned_meta_tablet_keys) {
2656
                // Remove all versions of meta tablet for recycled tablet
2657
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2658
6
                versioned_remove_all(txn.get(), k);
2659
6
            }
2660
5
        }
2661
238
        for (auto& k : tablet_idx_keys) {
2662
238
            txn->remove(k);
2663
238
        }
2664
238
        for (auto& k : restore_job_keys) {
2665
238
            txn->remove(k);
2666
238
        }
2667
43
        for (auto& k : init_rs_keys) {
2668
0
            txn->remove(k);
2669
0
        }
2670
43
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2671
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2672
0
                         << ", err=" << err;
2673
0
            return -1;
2674
0
        }
2675
43
        return 0;
2676
43
    };
2677
2678
47
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
2679
47
                               std::move(loop_done));
2680
47
    if (ret != 0) {
2681
2
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
2682
2
        return ret;
2683
2
    }
2684
2685
    // directly remove tablet stats and tablet jobs of these dropped index or partition
2686
45
    std::unique_ptr<Transaction> txn;
2687
45
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2688
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
2689
0
        return -1;
2690
0
    }
2691
45
    txn->remove(stats_key_begin, stats_key_end);
2692
45
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
2693
45
                 << " end=" << hex(stats_key_end);
2694
45
    txn->remove(job_key_begin, job_key_end);
2695
45
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
2696
45
    std::string schema_key_begin, schema_key_end;
2697
45
    std::string schema_dict_key;
2698
45
    std::string versioned_schema_key_begin, versioned_schema_key_end;
2699
45
    if (partition_id <= 0) {
2700
        // Delete schema kv of this index
2701
13
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
2702
13
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
2703
13
        txn->remove(schema_key_begin, schema_key_end);
2704
13
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
2705
13
                     << " end=" << hex(schema_key_end);
2706
13
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
2707
13
        txn->remove(schema_dict_key);
2708
13
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
2709
13
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
2710
13
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
2711
13
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
2712
13
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
2713
13
                     << " end=" << hex(versioned_schema_key_end);
2714
13
    }
2715
2716
45
    TxnErrorCode err = txn->commit();
2717
45
    if (err != TxnErrorCode::TXN_OK) {
2718
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
2719
0
                     << " err=" << err;
2720
0
        return -1;
2721
0
    }
2722
2723
45
    return ret;
2724
45
}
2725
2726
4.81k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
2727
4.81k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
2728
4.81k
    int64_t num_segments = rs_meta_pb.num_segments();
2729
4.81k
    if (num_segments <= 0) return 0;
2730
2731
4.81k
    std::vector<std::string> file_paths;
2732
4.81k
    if (decrement_packed_file_ref_counts(rs_meta_pb) != 0) {
2733
0
        return -1;
2734
0
    }
2735
2736
    // Process inverted indexes
2737
4.81k
    std::vector<std::pair<int64_t, std::string>> index_ids;
2738
    // default format as v1.
2739
4.81k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
2740
4.81k
    bool delete_rowset_data_by_prefix = false;
2741
4.81k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
2742
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
2743
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
2744
0
        delete_rowset_data_by_prefix = true;
2745
4.81k
    } else if (rs_meta_pb.has_tablet_schema()) {
2746
9.00k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
2747
9.00k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
2748
9.00k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
2749
9.00k
            }
2750
9.00k
        }
2751
4.40k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
2752
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
2753
2.00k
        }
2754
4.40k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
2755
        // schema version and index id are not found, delete rowset data by prefix directly.
2756
0
        delete_rowset_data_by_prefix = true;
2757
409
    } else {
2758
        // otherwise, try to get schema kv
2759
409
        InvertedIndexInfo index_info;
2760
409
        int inverted_index_get_ret = inverted_index_id_cache_->get(
2761
409
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
2762
409
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
2763
409
                                 &inverted_index_get_ret);
2764
409
        if (inverted_index_get_ret == 0) {
2765
409
            index_format = index_info.first;
2766
409
            index_ids = index_info.second;
2767
409
        } else if (inverted_index_get_ret == 1) {
2768
            // 1. Schema kv not found means tablet has been recycled
2769
            // Maybe some tablet recycle failed by some bugs
2770
            // We need to delete again to double check
2771
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
2772
            // because we are uncertain about the inverted index information.
2773
            // If there are inverted indexes, some data might not be deleted,
2774
            // but this is acceptable as we have made our best effort to delete the data.
2775
0
            LOG_INFO(
2776
0
                    "delete rowset data schema kv not found, need to delete again to double "
2777
0
                    "check")
2778
0
                    .tag("instance_id", instance_id_)
2779
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
2780
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
2781
            // Currently index_ids is guaranteed to be empty,
2782
            // but we clear it again here as a safeguard against future code changes
2783
            // that might cause index_ids to no longer be empty
2784
0
            index_format = InvertedIndexStorageFormatPB::V2;
2785
0
            index_ids.clear();
2786
0
        } else {
2787
            // failed to get schema kv, delete rowset data by prefix directly.
2788
0
            delete_rowset_data_by_prefix = true;
2789
0
        }
2790
409
    }
2791
2792
4.81k
    if (delete_rowset_data_by_prefix) {
2793
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
2794
0
                                  rs_meta_pb.rowset_id_v2());
2795
0
    }
2796
2797
4.81k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
2798
4.81k
    if (it == accessor_map_.end()) {
2799
800
        LOG_WARNING("instance has no such resource id")
2800
800
                .tag("instance_id", instance_id_)
2801
800
                .tag("resource_id", rs_meta_pb.resource_id());
2802
800
        return -1;
2803
800
    }
2804
4.01k
    auto& accessor = it->second;
2805
2806
4.01k
    int64_t tablet_id = rs_meta_pb.tablet_id();
2807
4.01k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
2808
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
2809
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
2810
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
2811
40.0k
            for (const auto& index_id : index_ids) {
2812
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
2813
40.0k
                                                            index_id.second));
2814
40.0k
            }
2815
20.0k
        } else if (!index_ids.empty()) {
2816
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
2817
0
        }
2818
20.0k
    }
2819
2820
    // Process delete bitmap
2821
4.01k
    file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
2822
    // TODO(AlexYue): seems could do do batch
2823
4.01k
    return accessor->delete_files(file_paths);
2824
4.81k
}
2825
2826
61.5k
int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb) {
2827
61.5k
    LOG_INFO("begin process_packed_file_location_index")
2828
61.5k
            .tag("instance_id", instance_id_)
2829
61.5k
            .tag("tablet_id", rs_meta_pb.tablet_id())
2830
61.5k
            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2831
61.5k
            .tag("index_map_size", rs_meta_pb.packed_slice_locations_size());
2832
61.5k
    const auto& index_map = rs_meta_pb.packed_slice_locations();
2833
61.5k
    if (index_map.empty()) {
2834
61.4k
        LOG_INFO("skip merge file update: empty merge_file_segment_index")
2835
61.4k
                .tag("instance_id", instance_id_)
2836
61.4k
                .tag("tablet_id", rs_meta_pb.tablet_id())
2837
61.4k
                .tag("rowset_id", rs_meta_pb.rowset_id_v2());
2838
61.4k
        return 0;
2839
61.4k
    }
2840
15
    struct PackedSmallFileInfo {
2841
15
        std::string small_file_path;
2842
15
    };
2843
15
    std::unordered_map<std::string, std::vector<PackedSmallFileInfo>> packed_file_updates;
2844
15
    packed_file_updates.reserve(index_map.size());
2845
27
    for (const auto& [small_path, index_pb] : index_map) {
2846
27
        if (!index_pb.has_packed_file_path() || index_pb.packed_file_path().empty()) {
2847
0
            continue;
2848
0
        }
2849
27
        packed_file_updates[index_pb.packed_file_path()].push_back(
2850
27
                PackedSmallFileInfo {small_path});
2851
27
    }
2852
15
    if (packed_file_updates.empty()) {
2853
0
        LOG_INFO("skip packed file update: no valid merge_file_path in merge_file_segment_index")
2854
0
                .tag("instance_id", instance_id_)
2855
0
                .tag("tablet_id", rs_meta_pb.tablet_id())
2856
0
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2857
0
                .tag("index_map_size", index_map.size());
2858
0
        return 0;
2859
0
    }
2860
2861
15
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
2862
15
    int ret = 0;
2863
24
    for (auto& [packed_file_path, small_files] : packed_file_updates) {
2864
24
        if (small_files.empty()) {
2865
0
            continue;
2866
0
        }
2867
2868
24
        bool success = false;
2869
24
        for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
2870
24
            std::unique_ptr<Transaction> txn;
2871
24
            TxnErrorCode err = txn_kv_->create_txn(&txn);
2872
24
            if (err != TxnErrorCode::TXN_OK) {
2873
0
                LOG_WARNING("failed to create txn when updating packed file ref count")
2874
0
                        .tag("instance_id", instance_id_)
2875
0
                        .tag("packed_file_path", packed_file_path)
2876
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2877
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
2878
0
                        .tag("err", err);
2879
0
                ret = -1;
2880
0
                break;
2881
0
            }
2882
2883
24
            std::string packed_key = packed_file_key({instance_id_, packed_file_path});
2884
24
            std::string packed_val;
2885
24
            err = txn->get(packed_key, &packed_val);
2886
24
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2887
0
                LOG_WARNING("packed file info not found when recycling rowset")
2888
0
                        .tag("instance_id", instance_id_)
2889
0
                        .tag("packed_file_path", packed_file_path)
2890
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2891
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
2892
0
                        .tag("key", hex(packed_key))
2893
0
                        .tag("tablet id", rs_meta_pb.tablet_id());
2894
                // Skip this packed file entry and continue with others
2895
0
                success = true;
2896
0
                break;
2897
0
            }
2898
24
            if (err != TxnErrorCode::TXN_OK) {
2899
0
                LOG_WARNING("failed to get packed file info when recycling rowset")
2900
0
                        .tag("instance_id", instance_id_)
2901
0
                        .tag("packed_file_path", packed_file_path)
2902
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2903
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
2904
0
                        .tag("err", err);
2905
0
                ret = -1;
2906
0
                break;
2907
0
            }
2908
2909
24
            cloud::PackedFileInfoPB packed_info;
2910
24
            if (!packed_info.ParseFromString(packed_val)) {
2911
0
                LOG_WARNING("failed to parse packed file info when recycling rowset")
2912
0
                        .tag("instance_id", instance_id_)
2913
0
                        .tag("packed_file_path", packed_file_path)
2914
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2915
0
                        .tag("tablet_id", rs_meta_pb.tablet_id());
2916
0
                ret = -1;
2917
0
                break;
2918
0
            }
2919
2920
24
            LOG_INFO("packed file update check")
2921
24
                    .tag("instance_id", instance_id_)
2922
24
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2923
24
                    .tag("tablet_id", rs_meta_pb.tablet_id())
2924
24
                    .tag("merged_file_path", packed_file_path)
2925
24
                    .tag("requested_small_files", small_files.size())
2926
24
                    .tag("merge_entries", packed_info.slices_size());
2927
2928
24
            auto* small_file_entries = packed_info.mutable_slices();
2929
24
            int64_t changed_files = 0;
2930
24
            int64_t missing_entries = 0;
2931
24
            int64_t already_deleted = 0;
2932
27
            for (const auto& small_file_info : small_files) {
2933
27
                bool found = false;
2934
87
                for (auto& small_file_entry : *small_file_entries) {
2935
87
                    if (small_file_entry.path() == small_file_info.small_file_path) {
2936
27
                        if (!small_file_entry.deleted()) {
2937
27
                            small_file_entry.set_deleted(true);
2938
27
                            if (!small_file_entry.corrected()) {
2939
27
                                small_file_entry.set_corrected(true);
2940
27
                            }
2941
27
                            ++changed_files;
2942
27
                        } else {
2943
0
                            ++already_deleted;
2944
0
                        }
2945
27
                        found = true;
2946
27
                        break;
2947
27
                    }
2948
87
                }
2949
27
                if (!found) {
2950
0
                    ++missing_entries;
2951
0
                    LOG_WARNING("packed file info missing small file entry")
2952
0
                            .tag("instance_id", instance_id_)
2953
0
                            .tag("packed_file_path", packed_file_path)
2954
0
                            .tag("small_file_path", small_file_info.small_file_path)
2955
0
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2956
0
                            .tag("tablet_id", rs_meta_pb.tablet_id());
2957
0
                }
2958
27
            }
2959
2960
24
            if (changed_files == 0) {
2961
0
                LOG_INFO("skip merge file update: no merge entries changed")
2962
0
                        .tag("instance_id", instance_id_)
2963
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2964
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
2965
0
                        .tag("merged_file_path", packed_file_path)
2966
0
                        .tag("missing_entries", missing_entries)
2967
0
                        .tag("already_deleted", already_deleted)
2968
0
                        .tag("requested_small_files", small_files.size())
2969
0
                        .tag("merge_entries", packed_info.slices_size());
2970
0
                success = true;
2971
0
                break;
2972
0
            }
2973
2974
24
            int64_t left_file_count = 0;
2975
24
            int64_t left_file_bytes = 0;
2976
141
            for (const auto& small_file_entry : packed_info.slices()) {
2977
141
                if (!small_file_entry.deleted()) {
2978
57
                    ++left_file_count;
2979
57
                    left_file_bytes += small_file_entry.size();
2980
57
                }
2981
141
            }
2982
24
            packed_info.set_remaining_slice_bytes(left_file_bytes);
2983
24
            packed_info.set_ref_cnt(left_file_count);
2984
24
            LOG_INFO("updated packed file reference info")
2985
24
                    .tag("instance_id", instance_id_)
2986
24
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
2987
24
                    .tag("tablet_id", rs_meta_pb.tablet_id())
2988
24
                    .tag("packed_file_path", packed_file_path)
2989
24
                    .tag("ref_cnt", left_file_count)
2990
24
                    .tag("left_file_bytes", left_file_bytes);
2991
2992
24
            if (left_file_count == 0) {
2993
7
                packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
2994
7
            }
2995
2996
24
            std::string updated_val;
2997
24
            if (!packed_info.SerializeToString(&updated_val)) {
2998
0
                LOG_WARNING("failed to serialize packed file info when recycling rowset")
2999
0
                        .tag("instance_id", instance_id_)
3000
0
                        .tag("packed_file_path", packed_file_path)
3001
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3002
0
                        .tag("tablet_id", rs_meta_pb.tablet_id());
3003
0
                ret = -1;
3004
0
                break;
3005
0
            }
3006
3007
24
            txn->put(packed_key, updated_val);
3008
24
            err = txn->commit();
3009
24
            if (err == TxnErrorCode::TXN_OK) {
3010
24
                success = true;
3011
24
                if (left_file_count == 0) {
3012
7
                    LOG_INFO("packed file ready to delete, deleting immediately")
3013
7
                            .tag("instance_id", instance_id_)
3014
7
                            .tag("packed_file_path", packed_file_path);
3015
7
                    if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
3016
0
                        ret = -1;
3017
0
                    }
3018
7
                }
3019
24
                break;
3020
24
            }
3021
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
3022
0
                if (attempt >= max_retry_times) {
3023
0
                    LOG_WARNING("packed file info update conflict after max retry")
3024
0
                            .tag("instance_id", instance_id_)
3025
0
                            .tag("packed_file_path", packed_file_path)
3026
0
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3027
0
                            .tag("tablet_id", rs_meta_pb.tablet_id())
3028
0
                            .tag("changed_files", changed_files)
3029
0
                            .tag("attempt", attempt);
3030
0
                    ret = -1;
3031
0
                    break;
3032
0
                }
3033
0
                LOG_WARNING("packed file info update conflict, retrying")
3034
0
                        .tag("instance_id", instance_id_)
3035
0
                        .tag("packed_file_path", packed_file_path)
3036
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3037
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3038
0
                        .tag("changed_files", changed_files)
3039
0
                        .tag("attempt", attempt);
3040
0
                sleep_for_packed_file_retry();
3041
0
                continue;
3042
0
            }
3043
3044
0
            LOG_WARNING("failed to commit packed file info update")
3045
0
                    .tag("instance_id", instance_id_)
3046
0
                    .tag("packed_file_path", packed_file_path)
3047
0
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3048
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3049
0
                    .tag("err", err)
3050
0
                    .tag("changed_files", changed_files);
3051
0
            ret = -1;
3052
0
            break;
3053
0
        }
3054
3055
24
        if (!success) {
3056
0
            ret = -1;
3057
0
        }
3058
24
    }
3059
3060
15
    return ret;
3061
15
}
3062
3063
int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path,
3064
                                                const std::string& packed_key,
3065
7
                                                const cloud::PackedFileInfoPB& packed_info) {
3066
7
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
3067
0
        LOG_WARNING("packed file missing resource id when recycling")
3068
0
                .tag("instance_id", instance_id_)
3069
0
                .tag("packed_file_path", packed_file_path);
3070
0
        return -1;
3071
0
    }
3072
3073
7
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
3074
7
    if (!accessor) {
3075
0
        LOG_WARNING("no accessor available to delete packed file")
3076
0
                .tag("instance_id", instance_id_)
3077
0
                .tag("packed_file_path", packed_file_path)
3078
0
                .tag("resource_id", packed_info.resource_id());
3079
0
        return -1;
3080
0
    }
3081
3082
7
    int del_ret = accessor->delete_file(packed_file_path);
3083
7
    if (del_ret != 0 && del_ret != 1) {
3084
0
        LOG_WARNING("failed to delete packed file")
3085
0
                .tag("instance_id", instance_id_)
3086
0
                .tag("packed_file_path", packed_file_path)
3087
0
                .tag("resource_id", resource_id)
3088
0
                .tag("ret", del_ret);
3089
0
        return -1;
3090
0
    }
3091
7
    if (del_ret == 1) {
3092
0
        LOG_INFO("packed file already removed")
3093
0
                .tag("instance_id", instance_id_)
3094
0
                .tag("packed_file_path", packed_file_path)
3095
0
                .tag("resource_id", resource_id);
3096
7
    } else {
3097
7
        LOG_INFO("deleted packed file")
3098
7
                .tag("instance_id", instance_id_)
3099
7
                .tag("packed_file_path", packed_file_path)
3100
7
                .tag("resource_id", resource_id);
3101
7
    }
3102
3103
7
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
3104
7
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3105
7
        std::unique_ptr<Transaction> del_txn;
3106
7
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
3107
7
        if (err != TxnErrorCode::TXN_OK) {
3108
0
            LOG_WARNING("failed to create txn when removing packed file kv")
3109
0
                    .tag("instance_id", instance_id_)
3110
0
                    .tag("packed_file_path", packed_file_path)
3111
0
                    .tag("attempt", attempt)
3112
0
                    .tag("err", err);
3113
0
            return -1;
3114
0
        }
3115
3116
7
        std::string latest_val;
3117
7
        err = del_txn->get(packed_key, &latest_val);
3118
7
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3119
0
            return 0;
3120
0
        }
3121
7
        if (err != TxnErrorCode::TXN_OK) {
3122
0
            LOG_WARNING("failed to re-read packed file kv before removal")
3123
0
                    .tag("instance_id", instance_id_)
3124
0
                    .tag("packed_file_path", packed_file_path)
3125
0
                    .tag("attempt", attempt)
3126
0
                    .tag("err", err);
3127
0
            return -1;
3128
0
        }
3129
3130
7
        cloud::PackedFileInfoPB latest_info;
3131
7
        if (!latest_info.ParseFromString(latest_val)) {
3132
0
            LOG_WARNING("failed to parse packed file info before removal")
3133
0
                    .tag("instance_id", instance_id_)
3134
0
                    .tag("packed_file_path", packed_file_path)
3135
0
                    .tag("attempt", attempt);
3136
0
            return -1;
3137
0
        }
3138
3139
7
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
3140
7
              latest_info.ref_cnt() == 0)) {
3141
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
3142
0
                    .tag("instance_id", instance_id_)
3143
0
                    .tag("packed_file_path", packed_file_path)
3144
0
                    .tag("attempt", attempt);
3145
0
            return 0;
3146
0
        }
3147
3148
7
        del_txn->remove(packed_key);
3149
7
        err = del_txn->commit();
3150
7
        if (err == TxnErrorCode::TXN_OK) {
3151
7
            LOG_INFO("removed packed file metadata")
3152
7
                    .tag("instance_id", instance_id_)
3153
7
                    .tag("packed_file_path", packed_file_path);
3154
7
            return 0;
3155
7
        }
3156
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3157
0
            if (attempt >= max_retry_times) {
3158
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
3159
0
                        .tag("instance_id", instance_id_)
3160
0
                        .tag("packed_file_path", packed_file_path)
3161
0
                        .tag("attempt", attempt);
3162
0
                return -1;
3163
0
            }
3164
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
3165
0
                    .tag("instance_id", instance_id_)
3166
0
                    .tag("packed_file_path", packed_file_path)
3167
0
                    .tag("attempt", attempt);
3168
0
            sleep_for_packed_file_retry();
3169
0
            continue;
3170
0
        }
3171
0
        LOG_WARNING("failed to remove packed file kv")
3172
0
                .tag("instance_id", instance_id_)
3173
0
                .tag("packed_file_path", packed_file_path)
3174
0
                .tag("attempt", attempt)
3175
0
                .tag("err", err);
3176
0
        return -1;
3177
0
    }
3178
0
    return -1;
3179
7
}
3180
3181
int InstanceRecycler::delete_rowset_data(
3182
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
3183
48
        RecyclerMetricsContext& metrics_context) {
3184
48
    int ret = 0;
3185
    // resource_id -> file_paths
3186
48
    std::map<std::string, std::vector<std::string>> resource_file_paths;
3187
    // (resource_id, tablet_id, rowset_id)
3188
48
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
3189
48
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
3190
3191
54.1k
    for (const auto& [_, rs] : rowsets) {
3192
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
3193
        // due to aborted schema change.
3194
54.1k
        if (is_formal_rowset) {
3195
3.16k
            std::lock_guard lock(recycled_tablets_mtx_);
3196
3.16k
            if (recycled_tablets_.count(rs.tablet_id()) && rs.packed_slice_locations_size() == 0) {
3197
                // Tablet has been recycled and this rowset has no packed slices, so file data
3198
                // should already be gone; skip to avoid redundant deletes. Rowsets with packed
3199
                // slice info must still run to decrement packed file ref counts.
3200
0
                continue;
3201
0
            }
3202
3.16k
        }
3203
3204
54.1k
        auto it = accessor_map_.find(rs.resource_id());
3205
        // possible if the accessor is not initilized correctly
3206
54.1k
        if (it == accessor_map_.end()) [[unlikely]] {
3207
1
            LOG_WARNING("instance has no such resource id")
3208
1
                    .tag("instance_id", instance_id_)
3209
1
                    .tag("resource_id", rs.resource_id());
3210
1
            ret = -1;
3211
1
            continue;
3212
1
        }
3213
3214
54.1k
        auto& file_paths = resource_file_paths[rs.resource_id()];
3215
54.1k
        const auto& rowset_id = rs.rowset_id_v2();
3216
54.1k
        int64_t tablet_id = rs.tablet_id();
3217
54.1k
        LOG_INFO("recycle rowset merge index size")
3218
54.1k
                .tag("instance_id", instance_id_)
3219
54.1k
                .tag("tablet_id", tablet_id)
3220
54.1k
                .tag("rowset_id", rowset_id)
3221
54.1k
                .tag("merge_index_size", rs.packed_slice_locations_size());
3222
54.1k
        if (decrement_packed_file_ref_counts(rs) != 0) {
3223
0
            ret = -1;
3224
0
            continue;
3225
0
        }
3226
54.1k
        int64_t num_segments = rs.num_segments();
3227
54.1k
        if (num_segments <= 0) {
3228
0
            metrics_context.total_recycled_num++;
3229
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
3230
0
            continue;
3231
0
        }
3232
3233
        // Process delete bitmap
3234
54.1k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3235
3236
        // Process inverted indexes
3237
54.1k
        std::vector<std::pair<int64_t, std::string>> index_ids;
3238
        // default format as v1.
3239
54.1k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3240
54.1k
        int inverted_index_get_ret = 0;
3241
54.1k
        if (rs.has_tablet_schema()) {
3242
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
3243
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3244
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3245
53.5k
                }
3246
53.5k
            }
3247
26.6k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
3248
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
3249
26.5k
            }
3250
27.5k
        } else {
3251
27.5k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
3252
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
3253
0
                                "instance_id="
3254
0
                             << instance_id_ << " tablet_id=" << tablet_id
3255
0
                             << " rowset_id=" << rowset_id;
3256
0
                ret = -1;
3257
0
                continue;
3258
0
            }
3259
27.5k
            InvertedIndexInfo index_info;
3260
27.5k
            inverted_index_get_ret =
3261
27.5k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
3262
27.5k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3263
27.5k
                                     &inverted_index_get_ret);
3264
27.5k
            if (inverted_index_get_ret == 0) {
3265
27.0k
                index_format = index_info.first;
3266
27.0k
                index_ids = index_info.second;
3267
27.0k
            } else if (inverted_index_get_ret == 1) {
3268
                // 1. Schema kv not found means tablet has been recycled
3269
                // Maybe some tablet recycle failed by some bugs
3270
                // We need to delete again to double check
3271
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3272
                // because we are uncertain about the inverted index information.
3273
                // If there are inverted indexes, some data might not be deleted,
3274
                // but this is acceptable as we have made our best effort to delete the data.
3275
507
                LOG_INFO(
3276
507
                        "delete rowset data schema kv not found, need to delete again to double "
3277
507
                        "check")
3278
507
                        .tag("instance_id", instance_id_)
3279
507
                        .tag("tablet_id", tablet_id)
3280
507
                        .tag("rowset", rs.ShortDebugString());
3281
                // Currently index_ids is guaranteed to be empty,
3282
                // but we clear it again here as a safeguard against future code changes
3283
                // that might cause index_ids to no longer be empty
3284
507
                index_format = InvertedIndexStorageFormatPB::V2;
3285
507
                index_ids.clear();
3286
18.4E
            } else {
3287
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
3288
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
3289
18.4E
                ret = -1;
3290
18.4E
                continue;
3291
18.4E
            }
3292
27.5k
        }
3293
54.1k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3294
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3295
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3296
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
3297
5
            continue;
3298
5
        }
3299
324k
        for (int64_t i = 0; i < num_segments; ++i) {
3300
270k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3301
270k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
3302
538k
                for (const auto& index_id : index_ids) {
3303
538k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
3304
538k
                                                                index_id.first, index_id.second));
3305
538k
                }
3306
268k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
3307
                // try to recycle inverted index v2 when get_ret == 1
3308
                // we treat schema not found as if it has a v2 format inverted index
3309
                // to reduce chance of data leakage
3310
2.50k
                if (inverted_index_get_ret == 1) {
3311
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
3312
2.50k
                            .tag("instance_id", instance_id_)
3313
2.50k
                            .tag("inverted index v2 path",
3314
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
3315
2.50k
                }
3316
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3317
2.50k
            }
3318
270k
        }
3319
54.1k
    }
3320
3321
48
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
3322
48
                                                 "delete_rowset_data",
3323
50
                                                 [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3323
50
                                                 [](const int& ret) { return ret != 0; });
3324
48
    for (auto& [resource_id, file_paths] : resource_file_paths) {
3325
45
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3326
45
            DCHECK(accessor_map_.count(*rid))
3327
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3328
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3329
45
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3330
45
                                     &accessor_map_);
3331
45
            if (!accessor_map_.contains(*rid)) {
3332
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3333
0
                        .tag("resource_id", resource_id)
3334
0
                        .tag("instance_id", instance_id_);
3335
0
                return -1;
3336
0
            }
3337
45
            auto& accessor = accessor_map_[*rid];
3338
45
            int ret = accessor->delete_files(*paths);
3339
45
            if (!ret) {
3340
                // deduplication of different files with the same rowset id
3341
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3342
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3343
45
                std::set<std::string> deleted_rowset_id;
3344
3345
45
                std::for_each(paths->begin(), paths->end(),
3346
45
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3347
863k
                               this](const std::string& path) {
3348
863k
                                  std::vector<std::string> str;
3349
863k
                                  butil::SplitString(path, '/', &str);
3350
863k
                                  std::string rowset_id;
3351
863k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3352
860k
                                      rowset_id = str.back().substr(0, pos);
3353
860k
                                  } else {
3354
2.65k
                                      if (path.find("packed_file/") != std::string::npos) {
3355
0
                                          return; // packed files do not have rowset_id encoded
3356
0
                                      }
3357
2.65k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3358
2.65k
                                      return;
3359
2.65k
                                  }
3360
860k
                                  auto rs_meta = rowsets.find(rowset_id);
3361
860k
                                  if (rs_meta != rowsets.end() &&
3362
863k
                                      !deleted_rowset_id.contains(rowset_id)) {
3363
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3364
54.1k
                                      metrics_context.total_recycled_data_size +=
3365
54.1k
                                              rs_meta->second.total_disk_size();
3366
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3367
54.1k
                                              rs_meta->second.num_segments();
3368
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3369
54.1k
                                              rs_meta->second.total_disk_size();
3370
54.1k
                                      metrics_context.total_recycled_num++;
3371
54.1k
                                  }
3372
860k
                              });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3347
863k
                               this](const std::string& path) {
3348
863k
                                  std::vector<std::string> str;
3349
863k
                                  butil::SplitString(path, '/', &str);
3350
863k
                                  std::string rowset_id;
3351
863k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3352
860k
                                      rowset_id = str.back().substr(0, pos);
3353
860k
                                  } else {
3354
2.65k
                                      if (path.find("packed_file/") != std::string::npos) {
3355
0
                                          return; // packed files do not have rowset_id encoded
3356
0
                                      }
3357
2.65k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3358
2.65k
                                      return;
3359
2.65k
                                  }
3360
860k
                                  auto rs_meta = rowsets.find(rowset_id);
3361
860k
                                  if (rs_meta != rowsets.end() &&
3362
863k
                                      !deleted_rowset_id.contains(rowset_id)) {
3363
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3364
54.1k
                                      metrics_context.total_recycled_data_size +=
3365
54.1k
                                              rs_meta->second.total_disk_size();
3366
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3367
54.1k
                                              rs_meta->second.num_segments();
3368
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3369
54.1k
                                              rs_meta->second.total_disk_size();
3370
54.1k
                                      metrics_context.total_recycled_num++;
3371
54.1k
                                  }
3372
860k
                              });
3373
45
                segment_metrics_context_.report();
3374
45
                metrics_context.report();
3375
45
            }
3376
45
            return ret;
3377
45
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3325
45
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3326
45
            DCHECK(accessor_map_.count(*rid))
3327
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3328
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3329
45
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3330
45
                                     &accessor_map_);
3331
45
            if (!accessor_map_.contains(*rid)) {
3332
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3333
0
                        .tag("resource_id", resource_id)
3334
0
                        .tag("instance_id", instance_id_);
3335
0
                return -1;
3336
0
            }
3337
45
            auto& accessor = accessor_map_[*rid];
3338
45
            int ret = accessor->delete_files(*paths);
3339
45
            if (!ret) {
3340
                // deduplication of different files with the same rowset id
3341
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3342
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3343
45
                std::set<std::string> deleted_rowset_id;
3344
3345
45
                std::for_each(paths->begin(), paths->end(),
3346
45
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3347
45
                               this](const std::string& path) {
3348
45
                                  std::vector<std::string> str;
3349
45
                                  butil::SplitString(path, '/', &str);
3350
45
                                  std::string rowset_id;
3351
45
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3352
45
                                      rowset_id = str.back().substr(0, pos);
3353
45
                                  } else {
3354
45
                                      if (path.find("packed_file/") != std::string::npos) {
3355
45
                                          return; // packed files do not have rowset_id encoded
3356
45
                                      }
3357
45
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3358
45
                                      return;
3359
45
                                  }
3360
45
                                  auto rs_meta = rowsets.find(rowset_id);
3361
45
                                  if (rs_meta != rowsets.end() &&
3362
45
                                      !deleted_rowset_id.contains(rowset_id)) {
3363
45
                                      deleted_rowset_id.emplace(rowset_id);
3364
45
                                      metrics_context.total_recycled_data_size +=
3365
45
                                              rs_meta->second.total_disk_size();
3366
45
                                      segment_metrics_context_.total_recycled_num +=
3367
45
                                              rs_meta->second.num_segments();
3368
45
                                      segment_metrics_context_.total_recycled_data_size +=
3369
45
                                              rs_meta->second.total_disk_size();
3370
45
                                      metrics_context.total_recycled_num++;
3371
45
                                  }
3372
45
                              });
3373
45
                segment_metrics_context_.report();
3374
45
                metrics_context.report();
3375
45
            }
3376
45
            return ret;
3377
45
        });
3378
45
    }
3379
48
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
3380
5
        LOG_INFO(
3381
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
3382
5
                "resource_id={}, tablet_id={}, instance_id={}, task_type={}",
3383
5
                rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type);
3384
5
        concurrent_delete_executor.add([&]() -> int {
3385
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3386
5
            if (!ret) {
3387
5
                auto rs = rowsets.at(rowset_id);
3388
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3389
5
                metrics_context.total_recycled_num++;
3390
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3391
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3392
5
                metrics_context.report();
3393
5
                segment_metrics_context_.report();
3394
5
            }
3395
5
            return ret;
3396
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
3384
5
        concurrent_delete_executor.add([&]() -> int {
3385
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3386
5
            if (!ret) {
3387
5
                auto rs = rowsets.at(rowset_id);
3388
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3389
5
                metrics_context.total_recycled_num++;
3390
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3391
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3392
5
                metrics_context.report();
3393
5
                segment_metrics_context_.report();
3394
5
            }
3395
5
            return ret;
3396
5
        });
3397
5
    }
3398
3399
48
    bool finished = true;
3400
48
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3401
50
    for (int r : rets) {
3402
50
        if (r != 0) {
3403
0
            ret = -1;
3404
0
            break;
3405
0
        }
3406
50
    }
3407
48
    ret = finished ? ret : -1;
3408
48
    return ret;
3409
48
}
3410
3411
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
3412
3.10k
                                         const std::string& rowset_id) {
3413
3.10k
    auto it = accessor_map_.find(resource_id);
3414
3.10k
    if (it == accessor_map_.end()) {
3415
200
        LOG_WARNING("instance has no such resource id")
3416
200
                .tag("instance_id", instance_id_)
3417
200
                .tag("resource_id", resource_id)
3418
200
                .tag("tablet_id", tablet_id)
3419
200
                .tag("rowset_id", rowset_id);
3420
200
        return -1;
3421
200
    }
3422
2.90k
    auto& accessor = it->second;
3423
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
3424
3.10k
}
3425
3426
4
bool InstanceRecycler::decode_packed_file_key(std::string_view key, std::string* packed_path) {
3427
4
    if (key.empty()) {
3428
0
        return false;
3429
0
    }
3430
4
    std::string_view key_view = key;
3431
4
    key_view.remove_prefix(1); // remove keyspace prefix
3432
4
    std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> decoded;
3433
4
    if (decode_key(&key_view, &decoded) != 0) {
3434
0
        return false;
3435
0
    }
3436
4
    if (decoded.size() < 4) {
3437
0
        return false;
3438
0
    }
3439
4
    try {
3440
4
        *packed_path = std::get<std::string>(std::get<0>(decoded.back()));
3441
4
    } catch (const std::bad_variant_access&) {
3442
0
        return false;
3443
0
    }
3444
4
    return true;
3445
4
}
3446
3447
14
int InstanceRecycler::recycle_packed_files() {
3448
14
    const std::string task_name = "recycle_packed_files";
3449
14
    auto start_tp = steady_clock::now();
3450
14
    int64_t start_time = duration_cast<seconds>(start_tp.time_since_epoch()).count();
3451
14
    int ret = 0;
3452
14
    PackedFileRecycleStats stats;
3453
3454
14
    register_recycle_task(task_name, start_time);
3455
14
    DORIS_CLOUD_DEFER {
3456
14
        unregister_recycle_task(task_name);
3457
14
        int64_t cost =
3458
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3459
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3460
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3461
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3462
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3463
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3464
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3465
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3466
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3467
14
                                                             stats.bytes_object_deleted);
3468
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3469
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3470
14
                .tag("instance_id", instance_id_)
3471
14
                .tag("num_scanned", stats.num_scanned)
3472
14
                .tag("num_corrected", stats.num_corrected)
3473
14
                .tag("num_deleted", stats.num_deleted)
3474
14
                .tag("num_failed", stats.num_failed)
3475
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3476
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3477
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3478
14
                .tag("bytes_deleted", stats.bytes_deleted)
3479
14
                .tag("ret", ret);
3480
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
Line
Count
Source
3455
14
    DORIS_CLOUD_DEFER {
3456
14
        unregister_recycle_task(task_name);
3457
14
        int64_t cost =
3458
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3459
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3460
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3461
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3462
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3463
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3464
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3465
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3466
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3467
14
                                                             stats.bytes_object_deleted);
3468
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3469
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3470
14
                .tag("instance_id", instance_id_)
3471
14
                .tag("num_scanned", stats.num_scanned)
3472
14
                .tag("num_corrected", stats.num_corrected)
3473
14
                .tag("num_deleted", stats.num_deleted)
3474
14
                .tag("num_failed", stats.num_failed)
3475
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3476
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3477
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3478
14
                .tag("bytes_deleted", stats.bytes_deleted)
3479
14
                .tag("ret", ret);
3480
14
    };
3481
3482
14
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3483
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3484
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3485
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
Line
Count
Source
3482
4
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3483
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3484
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3485
4
    };
3486
3487
14
    LOG_INFO("begin to recycle packed file").tag("instance_id", instance_id_);
3488
3489
14
    std::string begin = packed_file_key({instance_id_, ""});
3490
14
    std::string end = packed_file_key({instance_id_, "\xff"});
3491
14
    if (scan_and_recycle(begin, end, recycle_func) != 0) {
3492
0
        ret = -1;
3493
0
    }
3494
3495
14
    return ret;
3496
14
}
3497
3498
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
3499
                                                  RecyclerMetricsContext& metrics_context,
3500
0
                                                  int64_t partition_id, bool is_empty_tablet) {
3501
0
    std::string tablet_key_begin, tablet_key_end;
3502
3503
0
    if (partition_id > 0) {
3504
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
3505
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
3506
0
    } else {
3507
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
3508
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
3509
0
    }
3510
    // for calculate the total num or bytes of recyled objects
3511
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
3512
0
                                                          std::string_view v) -> int {
3513
0
        doris::TabletMetaCloudPB tablet_meta_pb;
3514
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
3515
0
            return 0;
3516
0
        }
3517
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
3518
3519
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
3520
0
            return 0;
3521
0
        }
3522
3523
0
        if (!is_empty_tablet) {
3524
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
3525
0
                return 0;
3526
0
            }
3527
0
            tablet_metrics_context_.total_need_recycle_num++;
3528
0
        }
3529
0
        return 0;
3530
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
3531
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
3532
0
    metrics_context.report(true);
3533
0
    tablet_metrics_context_.report(true);
3534
0
    segment_metrics_context_.report(true);
3535
0
    return ret;
3536
0
}
3537
3538
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
3539
0
                                                 RecyclerMetricsContext& metrics_context) {
3540
0
    int ret = 0;
3541
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
3542
0
    std::unique_ptr<Transaction> txn;
3543
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3544
0
        LOG_WARNING("failed to recycle tablet ")
3545
0
                .tag("tablet id", tablet_id)
3546
0
                .tag("instance_id", instance_id_)
3547
0
                .tag("reason", "failed to create txn");
3548
0
        ret = -1;
3549
0
    }
3550
0
    GetRowsetResponse resp;
3551
0
    std::string msg;
3552
0
    MetaServiceCode code = MetaServiceCode::OK;
3553
    // get rowsets in tablet
3554
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
3555
0
                        tablet_id, code, msg, &resp);
3556
0
    if (code != MetaServiceCode::OK) {
3557
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3558
0
                .tag("tablet id", tablet_id)
3559
0
                .tag("msg", msg)
3560
0
                .tag("code", code)
3561
0
                .tag("instance id", instance_id_);
3562
0
        ret = -1;
3563
0
    }
3564
0
    for (const auto& rs_meta : resp.rowset_meta()) {
3565
        /*
3566
        * For compatibility, we skip the loop for [0-1] here.
3567
        * The purpose of this loop is to delete object files,
3568
        * and since [0-1] only has meta and doesn't have object files,
3569
        * skipping it doesn't affect system correctness.
3570
        *
3571
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
3572
        * would return error -1 directly, causing the recycle operation to fail.
3573
        *
3574
        * [0-1] doesn't have resource id is a bug.
3575
        * In the future, we will fix this problem, after that,
3576
        * we can remove this if statement.
3577
        *
3578
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
3579
        */
3580
3581
0
        if (rs_meta.end_version() == 1) {
3582
            // Assert that [0-1] has no resource_id to make sure
3583
            // this if statement will not be forgetted to remove
3584
            // when the resource id bug is fixed
3585
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3586
0
            continue;
3587
0
        }
3588
0
        if (!rs_meta.has_resource_id()) {
3589
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3590
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3591
0
                    .tag("instance_id", instance_id_)
3592
0
                    .tag("tablet_id", tablet_id);
3593
0
            continue;
3594
0
        }
3595
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3596
0
        auto it = accessor_map_.find(rs_meta.resource_id());
3597
        // possible if the accessor is not initilized correctly
3598
0
        if (it == accessor_map_.end()) [[unlikely]] {
3599
0
            LOG_WARNING(
3600
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3601
0
                    "recycle process")
3602
0
                    .tag("tablet id", tablet_id)
3603
0
                    .tag("instance_id", instance_id_)
3604
0
                    .tag("resource_id", rs_meta.resource_id())
3605
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
3606
0
            continue;
3607
0
        }
3608
3609
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
3610
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
3611
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
3612
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
3613
0
    }
3614
0
    return ret;
3615
0
}
3616
3617
4.24k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
3618
4.24k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
3619
4.24k
            .tag("instance_id", instance_id_)
3620
4.24k
            .tag("tablet_id", tablet_id);
3621
3622
4.24k
    if (should_recycle_versioned_keys()) {
3623
11
        int ret = recycle_versioned_tablet(tablet_id, metrics_context);
3624
11
        if (ret != 0) {
3625
0
            return ret;
3626
0
        }
3627
        // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED
3628
        // during the recycle_versioned_tablet process.
3629
        //
3630
        // .. And remove restore job rowsets of this tablet too
3631
11
    }
3632
3633
4.24k
    int ret = 0;
3634
4.24k
    auto start_time = steady_clock::now();
3635
3636
4.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
3637
3638
    // collect resource ids
3639
246
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
3640
246
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
3641
246
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
3642
246
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
3643
246
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
3644
246
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
3645
3646
246
    std::set<std::string> resource_ids;
3647
246
    int64_t recycle_rowsets_number = 0;
3648
246
    int64_t recycle_segments_number = 0;
3649
246
    int64_t recycle_rowsets_data_size = 0;
3650
246
    int64_t recycle_rowsets_index_size = 0;
3651
246
    int64_t recycle_restore_job_rowsets_number = 0;
3652
246
    int64_t recycle_restore_job_segments_number = 0;
3653
246
    int64_t recycle_restore_job_rowsets_data_size = 0;
3654
246
    int64_t recycle_restore_job_rowsets_index_size = 0;
3655
246
    int64_t max_rowset_version = 0;
3656
246
    int64_t min_rowset_creation_time = INT64_MAX;
3657
246
    int64_t max_rowset_creation_time = 0;
3658
246
    int64_t min_rowset_expiration_time = INT64_MAX;
3659
246
    int64_t max_rowset_expiration_time = 0;
3660
3661
246
    DORIS_CLOUD_DEFER {
3662
246
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3663
246
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3664
246
                .tag("instance_id", instance_id_)
3665
246
                .tag("tablet_id", tablet_id)
3666
246
                .tag("recycle rowsets number", recycle_rowsets_number)
3667
246
                .tag("recycle segments number", recycle_segments_number)
3668
246
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3669
246
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3670
246
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
3671
246
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
3672
246
                .tag("all restore job rowsets recycle data size",
3673
246
                     recycle_restore_job_rowsets_data_size)
3674
246
                .tag("all restore job rowsets recycle index size",
3675
246
                     recycle_restore_job_rowsets_index_size)
3676
246
                .tag("max rowset version", max_rowset_version)
3677
246
                .tag("min rowset creation time", min_rowset_creation_time)
3678
246
                .tag("max rowset creation time", max_rowset_creation_time)
3679
246
                .tag("min rowset expiration time", min_rowset_expiration_time)
3680
246
                .tag("max rowset expiration time", max_rowset_expiration_time)
3681
246
                .tag("task type", metrics_context.operation_type)
3682
246
                .tag("ret", ret);
3683
246
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3661
246
    DORIS_CLOUD_DEFER {
3662
246
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3663
246
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3664
246
                .tag("instance_id", instance_id_)
3665
246
                .tag("tablet_id", tablet_id)
3666
246
                .tag("recycle rowsets number", recycle_rowsets_number)
3667
246
                .tag("recycle segments number", recycle_segments_number)
3668
246
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3669
246
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3670
246
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
3671
246
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
3672
246
                .tag("all restore job rowsets recycle data size",
3673
246
                     recycle_restore_job_rowsets_data_size)
3674
246
                .tag("all restore job rowsets recycle index size",
3675
246
                     recycle_restore_job_rowsets_index_size)
3676
246
                .tag("max rowset version", max_rowset_version)
3677
246
                .tag("min rowset creation time", min_rowset_creation_time)
3678
246
                .tag("max rowset creation time", max_rowset_creation_time)
3679
246
                .tag("min rowset expiration time", min_rowset_expiration_time)
3680
246
                .tag("max rowset expiration time", max_rowset_expiration_time)
3681
246
                .tag("task type", metrics_context.operation_type)
3682
246
                .tag("ret", ret);
3683
246
    };
3684
3685
246
    std::unique_ptr<Transaction> txn;
3686
246
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3687
0
        LOG_WARNING("failed to recycle tablet ")
3688
0
                .tag("tablet id", tablet_id)
3689
0
                .tag("instance_id", instance_id_)
3690
0
                .tag("reason", "failed to create txn");
3691
0
        ret = -1;
3692
0
    }
3693
246
    GetRowsetResponse resp;
3694
246
    std::string msg;
3695
246
    MetaServiceCode code = MetaServiceCode::OK;
3696
    // get rowsets in tablet
3697
246
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
3698
246
                        tablet_id, code, msg, &resp);
3699
246
    if (code != MetaServiceCode::OK) {
3700
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3701
0
                .tag("tablet id", tablet_id)
3702
0
                .tag("msg", msg)
3703
0
                .tag("code", code)
3704
0
                .tag("instance id", instance_id_);
3705
0
        ret = -1;
3706
0
    }
3707
246
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
3708
3709
2.50k
    for (const auto& rs_meta : resp.rowset_meta()) {
3710
        // The rowset has no resource id and segments when it was generated by compaction
3711
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
3712
2.50k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
3713
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
3714
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3715
0
                    .tag("instance_id", instance_id_)
3716
0
                    .tag("tablet_id", tablet_id);
3717
0
            recycle_rowsets_number += 1;
3718
0
            continue;
3719
0
        }
3720
2.50k
        if (!rs_meta.has_resource_id()) {
3721
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3722
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
3723
1
                    .tag("instance_id", instance_id_)
3724
1
                    .tag("tablet_id", tablet_id);
3725
1
            return -1;
3726
1
        }
3727
18.4E
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3728
2.50k
        auto it = accessor_map_.find(rs_meta.resource_id());
3729
        // possible if the accessor is not initilized correctly
3730
2.50k
        if (it == accessor_map_.end()) [[unlikely]] {
3731
1
            LOG_WARNING(
3732
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3733
1
                    "recycle process")
3734
1
                    .tag("tablet id", tablet_id)
3735
1
                    .tag("instance_id", instance_id_)
3736
1
                    .tag("resource_id", rs_meta.resource_id())
3737
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
3738
1
            return -1;
3739
1
        }
3740
2.50k
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
3741
0
            LOG_WARNING("failed to update packed file info when recycling tablet")
3742
0
                    .tag("instance_id", instance_id_)
3743
0
                    .tag("tablet_id", tablet_id)
3744
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
3745
0
            return -1;
3746
0
        }
3747
2.50k
        recycle_rowsets_number += 1;
3748
2.50k
        recycle_segments_number += rs_meta.num_segments();
3749
2.50k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
3750
2.50k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
3751
2.50k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
3752
2.50k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
3753
2.50k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
3754
2.50k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
3755
2.50k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
3756
2.50k
        resource_ids.emplace(rs_meta.resource_id());
3757
2.50k
    }
3758
3759
    // get restore job rowset in tablet
3760
244
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
3761
244
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
3762
244
    if (code != MetaServiceCode::OK) {
3763
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
3764
0
                .tag("tablet id", tablet_id)
3765
0
                .tag("msg", msg)
3766
0
                .tag("code", code)
3767
0
                .tag("instance id", instance_id_);
3768
0
        return -1;
3769
0
    }
3770
3771
244
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
3772
0
        if (!rs_meta.has_resource_id()) {
3773
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3774
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3775
0
                    .tag("instance_id", instance_id_)
3776
0
                    .tag("tablet_id", tablet_id);
3777
0
            return -1;
3778
0
        }
3779
3780
0
        auto it = accessor_map_.find(rs_meta.resource_id());
3781
        // possible if the accessor is not initilized correctly
3782
0
        if (it == accessor_map_.end()) [[unlikely]] {
3783
0
            LOG_WARNING(
3784
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3785
0
                    "recycle process")
3786
0
                    .tag("tablet id", tablet_id)
3787
0
                    .tag("instance_id", instance_id_)
3788
0
                    .tag("resource_id", rs_meta.resource_id())
3789
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
3790
0
            return -1;
3791
0
        }
3792
0
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
3793
0
            LOG_WARNING("failed to update packed file info when recycling restore job rowset")
3794
0
                    .tag("instance_id", instance_id_)
3795
0
                    .tag("tablet_id", tablet_id)
3796
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
3797
0
            return -1;
3798
0
        }
3799
0
        recycle_restore_job_rowsets_number += 1;
3800
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
3801
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
3802
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
3803
0
        resource_ids.emplace(rs_meta.resource_id());
3804
0
    }
3805
3806
244
    LOG_INFO("recycle tablet start to delete object")
3807
244
            .tag("instance id", instance_id_)
3808
244
            .tag("tablet id", tablet_id)
3809
244
            .tag("recycle tablet resource ids are",
3810
244
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
3811
244
                                 [](std::string rs_id, const auto& it) {
3812
204
                                     return rs_id.empty() ? it : rs_id + ", " + it;
3813
204
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
3811
204
                                 [](std::string rs_id, const auto& it) {
3812
204
                                     return rs_id.empty() ? it : rs_id + ", " + it;
3813
204
                                 }));
3814
3815
244
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
3816
244
            _thread_pool_group.s3_producer_pool,
3817
244
            fmt::format("delete tablet {} s3 rowset", tablet_id),
3818
244
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
3818
204
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
3819
3820
    // delete all rowset data in this tablet
3821
    // ATTN: there may be data leak if not all accessor initilized successfully
3822
    //       partial data deleted if the tablet is stored cross-storage vault
3823
    //       vault id is not attached to TabletMeta...
3824
244
    for (const auto& resource_id : resource_ids) {
3825
204
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
3826
204
        concurrent_delete_executor.add(
3827
204
                [&, rs_id = resource_id,
3828
204
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
3829
204
                    std::unique_ptr<int, std::function<void(int*)>> defer(
3830
204
                            (int*)0x01, [&](int*) { metrics_context.report(); });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
Line
Count
Source
3830
204
                            (int*)0x01, [&](int*) { metrics_context.report(); });
3831
204
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
3832
204
                    if (res != 0) {
3833
1
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
3834
1
                                     << " path=" << accessor_ptr->uri()
3835
1
                                     << " task type=" << metrics_context.operation_type;
3836
1
                        return std::make_pair(-1, rs_id);
3837
1
                    }
3838
203
                    return std::make_pair(0, rs_id);
3839
204
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
3828
204
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
3829
204
                    std::unique_ptr<int, std::function<void(int*)>> defer(
3830
204
                            (int*)0x01, [&](int*) { metrics_context.report(); });
3831
204
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
3832
204
                    if (res != 0) {
3833
1
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
3834
1
                                     << " path=" << accessor_ptr->uri()
3835
1
                                     << " task type=" << metrics_context.operation_type;
3836
1
                        return std::make_pair(-1, rs_id);
3837
1
                    }
3838
203
                    return std::make_pair(0, rs_id);
3839
204
                });
3840
204
    }
3841
3842
244
    bool finished = true;
3843
244
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
3844
244
    for (auto& r : rets) {
3845
204
        if (r.first != 0) {
3846
1
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
3847
1
            ret = -1;
3848
1
        }
3849
204
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
3850
204
    }
3851
244
    ret = finished ? ret : -1;
3852
3853
244
    if (ret != 0) { // failed recycle tablet data
3854
1
        LOG_WARNING("ret!=0")
3855
1
                .tag("finished", finished)
3856
1
                .tag("ret", ret)
3857
1
                .tag("instance_id", instance_id_)
3858
1
                .tag("tablet_id", tablet_id);
3859
1
        return ret;
3860
1
    }
3861
3862
243
    tablet_metrics_context_.total_recycled_data_size +=
3863
243
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3864
243
    tablet_metrics_context_.total_recycled_num += 1;
3865
243
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
3866
243
    segment_metrics_context_.total_recycled_data_size +=
3867
243
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3868
243
    metrics_context.total_recycled_data_size +=
3869
243
            recycle_rowsets_data_size + recycle_rowsets_index_size;
3870
243
    tablet_metrics_context_.report();
3871
243
    segment_metrics_context_.report();
3872
243
    metrics_context.report();
3873
3874
243
    txn.reset();
3875
243
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3876
0
        LOG_WARNING("failed to recycle tablet ")
3877
0
                .tag("tablet id", tablet_id)
3878
0
                .tag("instance_id", instance_id_)
3879
0
                .tag("reason", "failed to create txn");
3880
0
        ret = -1;
3881
0
    }
3882
    // delete all rowset kv in this tablet
3883
243
    txn->remove(rs_key0, rs_key1);
3884
243
    txn->remove(recyc_rs_key0, recyc_rs_key1);
3885
243
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
3886
3887
    // remove delete bitmap for MoW table
3888
243
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
3889
243
    txn->remove(pending_key);
3890
243
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
3891
243
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
3892
243
    txn->remove(delete_bitmap_start, delete_bitmap_end);
3893
3894
243
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
3895
243
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
3896
243
    txn->remove(dbm_start_key, dbm_end_key);
3897
243
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
3898
243
              << " end=" << hex(dbm_end_key);
3899
3900
243
    TxnErrorCode err = txn->commit();
3901
243
    if (err != TxnErrorCode::TXN_OK) {
3902
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
3903
0
        ret = -1;
3904
0
    }
3905
3906
243
    if (ret == 0) {
3907
        // All object files under tablet have been deleted
3908
243
        std::lock_guard lock(recycled_tablets_mtx_);
3909
243
        recycled_tablets_.insert(tablet_id);
3910
243
    }
3911
3912
243
    return ret;
3913
244
}
3914
3915
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
3916
11
                                               RecyclerMetricsContext& metrics_context) {
3917
11
    int ret = 0;
3918
11
    auto start_time = steady_clock::now();
3919
3920
11
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
3921
3922
    // collect resource ids
3923
11
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
3924
11
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
3925
11
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
3926
11
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
3927
3928
11
    int64_t recycle_rowsets_number = 0;
3929
11
    int64_t recycle_segments_number = 0;
3930
11
    int64_t recycle_rowsets_data_size = 0;
3931
11
    int64_t recycle_rowsets_index_size = 0;
3932
11
    int64_t max_rowset_version = 0;
3933
11
    int64_t min_rowset_creation_time = INT64_MAX;
3934
11
    int64_t max_rowset_creation_time = 0;
3935
11
    int64_t min_rowset_expiration_time = INT64_MAX;
3936
11
    int64_t max_rowset_expiration_time = 0;
3937
3938
11
    DORIS_CLOUD_DEFER {
3939
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3940
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3941
11
                .tag("instance_id", instance_id_)
3942
11
                .tag("tablet_id", tablet_id)
3943
11
                .tag("recycle rowsets number", recycle_rowsets_number)
3944
11
                .tag("recycle segments number", recycle_segments_number)
3945
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3946
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3947
11
                .tag("max rowset version", max_rowset_version)
3948
11
                .tag("min rowset creation time", min_rowset_creation_time)
3949
11
                .tag("max rowset creation time", max_rowset_creation_time)
3950
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
3951
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
3952
11
                .tag("ret", ret);
3953
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3938
11
    DORIS_CLOUD_DEFER {
3939
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3940
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3941
11
                .tag("instance_id", instance_id_)
3942
11
                .tag("tablet_id", tablet_id)
3943
11
                .tag("recycle rowsets number", recycle_rowsets_number)
3944
11
                .tag("recycle segments number", recycle_segments_number)
3945
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3946
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3947
11
                .tag("max rowset version", max_rowset_version)
3948
11
                .tag("min rowset creation time", min_rowset_creation_time)
3949
11
                .tag("max rowset creation time", max_rowset_creation_time)
3950
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
3951
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
3952
11
                .tag("ret", ret);
3953
11
    };
3954
3955
11
    std::unique_ptr<Transaction> txn;
3956
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3957
0
        LOG_WARNING("failed to recycle tablet ")
3958
0
                .tag("tablet id", tablet_id)
3959
0
                .tag("instance_id", instance_id_)
3960
0
                .tag("reason", "failed to create txn");
3961
0
        ret = -1;
3962
0
    }
3963
3964
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
3965
    // by the related operation logs.
3966
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
3967
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
3968
11
    MetaReader meta_reader(instance_id_);
3969
11
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
3970
11
    if (err == TxnErrorCode::TXN_OK) {
3971
11
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
3972
11
    }
3973
11
    if (err != TxnErrorCode::TXN_OK) {
3974
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3975
0
                .tag("tablet id", tablet_id)
3976
0
                .tag("err", err)
3977
0
                .tag("instance id", instance_id_);
3978
0
        ret = -1;
3979
0
    }
3980
3981
11
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
3982
11
             load_rowset_metas.size(), compact_rowset_metas.size())
3983
11
            .tag("instance_id", instance_id_)
3984
11
            .tag("tablet_id", tablet_id);
3985
3986
11
    SyncExecutor<int> concurrent_delete_executor(
3987
11
            _thread_pool_group.s3_producer_pool,
3988
11
            fmt::format("delete tablet {} s3 rowset", tablet_id),
3989
11
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
3990
3991
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
3992
60
        recycle_rowsets_number += 1;
3993
60
        recycle_segments_number += rs_meta.num_segments();
3994
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
3995
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
3996
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
3997
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
3998
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
3999
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4000
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4001
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
3991
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
3992
60
        recycle_rowsets_number += 1;
3993
60
        recycle_segments_number += rs_meta.num_segments();
3994
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
3995
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
3996
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
3997
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
3998
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
3999
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4000
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4001
60
    };
4002
4003
11
    std::vector<RowsetDeleteTask> all_tasks;
4004
4005
11
    auto create_delete_task = [this](const RowsetMetaCloudPB& rs_meta, std::string_view recycle_key,
4006
11
                                     std::string_view non_versioned_rowset_key =
4007
60
                                             "") -> RowsetDeleteTask {
4008
60
        RowsetDeleteTask task;
4009
60
        task.rowset_meta = rs_meta;
4010
60
        task.recycle_rowset_key = std::string(recycle_key);
4011
60
        task.non_versioned_rowset_key = std::string(non_versioned_rowset_key);
4012
60
        task.versioned_rowset_key = versioned::meta_rowset_key(
4013
60
                {instance_id_, rs_meta.tablet_id(), rs_meta.rowset_id_v2()});
4014
60
        return task;
4015
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clERKNS_17RowsetMetaCloudPBESt17basic_string_viewIcSt11char_traitsIcEESB_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clERKNS_17RowsetMetaCloudPBESt17basic_string_viewIcSt11char_traitsIcEESB_
Line
Count
Source
4007
60
                                             "") -> RowsetDeleteTask {
4008
60
        RowsetDeleteTask task;
4009
60
        task.rowset_meta = rs_meta;
4010
60
        task.recycle_rowset_key = std::string(recycle_key);
4011
60
        task.non_versioned_rowset_key = std::string(non_versioned_rowset_key);
4012
60
        task.versioned_rowset_key = versioned::meta_rowset_key(
4013
60
                {instance_id_, rs_meta.tablet_id(), rs_meta.rowset_id_v2()});
4014
60
        return task;
4015
60
    };
4016
4017
60
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
4018
60
        update_rowset_stats(rs_meta);
4019
        // Version 0-1 rowset has no resource_id and no actual data files,
4020
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4021
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4022
60
        std::string rowset_load_key =
4023
60
                versioned::meta_rowset_load_key({instance_id_, tablet_id, rs_meta.end_version()});
4024
60
        std::string rowset_key = meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4025
60
        RowsetDeleteTask task = create_delete_task(
4026
60
                rs_meta, encode_versioned_key(rowset_load_key, versionstamp), rowset_key);
4027
60
        all_tasks.push_back(std::move(task));
4028
60
    }
4029
4030
11
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
4031
0
        update_rowset_stats(rs_meta);
4032
        // Version 0-1 rowset has no resource_id and no actual data files,
4033
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4034
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4035
0
        std::string rowset_compact_key = versioned::meta_rowset_compact_key(
4036
0
                {instance_id_, tablet_id, rs_meta.end_version()});
4037
0
        std::string rowset_key = meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4038
0
        RowsetDeleteTask task = create_delete_task(
4039
0
                rs_meta, encode_versioned_key(rowset_compact_key, versionstamp), rowset_key);
4040
0
        all_tasks.push_back(std::move(task));
4041
0
    }
4042
4043
11
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
4044
0
        RecycleRowsetPB recycle_rowset;
4045
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
4046
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4047
0
            return -1;
4048
0
        }
4049
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
4050
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
4051
                // in old version, keep this key-value pair and it needs to be checked manually
4052
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4053
0
                return -1;
4054
0
            }
4055
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
4056
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4057
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4058
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
4059
0
                return -1;
4060
0
            }
4061
            // decode rowset_id
4062
0
            auto k1 = k;
4063
0
            k1.remove_prefix(1);
4064
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4065
0
            decode_key(&k1, &out);
4066
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4067
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4068
0
            LOG_INFO("delete old-version rowset data")
4069
0
                    .tag("instance_id", instance_id_)
4070
0
                    .tag("tablet_id", tablet_id)
4071
0
                    .tag("rowset_id", rowset_id);
4072
4073
            // Old version RecycleRowsetPB lacks full rowset_meta info (num_segments, schema, etc.),
4074
            // so we must use prefix deletion directly instead of batch delete.
4075
0
            concurrent_delete_executor.add(
4076
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
4077
                        // delete by prefix, the recycle rowset key will be deleted by range later.
4078
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
4079
0
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
4080
0
        } else {
4081
0
            const auto& rowset_meta = recycle_rowset.rowset_meta();
4082
            // Version 0-1 rowset has no resource_id and no actual data files,
4083
            // but still needs ref_count key cleanup, so we add it to all_tasks.
4084
            // It will be filtered out in Phase 2 when building rowsets_to_delete.
4085
0
            RowsetDeleteTask task = create_delete_task(rowset_meta, k);
4086
0
            all_tasks.push_back(std::move(task));
4087
0
        }
4088
0
        return 0;
4089
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_
4090
4091
11
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
4092
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
4093
0
                .tag("tablet id", tablet_id)
4094
0
                .tag("instance_id", instance_id_)
4095
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
4096
0
        ret = -1;
4097
0
    }
4098
4099
    // Phase 1: Classify tasks by ref_count
4100
11
    std::vector<RowsetDeleteTask> batch_delete_tasks;
4101
60
    for (auto& task : all_tasks) {
4102
60
        int classify_ret = classify_rowset_task_by_ref_count(task, batch_delete_tasks);
4103
60
        if (classify_ret < 0) {
4104
0
            LOG_WARNING("failed to classify rowset task, fallback to old logic")
4105
0
                    .tag("instance_id", instance_id_)
4106
0
                    .tag("tablet_id", tablet_id)
4107
0
                    .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4108
0
            concurrent_delete_executor.add([this, t = std::move(task)]() mutable {
4109
0
                return recycle_rowset_meta_and_data(t.recycle_rowset_key, t.rowset_meta,
4110
0
                                                    t.non_versioned_rowset_key);
4111
0
            });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_5clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_5clEv
4112
0
        }
4113
60
    }
4114
4115
11
    g_bvar_recycler_batch_delete_rowset_plan_count.put(instance_id_, batch_delete_tasks.size());
4116
4117
11
    LOG_INFO("batch delete plan created")
4118
11
            .tag("instance_id", instance_id_)
4119
11
            .tag("tablet_id", tablet_id)
4120
11
            .tag("plan_count", batch_delete_tasks.size());
4121
4122
    // Phase 2: Execute batch delete using existing delete_rowset_data
4123
11
    if (!batch_delete_tasks.empty()) {
4124
10
        std::map<std::string, RowsetMetaCloudPB> rowsets_to_delete;
4125
49
        for (const auto& task : batch_delete_tasks) {
4126
            // Version 0-1 rowset has no resource_id and no actual data files, skip it
4127
49
            if (task.rowset_meta.resource_id().empty()) {
4128
10
                LOG_INFO("skip rowset with empty resource_id in batch delete")
4129
10
                        .tag("instance_id", instance_id_)
4130
10
                        .tag("tablet_id", tablet_id)
4131
10
                        .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4132
10
                continue;
4133
10
            }
4134
39
            rowsets_to_delete[task.rowset_meta.rowset_id_v2()] = task.rowset_meta;
4135
39
        }
4136
4137
        // Only call delete_rowset_data if there are rowsets with actual data to delete
4138
10
        bool delete_success = true;
4139
10
        if (!rowsets_to_delete.empty()) {
4140
9
            RecyclerMetricsContext batch_metrics_context(instance_id_,
4141
9
                                                         "batch_delete_versioned_tablet");
4142
9
            int delete_ret = delete_rowset_data(
4143
9
                    rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, batch_metrics_context);
4144
9
            if (delete_ret != 0) {
4145
0
                LOG_WARNING("batch delete execution failed")
4146
0
                        .tag("instance_id", instance_id_)
4147
0
                        .tag("tablet_id", tablet_id);
4148
0
                g_bvar_recycler_batch_delete_failures.put(instance_id_, 1);
4149
0
                ret = -1;
4150
0
                delete_success = false;
4151
0
            }
4152
9
        }
4153
4154
        // Phase 3: Only cleanup metadata if data deletion succeeded.
4155
        // If deletion failed, keep recycle_rowset_key so next round will retry.
4156
10
        if (delete_success) {
4157
10
            int cleanup_ret = cleanup_rowset_metadata(batch_delete_tasks);
4158
10
            if (cleanup_ret != 0) {
4159
0
                LOG_WARNING("batch delete cleanup failed")
4160
0
                        .tag("instance_id", instance_id_)
4161
0
                        .tag("tablet_id", tablet_id);
4162
0
                ret = -1;
4163
0
            }
4164
10
        }
4165
10
    }
4166
4167
    // Always wait for fallback tasks to complete before returning
4168
11
    bool finished = true;
4169
11
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4170
11
    for (int r : rets) {
4171
0
        if (r != 0) {
4172
0
            ret = -1;
4173
0
        }
4174
0
    }
4175
4176
11
    ret = finished ? ret : -1;
4177
4178
11
    if (ret != 0) { // failed recycle tablet data
4179
0
        LOG_WARNING("recycle versioned tablet failed")
4180
0
                .tag("finished", finished)
4181
0
                .tag("ret", ret)
4182
0
                .tag("instance_id", instance_id_)
4183
0
                .tag("tablet_id", tablet_id);
4184
0
        return ret;
4185
0
    }
4186
4187
11
    tablet_metrics_context_.total_recycled_data_size +=
4188
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4189
11
    tablet_metrics_context_.total_recycled_num += 1;
4190
11
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4191
11
    segment_metrics_context_.total_recycled_data_size +=
4192
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4193
11
    metrics_context.total_recycled_data_size +=
4194
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4195
11
    tablet_metrics_context_.report();
4196
11
    segment_metrics_context_.report();
4197
11
    metrics_context.report();
4198
4199
11
    txn.reset();
4200
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4201
0
        LOG_WARNING("failed to recycle tablet ")
4202
0
                .tag("tablet id", tablet_id)
4203
0
                .tag("instance_id", instance_id_)
4204
0
                .tag("reason", "failed to create txn");
4205
0
        ret = -1;
4206
0
    }
4207
    // delete all rowset kv in this tablet
4208
11
    txn->remove(rs_key0, rs_key1);
4209
11
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4210
4211
    // remove delete bitmap for MoW table
4212
11
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4213
11
    txn->remove(pending_key);
4214
11
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4215
11
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4216
11
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4217
4218
11
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4219
11
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4220
11
    txn->remove(dbm_start_key, dbm_end_key);
4221
11
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4222
11
              << " end=" << hex(dbm_end_key);
4223
4224
11
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
4225
11
    std::string tablet_index_val;
4226
11
    err = txn->get(versioned_idx_key, &tablet_index_val);
4227
11
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
4228
0
        LOG_WARNING("failed to get tablet index kv")
4229
0
                .tag("instance_id", instance_id_)
4230
0
                .tag("tablet_id", tablet_id)
4231
0
                .tag("err", err);
4232
0
        ret = -1;
4233
11
    } else if (err == TxnErrorCode::TXN_OK) {
4234
        // If the tablet index kv exists, we need to delete it
4235
10
        TabletIndexPB tablet_index_pb;
4236
10
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
4237
0
            LOG_WARNING("failed to parse tablet index pb")
4238
0
                    .tag("instance_id", instance_id_)
4239
0
                    .tag("tablet_id", tablet_id);
4240
0
            ret = -1;
4241
10
        } else {
4242
10
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
4243
10
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
4244
10
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
4245
10
            txn->remove(versioned_inverted_idx_key);
4246
10
            txn->remove(versioned_idx_key);
4247
10
        }
4248
10
    }
4249
4250
11
    err = txn->commit();
4251
11
    if (err != TxnErrorCode::TXN_OK) {
4252
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4253
0
        ret = -1;
4254
0
    }
4255
4256
11
    if (ret == 0) {
4257
        // All object files under tablet have been deleted
4258
11
        std::lock_guard lock(recycled_tablets_mtx_);
4259
11
        recycled_tablets_.insert(tablet_id);
4260
11
    }
4261
4262
11
    return ret;
4263
11
}
4264
4265
18
int InstanceRecycler::recycle_rowsets() {
4266
18
    if (should_recycle_versioned_keys()) {
4267
5
        return recycle_versioned_rowsets();
4268
5
    }
4269
4270
13
    const std::string task_name = "recycle_rowsets";
4271
13
    int64_t num_scanned = 0;
4272
13
    int64_t num_expired = 0;
4273
13
    int64_t num_prepare = 0;
4274
13
    int64_t num_compacted = 0;
4275
13
    int64_t num_empty_rowset = 0;
4276
13
    size_t total_rowset_key_size = 0;
4277
13
    size_t total_rowset_value_size = 0;
4278
13
    size_t expired_rowset_size = 0;
4279
13
    std::atomic_long num_recycled = 0;
4280
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4281
4282
13
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4283
13
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4284
13
    std::string recyc_rs_key0;
4285
13
    std::string recyc_rs_key1;
4286
13
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4287
13
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4288
4289
13
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4290
4291
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4292
13
    register_recycle_task(task_name, start_time);
4293
4294
13
    DORIS_CLOUD_DEFER {
4295
13
        unregister_recycle_task(task_name);
4296
13
        int64_t cost =
4297
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4298
13
        metrics_context.finish_report();
4299
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4300
13
                .tag("instance_id", instance_id_)
4301
13
                .tag("num_scanned", num_scanned)
4302
13
                .tag("num_expired", num_expired)
4303
13
                .tag("num_recycled", num_recycled)
4304
13
                .tag("num_recycled.prepare", num_prepare)
4305
13
                .tag("num_recycled.compacted", num_compacted)
4306
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4307
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4308
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4309
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
4310
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4294
13
    DORIS_CLOUD_DEFER {
4295
13
        unregister_recycle_task(task_name);
4296
13
        int64_t cost =
4297
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4298
13
        metrics_context.finish_report();
4299
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4300
13
                .tag("instance_id", instance_id_)
4301
13
                .tag("num_scanned", num_scanned)
4302
13
                .tag("num_expired", num_expired)
4303
13
                .tag("num_recycled", num_recycled)
4304
13
                .tag("num_recycled.prepare", num_prepare)
4305
13
                .tag("num_recycled.compacted", num_compacted)
4306
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4307
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4308
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4309
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
4310
13
    };
4311
4312
13
    std::vector<std::string> rowset_keys;
4313
    // rowset_id -> rowset_meta
4314
    // store rowset id and meta for statistics rs size when delete
4315
13
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
4316
4317
    // Store keys of rowset recycled by background workers
4318
13
    std::mutex async_recycled_rowset_keys_mutex;
4319
13
    std::vector<std::string> async_recycled_rowset_keys;
4320
13
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4321
13
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
4322
13
    worker_pool->start();
4323
    // TODO bacth delete
4324
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4325
4.00k
        std::string dbm_start_key =
4326
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4327
4.00k
        std::string dbm_end_key = dbm_start_key;
4328
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4329
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4330
4.00k
        if (ret != 0) {
4331
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4332
0
                         << instance_id_;
4333
0
        }
4334
4.00k
        return ret;
4335
4.00k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4324
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4325
4.00k
        std::string dbm_start_key =
4326
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4327
4.00k
        std::string dbm_end_key = dbm_start_key;
4328
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4329
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4330
4.00k
        if (ret != 0) {
4331
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4332
0
                         << instance_id_;
4333
0
        }
4334
4.00k
        return ret;
4335
4.00k
    };
4336
13
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
4337
900
                                            int64_t tablet_id, const std::string& rowset_id) {
4338
        // Try to delete rowset data in background thread
4339
900
        int ret = worker_pool->submit_with_timeout(
4340
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4341
801
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4342
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4343
0
                        return;
4344
0
                    }
4345
801
                    std::vector<std::string> keys;
4346
801
                    {
4347
801
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4348
801
                        async_recycled_rowset_keys.push_back(std::move(key));
4349
801
                        if (async_recycled_rowset_keys.size() > 100) {
4350
7
                            keys.swap(async_recycled_rowset_keys);
4351
7
                        }
4352
801
                    }
4353
801
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4354
801
                    if (keys.empty()) return;
4355
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4356
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4357
0
                                     << instance_id_;
4358
7
                    } else {
4359
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4360
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4361
7
                                           num_recycled, start_time);
4362
7
                    }
4363
7
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4340
801
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4341
801
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4342
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4343
0
                        return;
4344
0
                    }
4345
801
                    std::vector<std::string> keys;
4346
801
                    {
4347
801
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4348
801
                        async_recycled_rowset_keys.push_back(std::move(key));
4349
801
                        if (async_recycled_rowset_keys.size() > 100) {
4350
7
                            keys.swap(async_recycled_rowset_keys);
4351
7
                        }
4352
801
                    }
4353
801
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4354
801
                    if (keys.empty()) return;
4355
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4356
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4357
0
                                     << instance_id_;
4358
7
                    } else {
4359
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4360
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4361
7
                                           num_recycled, start_time);
4362
7
                    }
4363
7
                },
4364
900
                0);
4365
900
        if (ret == 0) return 0;
4366
        // Submit task failed, delete rowset data in current thread
4367
99
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4368
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4369
0
            return -1;
4370
0
        }
4371
99
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4372
0
            return -1;
4373
0
        }
4374
99
        rowset_keys.push_back(std::move(key));
4375
99
        return 0;
4376
99
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4337
900
                                            int64_t tablet_id, const std::string& rowset_id) {
4338
        // Try to delete rowset data in background thread
4339
900
        int ret = worker_pool->submit_with_timeout(
4340
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4341
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4342
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4343
900
                        return;
4344
900
                    }
4345
900
                    std::vector<std::string> keys;
4346
900
                    {
4347
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4348
900
                        async_recycled_rowset_keys.push_back(std::move(key));
4349
900
                        if (async_recycled_rowset_keys.size() > 100) {
4350
900
                            keys.swap(async_recycled_rowset_keys);
4351
900
                        }
4352
900
                    }
4353
900
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4354
900
                    if (keys.empty()) return;
4355
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4356
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4357
900
                                     << instance_id_;
4358
900
                    } else {
4359
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4360
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4361
900
                                           num_recycled, start_time);
4362
900
                    }
4363
900
                },
4364
900
                0);
4365
900
        if (ret == 0) return 0;
4366
        // Submit task failed, delete rowset data in current thread
4367
99
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4368
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4369
0
            return -1;
4370
0
        }
4371
99
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4372
0
            return -1;
4373
0
        }
4374
99
        rowset_keys.push_back(std::move(key));
4375
99
        return 0;
4376
99
    };
4377
4378
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4379
4380
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4381
4.00k
        ++num_scanned;
4382
4.00k
        total_rowset_key_size += k.size();
4383
4.00k
        total_rowset_value_size += v.size();
4384
4.00k
        RecycleRowsetPB rowset;
4385
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4386
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4387
0
            return -1;
4388
0
        }
4389
4390
4.00k
        int64_t current_time = ::time(nullptr);
4391
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4392
4393
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4394
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4395
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4396
4.00k
        if (current_time < expiration) { // not expired
4397
0
            return 0;
4398
0
        }
4399
4.00k
        ++num_expired;
4400
4.00k
        expired_rowset_size += v.size();
4401
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4402
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4403
                // in old version, keep this key-value pair and it needs to be checked manually
4404
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4405
0
                return -1;
4406
0
            }
4407
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4408
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4409
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4410
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4411
0
                rowset_keys.emplace_back(k);
4412
0
                return -1;
4413
0
            }
4414
            // decode rowset_id
4415
250
            auto k1 = k;
4416
250
            k1.remove_prefix(1);
4417
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4418
250
            decode_key(&k1, &out);
4419
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4420
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4421
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4422
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4423
250
                      << " task_type=" << metrics_context.operation_type;
4424
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4425
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4426
0
                return -1;
4427
0
            }
4428
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4429
250
            metrics_context.total_recycled_num++;
4430
250
            segment_metrics_context_.total_recycled_data_size +=
4431
250
                    rowset.rowset_meta().total_disk_size();
4432
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4433
250
            segment_metrics_context_.report();
4434
250
            metrics_context.report();
4435
250
            return 0;
4436
250
        }
4437
        // TODO(plat1ko): check rowset not referenced
4438
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
4439
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4440
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4441
0
                LOG_INFO("recycle rowset that has empty resource id");
4442
0
            } else {
4443
                // other situations, keep this key-value pair and it needs to be checked manually
4444
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4445
0
                return -1;
4446
0
            }
4447
0
        }
4448
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4449
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4450
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4451
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4452
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4453
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4454
3.75k
                  << " rowset_meta_size=" << v.size()
4455
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4456
3.75k
                  << " task_type=" << metrics_context.operation_type;
4457
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4458
            // unable to calculate file path, can only be deleted by rowset id prefix
4459
650
            num_prepare += 1;
4460
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4461
650
                                             rowset_meta->tablet_id(),
4462
650
                                             rowset_meta->rowset_id_v2()) != 0) {
4463
0
                return -1;
4464
0
            }
4465
3.10k
        } else {
4466
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4467
3.10k
            rowset_keys.emplace_back(k);
4468
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4469
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4470
3.10k
                ++num_empty_rowset;
4471
3.10k
            }
4472
3.10k
        }
4473
3.75k
        return 0;
4474
3.75k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4380
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4381
4.00k
        ++num_scanned;
4382
4.00k
        total_rowset_key_size += k.size();
4383
4.00k
        total_rowset_value_size += v.size();
4384
4.00k
        RecycleRowsetPB rowset;
4385
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4386
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4387
0
            return -1;
4388
0
        }
4389
4390
4.00k
        int64_t current_time = ::time(nullptr);
4391
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4392
4393
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4394
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4395
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4396
4.00k
        if (current_time < expiration) { // not expired
4397
0
            return 0;
4398
0
        }
4399
4.00k
        ++num_expired;
4400
4.00k
        expired_rowset_size += v.size();
4401
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4402
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4403
                // in old version, keep this key-value pair and it needs to be checked manually
4404
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4405
0
                return -1;
4406
0
            }
4407
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4408
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4409
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4410
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4411
0
                rowset_keys.emplace_back(k);
4412
0
                return -1;
4413
0
            }
4414
            // decode rowset_id
4415
250
            auto k1 = k;
4416
250
            k1.remove_prefix(1);
4417
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4418
250
            decode_key(&k1, &out);
4419
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4420
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4421
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4422
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4423
250
                      << " task_type=" << metrics_context.operation_type;
4424
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4425
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4426
0
                return -1;
4427
0
            }
4428
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4429
250
            metrics_context.total_recycled_num++;
4430
250
            segment_metrics_context_.total_recycled_data_size +=
4431
250
                    rowset.rowset_meta().total_disk_size();
4432
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4433
250
            segment_metrics_context_.report();
4434
250
            metrics_context.report();
4435
250
            return 0;
4436
250
        }
4437
        // TODO(plat1ko): check rowset not referenced
4438
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
4439
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4440
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4441
0
                LOG_INFO("recycle rowset that has empty resource id");
4442
0
            } else {
4443
                // other situations, keep this key-value pair and it needs to be checked manually
4444
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4445
0
                return -1;
4446
0
            }
4447
0
        }
4448
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4449
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4450
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4451
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4452
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4453
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4454
3.75k
                  << " rowset_meta_size=" << v.size()
4455
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4456
3.75k
                  << " task_type=" << metrics_context.operation_type;
4457
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4458
            // unable to calculate file path, can only be deleted by rowset id prefix
4459
650
            num_prepare += 1;
4460
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4461
650
                                             rowset_meta->tablet_id(),
4462
650
                                             rowset_meta->rowset_id_v2()) != 0) {
4463
0
                return -1;
4464
0
            }
4465
3.10k
        } else {
4466
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4467
3.10k
            rowset_keys.emplace_back(k);
4468
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4469
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4470
3.10k
                ++num_empty_rowset;
4471
3.10k
            }
4472
3.10k
        }
4473
3.75k
        return 0;
4474
3.75k
    };
4475
4476
21
    auto loop_done = [&]() -> int {
4477
21
        std::vector<std::string> rowset_keys_to_delete;
4478
        // rowset_id -> rowset_meta
4479
        // store rowset id and meta for statistics rs size when delete
4480
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4481
21
        rowset_keys_to_delete.swap(rowset_keys);
4482
21
        rowsets_to_delete.swap(rowsets);
4483
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4484
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4485
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4486
21
                                   metrics_context) != 0) {
4487
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4488
0
                return;
4489
0
            }
4490
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4491
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4492
0
                    return;
4493
0
                }
4494
3.10k
            }
4495
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4496
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4497
0
                return;
4498
0
            }
4499
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4500
21
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
4484
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4485
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4486
21
                                   metrics_context) != 0) {
4487
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4488
0
                return;
4489
0
            }
4490
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4491
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4492
0
                    return;
4493
0
                }
4494
3.10k
            }
4495
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4496
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4497
0
                return;
4498
0
            }
4499
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4500
21
        });
4501
21
        return 0;
4502
21
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
4476
21
    auto loop_done = [&]() -> int {
4477
21
        std::vector<std::string> rowset_keys_to_delete;
4478
        // rowset_id -> rowset_meta
4479
        // store rowset id and meta for statistics rs size when delete
4480
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4481
21
        rowset_keys_to_delete.swap(rowset_keys);
4482
21
        rowsets_to_delete.swap(rowsets);
4483
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4484
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4485
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4486
21
                                   metrics_context) != 0) {
4487
21
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4488
21
                return;
4489
21
            }
4490
21
            for (const auto& [_, rs] : rowsets_to_delete) {
4491
21
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4492
21
                    return;
4493
21
                }
4494
21
            }
4495
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4496
21
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4497
21
                return;
4498
21
            }
4499
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4500
21
        });
4501
21
        return 0;
4502
21
    };
4503
4504
13
    if (config::enable_recycler_stats_metrics) {
4505
0
        scan_and_statistics_rowsets();
4506
0
    }
4507
    // recycle_func and loop_done for scan and recycle
4508
13
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
4509
13
                               std::move(loop_done));
4510
4511
13
    worker_pool->stop();
4512
4513
13
    if (!async_recycled_rowset_keys.empty()) {
4514
2
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
4515
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4516
0
            return -1;
4517
2
        } else {
4518
2
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
4519
2
        }
4520
2
    }
4521
13
    return ret;
4522
13
}
4523
4524
13
int InstanceRecycler::recycle_restore_jobs() {
4525
13
    const std::string task_name = "recycle_restore_jobs";
4526
13
    int64_t num_scanned = 0;
4527
13
    int64_t num_expired = 0;
4528
13
    int64_t num_recycled = 0;
4529
13
    int64_t num_aborted = 0;
4530
4531
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4532
4533
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
4534
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
4535
13
    std::string restore_job_key0;
4536
13
    std::string restore_job_key1;
4537
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
4538
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
4539
4540
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
4541
4542
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4543
13
    register_recycle_task(task_name, start_time);
4544
4545
13
    DORIS_CLOUD_DEFER {
4546
13
        unregister_recycle_task(task_name);
4547
13
        int64_t cost =
4548
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4549
13
        metrics_context.finish_report();
4550
4551
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
4552
13
                .tag("instance_id", instance_id_)
4553
13
                .tag("num_scanned", num_scanned)
4554
13
                .tag("num_expired", num_expired)
4555
13
                .tag("num_recycled", num_recycled)
4556
13
                .tag("num_aborted", num_aborted);
4557
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
4545
13
    DORIS_CLOUD_DEFER {
4546
13
        unregister_recycle_task(task_name);
4547
13
        int64_t cost =
4548
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4549
13
        metrics_context.finish_report();
4550
4551
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
4552
13
                .tag("instance_id", instance_id_)
4553
13
                .tag("num_scanned", num_scanned)
4554
13
                .tag("num_expired", num_expired)
4555
13
                .tag("num_recycled", num_recycled)
4556
13
                .tag("num_aborted", num_aborted);
4557
13
    };
4558
4559
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4560
4561
13
    std::vector<std::string_view> restore_job_keys;
4562
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
4563
41
        ++num_scanned;
4564
41
        RestoreJobCloudPB restore_job_pb;
4565
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
4566
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
4567
0
            return -1;
4568
0
        }
4569
41
        int64_t expiration =
4570
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
4571
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
4572
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
4573
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
4574
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
4575
0
                   << " state=" << restore_job_pb.state();
4576
41
        int64_t current_time = ::time(nullptr);
4577
41
        if (current_time < expiration) { // not expired
4578
0
            return 0;
4579
0
        }
4580
41
        ++num_expired;
4581
4582
41
        int64_t tablet_id = restore_job_pb.tablet_id();
4583
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
4584
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
4585
4586
41
        std::unique_ptr<Transaction> txn;
4587
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4588
41
        if (err != TxnErrorCode::TXN_OK) {
4589
0
            LOG_WARNING("failed to recycle restore job")
4590
0
                    .tag("err", err)
4591
0
                    .tag("tablet id", tablet_id)
4592
0
                    .tag("instance_id", instance_id_)
4593
0
                    .tag("reason", "failed to create txn");
4594
0
            return -1;
4595
0
        }
4596
4597
41
        std::string val;
4598
41
        err = txn->get(k, &val);
4599
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
4600
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
4601
0
            return 0;
4602
0
        }
4603
41
        if (err != TxnErrorCode::TXN_OK) {
4604
0
            LOG_WARNING("failed to get kv");
4605
0
            return -1;
4606
0
        }
4607
41
        restore_job_pb.Clear();
4608
41
        if (!restore_job_pb.ParseFromString(val)) {
4609
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
4610
0
            return -1;
4611
0
        }
4612
4613
        // PREPARED or COMMITTED, change state to DROPPED and return
4614
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
4615
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
4616
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
4617
0
            restore_job_pb.set_need_recycle_data(true);
4618
0
            txn->put(k, restore_job_pb.SerializeAsString());
4619
0
            err = txn->commit();
4620
0
            if (err != TxnErrorCode::TXN_OK) {
4621
0
                LOG_WARNING("failed to commit txn: {}", err);
4622
0
                return -1;
4623
0
            }
4624
0
            num_aborted++;
4625
0
            return 0;
4626
0
        }
4627
4628
        // Change state to RECYCLING
4629
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
4630
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
4631
21
            txn->put(k, restore_job_pb.SerializeAsString());
4632
21
            err = txn->commit();
4633
21
            if (err != TxnErrorCode::TXN_OK) {
4634
0
                LOG_WARNING("failed to commit txn: {}", err);
4635
0
                return -1;
4636
0
            }
4637
21
            return 0;
4638
21
        }
4639
4640
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4641
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4642
4643
        // Recycle all data associated with the restore job.
4644
        // This includes rowsets, segments, and related resources.
4645
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
4646
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
4647
0
            LOG_WARNING("failed to recycle tablet")
4648
0
                    .tag("tablet_id", tablet_id)
4649
0
                    .tag("instance_id", instance_id_);
4650
0
            return -1;
4651
0
        }
4652
4653
        // delete all restore job rowset kv
4654
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4655
4656
20
        err = txn->commit();
4657
20
        if (err != TxnErrorCode::TXN_OK) {
4658
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
4659
0
                    .tag("err", err)
4660
0
                    .tag("tablet id", tablet_id)
4661
0
                    .tag("instance_id", instance_id_)
4662
0
                    .tag("reason", "failed to commit txn");
4663
0
            return -1;
4664
0
        }
4665
4666
20
        metrics_context.total_recycled_num = ++num_recycled;
4667
20
        metrics_context.report();
4668
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4669
20
        restore_job_keys.push_back(k);
4670
4671
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
4672
20
                  << " tablet_id=" << tablet_id;
4673
20
        return 0;
4674
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4562
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
4563
41
        ++num_scanned;
4564
41
        RestoreJobCloudPB restore_job_pb;
4565
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
4566
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
4567
0
            return -1;
4568
0
        }
4569
41
        int64_t expiration =
4570
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
4571
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
4572
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
4573
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
4574
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
4575
0
                   << " state=" << restore_job_pb.state();
4576
41
        int64_t current_time = ::time(nullptr);
4577
41
        if (current_time < expiration) { // not expired
4578
0
            return 0;
4579
0
        }
4580
41
        ++num_expired;
4581
4582
41
        int64_t tablet_id = restore_job_pb.tablet_id();
4583
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
4584
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
4585
4586
41
        std::unique_ptr<Transaction> txn;
4587
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4588
41
        if (err != TxnErrorCode::TXN_OK) {
4589
0
            LOG_WARNING("failed to recycle restore job")
4590
0
                    .tag("err", err)
4591
0
                    .tag("tablet id", tablet_id)
4592
0
                    .tag("instance_id", instance_id_)
4593
0
                    .tag("reason", "failed to create txn");
4594
0
            return -1;
4595
0
        }
4596
4597
41
        std::string val;
4598
41
        err = txn->get(k, &val);
4599
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
4600
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
4601
0
            return 0;
4602
0
        }
4603
41
        if (err != TxnErrorCode::TXN_OK) {
4604
0
            LOG_WARNING("failed to get kv");
4605
0
            return -1;
4606
0
        }
4607
41
        restore_job_pb.Clear();
4608
41
        if (!restore_job_pb.ParseFromString(val)) {
4609
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
4610
0
            return -1;
4611
0
        }
4612
4613
        // PREPARED or COMMITTED, change state to DROPPED and return
4614
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
4615
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
4616
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
4617
0
            restore_job_pb.set_need_recycle_data(true);
4618
0
            txn->put(k, restore_job_pb.SerializeAsString());
4619
0
            err = txn->commit();
4620
0
            if (err != TxnErrorCode::TXN_OK) {
4621
0
                LOG_WARNING("failed to commit txn: {}", err);
4622
0
                return -1;
4623
0
            }
4624
0
            num_aborted++;
4625
0
            return 0;
4626
0
        }
4627
4628
        // Change state to RECYCLING
4629
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
4630
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
4631
21
            txn->put(k, restore_job_pb.SerializeAsString());
4632
21
            err = txn->commit();
4633
21
            if (err != TxnErrorCode::TXN_OK) {
4634
0
                LOG_WARNING("failed to commit txn: {}", err);
4635
0
                return -1;
4636
0
            }
4637
21
            return 0;
4638
21
        }
4639
4640
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4641
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4642
4643
        // Recycle all data associated with the restore job.
4644
        // This includes rowsets, segments, and related resources.
4645
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
4646
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
4647
0
            LOG_WARNING("failed to recycle tablet")
4648
0
                    .tag("tablet_id", tablet_id)
4649
0
                    .tag("instance_id", instance_id_);
4650
0
            return -1;
4651
0
        }
4652
4653
        // delete all restore job rowset kv
4654
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4655
4656
20
        err = txn->commit();
4657
20
        if (err != TxnErrorCode::TXN_OK) {
4658
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
4659
0
                    .tag("err", err)
4660
0
                    .tag("tablet id", tablet_id)
4661
0
                    .tag("instance_id", instance_id_)
4662
0
                    .tag("reason", "failed to commit txn");
4663
0
            return -1;
4664
0
        }
4665
4666
20
        metrics_context.total_recycled_num = ++num_recycled;
4667
20
        metrics_context.report();
4668
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4669
20
        restore_job_keys.push_back(k);
4670
4671
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
4672
20
                  << " tablet_id=" << tablet_id;
4673
20
        return 0;
4674
20
    };
4675
4676
13
    auto loop_done = [&restore_job_keys, this]() -> int {
4677
3
        if (restore_job_keys.empty()) return 0;
4678
1
        DORIS_CLOUD_DEFER {
4679
1
            restore_job_keys.clear();
4680
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4678
1
        DORIS_CLOUD_DEFER {
4679
1
            restore_job_keys.clear();
4680
1
        };
4681
4682
1
        std::unique_ptr<Transaction> txn;
4683
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4684
1
        if (err != TxnErrorCode::TXN_OK) {
4685
0
            LOG_WARNING("failed to recycle restore job")
4686
0
                    .tag("err", err)
4687
0
                    .tag("instance_id", instance_id_)
4688
0
                    .tag("reason", "failed to create txn");
4689
0
            return -1;
4690
0
        }
4691
20
        for (auto& k : restore_job_keys) {
4692
20
            txn->remove(k);
4693
20
        }
4694
1
        err = txn->commit();
4695
1
        if (err != TxnErrorCode::TXN_OK) {
4696
0
            LOG_WARNING("failed to recycle restore job")
4697
0
                    .tag("err", err)
4698
0
                    .tag("instance_id", instance_id_)
4699
0
                    .tag("reason", "failed to commit txn");
4700
0
            return -1;
4701
0
        }
4702
1
        return 0;
4703
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
4676
3
    auto loop_done = [&restore_job_keys, this]() -> int {
4677
3
        if (restore_job_keys.empty()) return 0;
4678
1
        DORIS_CLOUD_DEFER {
4679
1
            restore_job_keys.clear();
4680
1
        };
4681
4682
1
        std::unique_ptr<Transaction> txn;
4683
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4684
1
        if (err != TxnErrorCode::TXN_OK) {
4685
0
            LOG_WARNING("failed to recycle restore job")
4686
0
                    .tag("err", err)
4687
0
                    .tag("instance_id", instance_id_)
4688
0
                    .tag("reason", "failed to create txn");
4689
0
            return -1;
4690
0
        }
4691
20
        for (auto& k : restore_job_keys) {
4692
20
            txn->remove(k);
4693
20
        }
4694
1
        err = txn->commit();
4695
1
        if (err != TxnErrorCode::TXN_OK) {
4696
0
            LOG_WARNING("failed to recycle restore job")
4697
0
                    .tag("err", err)
4698
0
                    .tag("instance_id", instance_id_)
4699
0
                    .tag("reason", "failed to commit txn");
4700
0
            return -1;
4701
0
        }
4702
1
        return 0;
4703
1
    };
4704
4705
13
    if (config::enable_recycler_stats_metrics) {
4706
0
        scan_and_statistics_restore_jobs();
4707
0
    }
4708
4709
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
4710
13
                            std::move(loop_done));
4711
13
}
4712
4713
8
int InstanceRecycler::recycle_versioned_rowsets() {
4714
8
    const std::string task_name = "recycle_rowsets";
4715
8
    int64_t num_scanned = 0;
4716
8
    int64_t num_expired = 0;
4717
8
    int64_t num_prepare = 0;
4718
8
    int64_t num_compacted = 0;
4719
8
    int64_t num_empty_rowset = 0;
4720
8
    size_t total_rowset_key_size = 0;
4721
8
    size_t total_rowset_value_size = 0;
4722
8
    size_t expired_rowset_size = 0;
4723
8
    std::atomic_long num_recycled = 0;
4724
8
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4725
4726
8
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4727
8
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4728
8
    std::string recyc_rs_key0;
4729
8
    std::string recyc_rs_key1;
4730
8
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4731
8
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4732
4733
8
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4734
4735
8
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4736
8
    register_recycle_task(task_name, start_time);
4737
4738
8
    DORIS_CLOUD_DEFER {
4739
8
        unregister_recycle_task(task_name);
4740
8
        int64_t cost =
4741
8
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4742
8
        metrics_context.finish_report();
4743
8
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4744
8
                .tag("instance_id", instance_id_)
4745
8
                .tag("num_scanned", num_scanned)
4746
8
                .tag("num_expired", num_expired)
4747
8
                .tag("num_recycled", num_recycled)
4748
8
                .tag("num_recycled.prepare", num_prepare)
4749
8
                .tag("num_recycled.compacted", num_compacted)
4750
8
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4751
8
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4752
8
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4753
8
                .tag("expired_rowset_meta_size", expired_rowset_size);
4754
8
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
4738
8
    DORIS_CLOUD_DEFER {
4739
8
        unregister_recycle_task(task_name);
4740
8
        int64_t cost =
4741
8
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4742
8
        metrics_context.finish_report();
4743
8
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4744
8
                .tag("instance_id", instance_id_)
4745
8
                .tag("num_scanned", num_scanned)
4746
8
                .tag("num_expired", num_expired)
4747
8
                .tag("num_recycled", num_recycled)
4748
8
                .tag("num_recycled.prepare", num_prepare)
4749
8
                .tag("num_recycled.compacted", num_compacted)
4750
8
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4751
8
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4752
8
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4753
8
                .tag("expired_rowset_meta_size", expired_rowset_size);
4754
8
    };
4755
4756
8
    std::vector<std::string> orphan_rowset_keys;
4757
4758
    // Store keys of rowset recycled by background workers
4759
8
    std::mutex async_recycled_rowset_keys_mutex;
4760
8
    std::vector<std::string> async_recycled_rowset_keys;
4761
8
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4762
8
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
4763
8
    worker_pool->start();
4764
8
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
4765
200
                                            int64_t tablet_id, const std::string& rowset_id) {
4766
        // Try to delete rowset data in background thread
4767
200
        int ret = worker_pool->submit_with_timeout(
4768
200
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4769
200
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4770
200
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4771
200
                        return;
4772
200
                    }
4773
                    // The async recycled rowsets are staled format or has not been used,
4774
                    // so we don't need to check the rowset ref count key.
4775
0
                    std::vector<std::string> keys;
4776
0
                    {
4777
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4778
0
                        async_recycled_rowset_keys.push_back(std::move(key));
4779
0
                        if (async_recycled_rowset_keys.size() > 100) {
4780
0
                            keys.swap(async_recycled_rowset_keys);
4781
0
                        }
4782
0
                    }
4783
0
                    if (keys.empty()) return;
4784
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4785
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4786
0
                                     << instance_id_;
4787
0
                    } else {
4788
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4789
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4790
0
                                           num_recycled, start_time);
4791
0
                    }
4792
0
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4768
200
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4769
200
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4770
200
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4771
200
                        return;
4772
200
                    }
4773
                    // The async recycled rowsets are staled format or has not been used,
4774
                    // so we don't need to check the rowset ref count key.
4775
0
                    std::vector<std::string> keys;
4776
0
                    {
4777
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4778
0
                        async_recycled_rowset_keys.push_back(std::move(key));
4779
0
                        if (async_recycled_rowset_keys.size() > 100) {
4780
0
                            keys.swap(async_recycled_rowset_keys);
4781
0
                        }
4782
0
                    }
4783
0
                    if (keys.empty()) return;
4784
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4785
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4786
0
                                     << instance_id_;
4787
0
                    } else {
4788
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4789
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4790
0
                                           num_recycled, start_time);
4791
0
                    }
4792
0
                },
4793
200
                0);
4794
200
        if (ret == 0) return 0;
4795
        // Submit task failed, delete rowset data in current thread
4796
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4797
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4798
0
            return -1;
4799
0
        }
4800
0
        orphan_rowset_keys.push_back(std::move(key));
4801
0
        return 0;
4802
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4765
200
                                            int64_t tablet_id, const std::string& rowset_id) {
4766
        // Try to delete rowset data in background thread
4767
200
        int ret = worker_pool->submit_with_timeout(
4768
200
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4769
200
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4770
200
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4771
200
                        return;
4772
200
                    }
4773
                    // The async recycled rowsets are staled format or has not been used,
4774
                    // so we don't need to check the rowset ref count key.
4775
200
                    std::vector<std::string> keys;
4776
200
                    {
4777
200
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4778
200
                        async_recycled_rowset_keys.push_back(std::move(key));
4779
200
                        if (async_recycled_rowset_keys.size() > 100) {
4780
200
                            keys.swap(async_recycled_rowset_keys);
4781
200
                        }
4782
200
                    }
4783
200
                    if (keys.empty()) return;
4784
200
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4785
200
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4786
200
                                     << instance_id_;
4787
200
                    } else {
4788
200
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4789
200
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4790
200
                                           num_recycled, start_time);
4791
200
                    }
4792
200
                },
4793
200
                0);
4794
200
        if (ret == 0) return 0;
4795
        // Submit task failed, delete rowset data in current thread
4796
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4797
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4798
0
            return -1;
4799
0
        }
4800
0
        orphan_rowset_keys.push_back(std::move(key));
4801
0
        return 0;
4802
0
    };
4803
4804
8
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4805
4806
1.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
4807
1.01k
        ++num_scanned;
4808
1.01k
        total_rowset_key_size += k.size();
4809
1.01k
        total_rowset_value_size += v.size();
4810
1.01k
        RecycleRowsetPB rowset;
4811
1.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4812
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4813
0
            return -1;
4814
0
        }
4815
4816
1.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4817
4818
1.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4819
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
4820
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4821
1.01k
        int64_t current_time = ::time(nullptr);
4822
1.01k
        if (current_time < final_expiration) { // not expired
4823
0
            return 0;
4824
0
        }
4825
1.01k
        ++num_expired;
4826
1.01k
        expired_rowset_size += v.size();
4827
1.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4828
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4829
                // in old version, keep this key-value pair and it needs to be checked manually
4830
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4831
0
                return -1;
4832
0
            }
4833
0
            if (rowset.resource_id().empty()) [[unlikely]] {
4834
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4835
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4836
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4837
0
                orphan_rowset_keys.emplace_back(k);
4838
0
                return -1;
4839
0
            }
4840
            // decode rowset_id
4841
0
            auto k1 = k;
4842
0
            k1.remove_prefix(1);
4843
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4844
0
            decode_key(&k1, &out);
4845
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4846
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4847
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4848
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
4849
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4850
0
                                             rowset.tablet_id(), rowset_id) != 0) {
4851
0
                return -1;
4852
0
            }
4853
0
            return 0;
4854
0
        }
4855
        // TODO(plat1ko): check rowset not referenced
4856
1.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
4857
1.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4858
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4859
0
                LOG_INFO("recycle rowset that has empty resource id");
4860
0
            } else {
4861
                // other situations, keep this key-value pair and it needs to be checked manually
4862
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4863
0
                return -1;
4864
0
            }
4865
0
        }
4866
1.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4867
1.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
4868
1.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4869
1.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4870
1.01k
                  << "] txn_id=" << rowset_meta->txn_id()
4871
1.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4872
1.01k
                  << " rowset_meta_size=" << v.size()
4873
1.01k
                  << " creation_time=" << rowset_meta->creation_time();
4874
1.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4875
            // unable to calculate file path, can only be deleted by rowset id prefix
4876
200
            num_prepare += 1;
4877
200
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4878
200
                                             rowset_meta->tablet_id(),
4879
200
                                             rowset_meta->rowset_id_v2()) != 0) {
4880
0
                return -1;
4881
0
            }
4882
813
        } else {
4883
813
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
4884
813
            worker_pool->submit(
4885
813
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
4886
813
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
4887
800
                            return;
4888
800
                        }
4889
13
                        num_compacted += is_compacted;
4890
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
4891
13
                        if (rowset_meta.num_segments() == 0) {
4892
0
                            ++num_empty_rowset;
4893
0
                        }
4894
13
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
4885
813
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
4886
813
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
4887
800
                            return;
4888
800
                        }
4889
13
                        num_compacted += is_compacted;
4890
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
4891
13
                        if (rowset_meta.num_segments() == 0) {
4892
0
                            ++num_empty_rowset;
4893
0
                        }
4894
13
                    });
4895
813
        }
4896
1.01k
        return 0;
4897
1.01k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4806
1.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
4807
1.01k
        ++num_scanned;
4808
1.01k
        total_rowset_key_size += k.size();
4809
1.01k
        total_rowset_value_size += v.size();
4810
1.01k
        RecycleRowsetPB rowset;
4811
1.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4812
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4813
0
            return -1;
4814
0
        }
4815
4816
1.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4817
4818
1.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4819
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
4820
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4821
1.01k
        int64_t current_time = ::time(nullptr);
4822
1.01k
        if (current_time < final_expiration) { // not expired
4823
0
            return 0;
4824
0
        }
4825
1.01k
        ++num_expired;
4826
1.01k
        expired_rowset_size += v.size();
4827
1.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4828
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4829
                // in old version, keep this key-value pair and it needs to be checked manually
4830
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4831
0
                return -1;
4832
0
            }
4833
0
            if (rowset.resource_id().empty()) [[unlikely]] {
4834
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4835
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4836
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4837
0
                orphan_rowset_keys.emplace_back(k);
4838
0
                return -1;
4839
0
            }
4840
            // decode rowset_id
4841
0
            auto k1 = k;
4842
0
            k1.remove_prefix(1);
4843
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4844
0
            decode_key(&k1, &out);
4845
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4846
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4847
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4848
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
4849
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4850
0
                                             rowset.tablet_id(), rowset_id) != 0) {
4851
0
                return -1;
4852
0
            }
4853
0
            return 0;
4854
0
        }
4855
        // TODO(plat1ko): check rowset not referenced
4856
1.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
4857
1.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4858
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4859
0
                LOG_INFO("recycle rowset that has empty resource id");
4860
0
            } else {
4861
                // other situations, keep this key-value pair and it needs to be checked manually
4862
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4863
0
                return -1;
4864
0
            }
4865
0
        }
4866
1.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4867
1.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
4868
1.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4869
1.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4870
1.01k
                  << "] txn_id=" << rowset_meta->txn_id()
4871
1.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4872
1.01k
                  << " rowset_meta_size=" << v.size()
4873
1.01k
                  << " creation_time=" << rowset_meta->creation_time();
4874
1.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4875
            // unable to calculate file path, can only be deleted by rowset id prefix
4876
200
            num_prepare += 1;
4877
200
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4878
200
                                             rowset_meta->tablet_id(),
4879
200
                                             rowset_meta->rowset_id_v2()) != 0) {
4880
0
                return -1;
4881
0
            }
4882
813
        } else {
4883
813
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
4884
813
            worker_pool->submit(
4885
813
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
4886
813
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
4887
813
                            return;
4888
813
                        }
4889
813
                        num_compacted += is_compacted;
4890
813
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
4891
813
                        if (rowset_meta.num_segments() == 0) {
4892
813
                            ++num_empty_rowset;
4893
813
                        }
4894
813
                    });
4895
813
        }
4896
1.01k
        return 0;
4897
1.01k
    };
4898
4899
8
    if (config::enable_recycler_stats_metrics) {
4900
0
        scan_and_statistics_rowsets();
4901
0
    }
4902
4903
8
    auto loop_done = [&]() -> int {
4904
5
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
4905
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4906
0
        }
4907
5
        orphan_rowset_keys.clear();
4908
5
        return 0;
4909
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
4903
5
    auto loop_done = [&]() -> int {
4904
5
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
4905
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4906
0
        }
4907
5
        orphan_rowset_keys.clear();
4908
5
        return 0;
4909
5
    };
4910
4911
    // recycle_func and loop_done for scan and recycle
4912
8
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
4913
8
                               std::move(loop_done));
4914
4915
8
    worker_pool->stop();
4916
4917
8
    if (!async_recycled_rowset_keys.empty()) {
4918
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
4919
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4920
0
            return -1;
4921
0
        } else {
4922
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
4923
0
        }
4924
0
    }
4925
8
    return ret;
4926
8
}
4927
4928
int InstanceRecycler::recycle_rowset_meta_and_data(std::string_view recycle_rowset_key,
4929
                                                   const RowsetMetaCloudPB& rowset_meta,
4930
813
                                                   std::string_view non_versioned_rowset_key) {
4931
813
    constexpr int MAX_RETRY = 10;
4932
813
    int64_t tablet_id = rowset_meta.tablet_id();
4933
813
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
4934
813
    std::string_view reference_instance_id = instance_id_;
4935
813
    if (rowset_meta.has_reference_instance_id()) {
4936
8
        reference_instance_id = rowset_meta.reference_instance_id();
4937
8
    }
4938
4939
813
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
4940
813
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
4941
813
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(recycle_rowset_key));
4942
813
    AnnotateTag instance_id_tag("instance_id", instance_id_);
4943
813
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
4944
813
    for (int i = 0; i < MAX_RETRY; ++i) {
4945
813
        std::unique_ptr<Transaction> txn;
4946
813
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4947
813
        if (err != TxnErrorCode::TXN_OK) {
4948
0
            LOG_WARNING("failed to create txn").tag("err", err);
4949
0
            return -1;
4950
0
        }
4951
4952
813
        std::string rowset_ref_count_key =
4953
813
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
4954
813
        int64_t ref_count = 0;
4955
813
        {
4956
813
            std::string value;
4957
813
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
4958
813
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
4959
                // This is the old version rowset, we could recycle it directly.
4960
802
                ref_count = 1;
4961
802
            } else if (err != TxnErrorCode::TXN_OK) {
4962
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
4963
0
                return -1;
4964
11
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
4965
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
4966
0
                return -1;
4967
0
            }
4968
813
        }
4969
4970
813
        if (ref_count == 1) {
4971
            // It would not be added since it is recycling.
4972
810
            if (delete_rowset_data(rowset_meta) != 0) {
4973
800
                LOG_WARNING("failed to delete rowset data");
4974
800
                return -1;
4975
800
            }
4976
4977
            // Reset the transaction to avoid timeout.
4978
10
            err = txn_kv_->create_txn(&txn);
4979
10
            if (err != TxnErrorCode::TXN_OK) {
4980
0
                LOG_WARNING("failed to create txn").tag("err", err);
4981
0
                return -1;
4982
0
            }
4983
10
            txn->remove(rowset_ref_count_key);
4984
10
            LOG_INFO("delete rowset data ref count key")
4985
10
                    .tag("txn_id", rowset_meta.txn_id())
4986
10
                    .tag("ref_count_key", hex(rowset_ref_count_key));
4987
4988
10
            std::string dbm_start_key =
4989
10
                    meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
4990
10
            std::string dbm_end_key = meta_delete_bitmap_key(
4991
10
                    {reference_instance_id, tablet_id, rowset_id,
4992
10
                     std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
4993
10
            txn->remove(dbm_start_key, dbm_end_key);
4994
10
            LOG_INFO("remove delete bitmap kv")
4995
10
                    .tag("begin", hex(dbm_start_key))
4996
10
                    .tag("end", hex(dbm_end_key));
4997
4998
10
            std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key(
4999
10
                    {reference_instance_id, tablet_id, rowset_id});
5000
10
            std::string versioned_dbm_end_key = versioned_dbm_start_key;
5001
10
            encode_int64(INT64_MAX, &versioned_dbm_end_key);
5002
10
            txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
5003
10
            LOG_INFO("remove versioned delete bitmap kv")
5004
10
                    .tag("begin", hex(versioned_dbm_start_key))
5005
10
                    .tag("end", hex(versioned_dbm_end_key));
5006
5007
10
            std::string meta_rowset_key_begin =
5008
10
                    versioned::meta_rowset_key({reference_instance_id, tablet_id, rowset_id});
5009
10
            std::string meta_rowset_key_end = meta_rowset_key_begin;
5010
10
            encode_int64(INT64_MAX, &meta_rowset_key_end);
5011
10
            txn->remove(meta_rowset_key_begin, meta_rowset_key_end);
5012
10
            LOG_INFO("remove meta rowset key").tag("key", hex(meta_rowset_key_begin));
5013
10
        } else {
5014
            // Decrease the rowset ref count.
5015
            //
5016
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
5017
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
5018
3
            txn->atomic_add(rowset_ref_count_key, -1);
5019
3
            LOG_INFO("decrease rowset data ref count")
5020
3
                    .tag("txn_id", rowset_meta.txn_id())
5021
3
                    .tag("ref_count", ref_count - 1)
5022
3
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5023
3
        }
5024
5025
13
        if (!recycle_rowset_key.empty()) { // empty when recycle ref rowsets for deleted instance
5026
13
            txn->remove(recycle_rowset_key);
5027
13
            LOG_INFO("remove recycle rowset key").tag("key", hex(recycle_rowset_key));
5028
13
        }
5029
13
        if (!non_versioned_rowset_key.empty()) {
5030
0
            txn->remove(non_versioned_rowset_key);
5031
0
            LOG_INFO("remove non versioned rowset key").tag("key", hex(non_versioned_rowset_key));
5032
0
        }
5033
5034
13
        err = txn->commit();
5035
13
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
5036
            // The rowset ref count key has been changed, we need to retry.
5037
0
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
5038
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
5039
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
5040
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5041
0
            continue;
5042
13
        } else if (err != TxnErrorCode::TXN_OK) {
5043
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
5044
0
            return -1;
5045
0
        }
5046
13
        LOG_INFO("recycle rowset meta and data success");
5047
13
        return 0;
5048
13
    }
5049
0
    LOG_WARNING("failed to recycle rowset meta and data after retry")
5050
0
            .tag("tablet_id", tablet_id)
5051
0
            .tag("rowset_id", rowset_id)
5052
0
            .tag("retry", MAX_RETRY);
5053
0
    return -1;
5054
813
}
5055
5056
18
int InstanceRecycler::recycle_tmp_rowsets() {
5057
18
    const std::string task_name = "recycle_tmp_rowsets";
5058
18
    int64_t num_scanned = 0;
5059
18
    int64_t num_expired = 0;
5060
18
    std::atomic_long num_recycled = 0;
5061
18
    size_t expired_rowset_size = 0;
5062
18
    size_t total_rowset_key_size = 0;
5063
18
    size_t total_rowset_value_size = 0;
5064
18
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5065
5066
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
5067
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
5068
18
    std::string tmp_rs_key0;
5069
18
    std::string tmp_rs_key1;
5070
18
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
5071
18
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
5072
5073
18
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
5074
5075
18
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5076
18
    register_recycle_task(task_name, start_time);
5077
5078
18
    DORIS_CLOUD_DEFER {
5079
18
        unregister_recycle_task(task_name);
5080
18
        int64_t cost =
5081
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5082
18
        metrics_context.finish_report();
5083
18
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5084
18
                .tag("instance_id", instance_id_)
5085
18
                .tag("num_scanned", num_scanned)
5086
18
                .tag("num_expired", num_expired)
5087
18
                .tag("num_recycled", num_recycled)
5088
18
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5089
18
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5090
18
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5091
18
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5078
4
    DORIS_CLOUD_DEFER {
5079
4
        unregister_recycle_task(task_name);
5080
4
        int64_t cost =
5081
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5082
4
        metrics_context.finish_report();
5083
4
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5084
4
                .tag("instance_id", instance_id_)
5085
4
                .tag("num_scanned", num_scanned)
5086
4
                .tag("num_expired", num_expired)
5087
4
                .tag("num_recycled", num_recycled)
5088
4
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5089
4
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5090
4
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5091
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5078
14
    DORIS_CLOUD_DEFER {
5079
14
        unregister_recycle_task(task_name);
5080
14
        int64_t cost =
5081
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5082
14
        metrics_context.finish_report();
5083
14
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5084
14
                .tag("instance_id", instance_id_)
5085
14
                .tag("num_scanned", num_scanned)
5086
14
                .tag("num_expired", num_expired)
5087
14
                .tag("num_recycled", num_recycled)
5088
14
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5089
14
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5090
14
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5091
14
    };
5092
5093
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
5094
5095
18
    std::vector<std::string> tmp_rowset_keys;
5096
18
    std::vector<std::string> tmp_rowset_ref_count_keys;
5097
5098
    // rowset_id -> rowset_meta
5099
    // store tmp_rowset id and meta for statistics rs size when delete
5100
18
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
5101
18
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5102
18
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
5103
18
    worker_pool->start();
5104
5105
18
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5106
5107
18
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
5108
18
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
5109
18
                             &earlest_ts, &tmp_rowset_ref_count_keys, this,
5110
57.0k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5111
57.0k
        ++num_scanned;
5112
57.0k
        total_rowset_key_size += k.size();
5113
57.0k
        total_rowset_value_size += v.size();
5114
57.0k
        doris::RowsetMetaCloudPB rowset;
5115
57.0k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5116
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5117
0
            return -1;
5118
0
        }
5119
57.0k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5120
57.0k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5121
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5122
0
                   << " txn_expiration=" << rowset.txn_expiration()
5123
0
                   << " rowset_creation_time=" << rowset.creation_time();
5124
57.0k
        int64_t current_time = ::time(nullptr);
5125
57.0k
        if (current_time < expiration) { // not expired
5126
0
            return 0;
5127
0
        }
5128
5129
57.0k
        DCHECK_GT(rowset.txn_id(), 0)
5130
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
5131
57.0k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
5132
2.00k
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
5133
2.00k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
5134
2.00k
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
5135
2.00k
                      << rowset.start_version() << '-' << rowset.end_version()
5136
2.00k
                      << "] txn_id=" << rowset.txn_id()
5137
2.00k
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
5138
2.00k
                      << " txn_expiration=" << rowset.txn_expiration();
5139
2.00k
            return 0;
5140
2.00k
        }
5141
5142
55.0k
        ++num_expired;
5143
55.0k
        expired_rowset_size += v.size();
5144
55.0k
        if (!rowset.has_resource_id()) {
5145
4.00k
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5146
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5147
0
                return -1;
5148
0
            }
5149
            // might be a delete pred rowset
5150
4.00k
            tmp_rowset_keys.emplace_back(k);
5151
4.00k
            return 0;
5152
4.00k
        }
5153
        // TODO(plat1ko): check rowset not referenced
5154
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5155
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5156
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5157
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5158
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5159
51.0k
                  << " num_expired=" << num_expired
5160
51.0k
                  << " task_type=" << metrics_context.operation_type;
5161
5162
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5163
        // Remove the rowset ref count key directly since it has not been used.
5164
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5165
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5166
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5167
51.0k
                  << "key=" << hex(rowset_ref_count_key);
5168
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5169
5170
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5171
51.0k
        return 0;
5172
55.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5110
6.00k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5111
6.00k
        ++num_scanned;
5112
6.00k
        total_rowset_key_size += k.size();
5113
6.00k
        total_rowset_value_size += v.size();
5114
6.00k
        doris::RowsetMetaCloudPB rowset;
5115
6.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5116
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5117
0
            return -1;
5118
0
        }
5119
6.00k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5120
6.00k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5121
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5122
0
                   << " txn_expiration=" << rowset.txn_expiration()
5123
0
                   << " rowset_creation_time=" << rowset.creation_time();
5124
6.00k
        int64_t current_time = ::time(nullptr);
5125
6.00k
        if (current_time < expiration) { // not expired
5126
0
            return 0;
5127
0
        }
5128
5129
6.00k
        DCHECK_GT(rowset.txn_id(), 0)
5130
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
5131
6.00k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
5132
2.00k
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
5133
2.00k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
5134
2.00k
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
5135
2.00k
                      << rowset.start_version() << '-' << rowset.end_version()
5136
2.00k
                      << "] txn_id=" << rowset.txn_id()
5137
2.00k
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
5138
2.00k
                      << " txn_expiration=" << rowset.txn_expiration();
5139
2.00k
            return 0;
5140
2.00k
        }
5141
5142
4.00k
        ++num_expired;
5143
4.00k
        expired_rowset_size += v.size();
5144
4.00k
        if (!rowset.has_resource_id()) {
5145
4.00k
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5146
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5147
0
                return -1;
5148
0
            }
5149
            // might be a delete pred rowset
5150
4.00k
            tmp_rowset_keys.emplace_back(k);
5151
4.00k
            return 0;
5152
4.00k
        }
5153
        // TODO(plat1ko): check rowset not referenced
5154
0
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5155
0
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5156
0
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5157
0
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5158
0
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5159
0
                  << " num_expired=" << num_expired
5160
0
                  << " task_type=" << metrics_context.operation_type;
5161
5162
0
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5163
        // Remove the rowset ref count key directly since it has not been used.
5164
0
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5165
0
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5166
0
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5167
0
                  << "key=" << hex(rowset_ref_count_key);
5168
0
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5169
5170
0
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5171
0
        return 0;
5172
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5110
51.0k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5111
51.0k
        ++num_scanned;
5112
51.0k
        total_rowset_key_size += k.size();
5113
51.0k
        total_rowset_value_size += v.size();
5114
51.0k
        doris::RowsetMetaCloudPB rowset;
5115
51.0k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5116
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5117
0
            return -1;
5118
0
        }
5119
51.0k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5120
51.0k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5121
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5122
0
                   << " txn_expiration=" << rowset.txn_expiration()
5123
0
                   << " rowset_creation_time=" << rowset.creation_time();
5124
51.0k
        int64_t current_time = ::time(nullptr);
5125
51.0k
        if (current_time < expiration) { // not expired
5126
0
            return 0;
5127
0
        }
5128
5129
51.0k
        DCHECK_GT(rowset.txn_id(), 0)
5130
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
5131
51.0k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
5132
0
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
5133
0
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
5134
0
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
5135
0
                      << rowset.start_version() << '-' << rowset.end_version()
5136
0
                      << "] txn_id=" << rowset.txn_id()
5137
0
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
5138
0
                      << " txn_expiration=" << rowset.txn_expiration();
5139
0
            return 0;
5140
0
        }
5141
5142
51.0k
        ++num_expired;
5143
51.0k
        expired_rowset_size += v.size();
5144
51.0k
        if (!rowset.has_resource_id()) {
5145
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5146
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5147
0
                return -1;
5148
0
            }
5149
            // might be a delete pred rowset
5150
0
            tmp_rowset_keys.emplace_back(k);
5151
0
            return 0;
5152
0
        }
5153
        // TODO(plat1ko): check rowset not referenced
5154
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5155
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5156
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5157
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5158
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5159
51.0k
                  << " num_expired=" << num_expired
5160
51.0k
                  << " task_type=" << metrics_context.operation_type;
5161
5162
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5163
        // Remove the rowset ref count key directly since it has not been used.
5164
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5165
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5166
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5167
51.0k
                  << "key=" << hex(rowset_ref_count_key);
5168
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5169
5170
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5171
51.0k
        return 0;
5172
51.0k
    };
5173
5174
    // TODO batch delete
5175
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5176
51.0k
        std::string dbm_start_key =
5177
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5178
51.0k
        std::string dbm_end_key = dbm_start_key;
5179
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5180
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5181
51.0k
        if (ret != 0) {
5182
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5183
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5184
0
                         << ", rowset_id=" << rowset_id;
5185
0
        }
5186
51.0k
        return ret;
5187
51.0k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5175
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5176
51.0k
        std::string dbm_start_key =
5177
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5178
51.0k
        std::string dbm_end_key = dbm_start_key;
5179
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5180
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5181
51.0k
        if (ret != 0) {
5182
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5183
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5184
0
                         << ", rowset_id=" << rowset_id;
5185
0
        }
5186
51.0k
        return ret;
5187
51.0k
    };
5188
5189
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5190
51.0k
        auto delete_bitmap_start =
5191
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5192
51.0k
        auto delete_bitmap_end =
5193
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5194
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5195
51.0k
        if (ret != 0) {
5196
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5197
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5198
0
        }
5199
51.0k
        return ret;
5200
51.0k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5189
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5190
51.0k
        auto delete_bitmap_start =
5191
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5192
51.0k
        auto delete_bitmap_end =
5193
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5194
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5195
51.0k
        if (ret != 0) {
5196
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5197
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5198
0
        }
5199
51.0k
        return ret;
5200
51.0k
    };
5201
5202
18
    auto loop_done = [&]() -> int {
5203
10
        DORIS_CLOUD_DEFER {
5204
10
            tmp_rowset_keys.clear();
5205
10
            tmp_rowsets.clear();
5206
10
            tmp_rowset_ref_count_keys.clear();
5207
10
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5203
3
        DORIS_CLOUD_DEFER {
5204
3
            tmp_rowset_keys.clear();
5205
3
            tmp_rowsets.clear();
5206
3
            tmp_rowset_ref_count_keys.clear();
5207
3
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5203
7
        DORIS_CLOUD_DEFER {
5204
7
            tmp_rowset_keys.clear();
5205
7
            tmp_rowsets.clear();
5206
7
            tmp_rowset_ref_count_keys.clear();
5207
7
        };
5208
10
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5209
10
                             tmp_rowsets_to_delete = tmp_rowsets,
5210
10
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5211
10
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5212
10
                                   metrics_context) != 0) {
5213
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5214
0
                return;
5215
0
            }
5216
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5217
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5218
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5219
0
                                 << rs.ShortDebugString();
5220
0
                    return;
5221
0
                }
5222
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5223
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5224
0
                                 << rs.ShortDebugString();
5225
0
                    return;
5226
0
                }
5227
51.0k
            }
5228
10
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5229
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5230
0
                return;
5231
0
            }
5232
10
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5233
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5234
0
                return;
5235
0
            }
5236
10
            num_recycled += tmp_rowset_keys.size();
5237
10
            return;
5238
10
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5210
3
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5211
3
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5212
3
                                   metrics_context) != 0) {
5213
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5214
0
                return;
5215
0
            }
5216
3
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5217
0
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5218
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5219
0
                                 << rs.ShortDebugString();
5220
0
                    return;
5221
0
                }
5222
0
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5223
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5224
0
                                 << rs.ShortDebugString();
5225
0
                    return;
5226
0
                }
5227
0
            }
5228
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5229
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5230
0
                return;
5231
0
            }
5232
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5233
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5234
0
                return;
5235
0
            }
5236
3
            num_recycled += tmp_rowset_keys.size();
5237
3
            return;
5238
3
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5210
7
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5211
7
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5212
7
                                   metrics_context) != 0) {
5213
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5214
0
                return;
5215
0
            }
5216
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5217
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5218
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5219
0
                                 << rs.ShortDebugString();
5220
0
                    return;
5221
0
                }
5222
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5223
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5224
0
                                 << rs.ShortDebugString();
5225
0
                    return;
5226
0
                }
5227
51.0k
            }
5228
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5229
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5230
0
                return;
5231
0
            }
5232
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5233
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5234
0
                return;
5235
0
            }
5236
7
            num_recycled += tmp_rowset_keys.size();
5237
7
            return;
5238
7
        });
5239
10
        return 0;
5240
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
5202
3
    auto loop_done = [&]() -> int {
5203
3
        DORIS_CLOUD_DEFER {
5204
3
            tmp_rowset_keys.clear();
5205
3
            tmp_rowsets.clear();
5206
3
            tmp_rowset_ref_count_keys.clear();
5207
3
        };
5208
3
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5209
3
                             tmp_rowsets_to_delete = tmp_rowsets,
5210
3
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5211
3
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5212
3
                                   metrics_context) != 0) {
5213
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5214
3
                return;
5215
3
            }
5216
3
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5217
3
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5218
3
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5219
3
                                 << rs.ShortDebugString();
5220
3
                    return;
5221
3
                }
5222
3
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5223
3
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5224
3
                                 << rs.ShortDebugString();
5225
3
                    return;
5226
3
                }
5227
3
            }
5228
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5229
3
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5230
3
                return;
5231
3
            }
5232
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5233
3
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5234
3
                return;
5235
3
            }
5236
3
            num_recycled += tmp_rowset_keys.size();
5237
3
            return;
5238
3
        });
5239
3
        return 0;
5240
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
5202
7
    auto loop_done = [&]() -> int {
5203
7
        DORIS_CLOUD_DEFER {
5204
7
            tmp_rowset_keys.clear();
5205
7
            tmp_rowsets.clear();
5206
7
            tmp_rowset_ref_count_keys.clear();
5207
7
        };
5208
7
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5209
7
                             tmp_rowsets_to_delete = tmp_rowsets,
5210
7
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5211
7
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5212
7
                                   metrics_context) != 0) {
5213
7
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5214
7
                return;
5215
7
            }
5216
7
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5217
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5218
7
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5219
7
                                 << rs.ShortDebugString();
5220
7
                    return;
5221
7
                }
5222
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5223
7
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5224
7
                                 << rs.ShortDebugString();
5225
7
                    return;
5226
7
                }
5227
7
            }
5228
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5229
7
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5230
7
                return;
5231
7
            }
5232
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5233
7
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5234
7
                return;
5235
7
            }
5236
7
            num_recycled += tmp_rowset_keys.size();
5237
7
            return;
5238
7
        });
5239
7
        return 0;
5240
7
    };
5241
5242
18
    if (config::enable_recycler_stats_metrics) {
5243
0
        scan_and_statistics_tmp_rowsets();
5244
0
    }
5245
    // recycle_func and loop_done for scan and recycle
5246
18
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
5247
18
                               std::move(loop_done));
5248
5249
18
    worker_pool->stop();
5250
18
    return ret;
5251
18
}
5252
5253
int InstanceRecycler::scan_and_recycle(
5254
        std::string begin, std::string_view end,
5255
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
5256
236
        std::function<int()> loop_done) {
5257
236
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
5258
236
    int ret = 0;
5259
236
    int64_t cnt = 0;
5260
236
    int get_range_retried = 0;
5261
236
    std::string err;
5262
236
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5263
236
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5264
236
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5265
236
                  << " ret=" << ret << " err=" << err;
5266
236
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5262
19
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5263
19
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5264
19
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5265
19
                  << " ret=" << ret << " err=" << err;
5266
19
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5262
217
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5263
217
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5264
217
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5265
217
                  << " ret=" << ret << " err=" << err;
5266
217
    };
5267
5268
236
    std::unique_ptr<RangeGetIterator> it;
5269
264
    do {
5270
264
        if (get_range_retried > 1000) {
5271
0
            err = "txn_get exceeds max retry, may not scan all keys";
5272
0
            ret = -1;
5273
0
            return -1;
5274
0
        }
5275
264
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
5276
264
        if (get_ret != 0) { // txn kv may complain "Request for future version"
5277
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
5278
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
5279
0
                         << " get_range_retried=" << get_range_retried;
5280
0
            ++get_range_retried;
5281
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5282
0
            continue; // try again
5283
0
        }
5284
264
        if (!it->has_next()) {
5285
133
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
5286
133
            break; // scan finished
5287
133
        }
5288
100k
        while (it->has_next()) {
5289
100k
            ++cnt;
5290
            // recycle corresponding resources
5291
100k
            auto [k, v] = it->next();
5292
100k
            if (!it->has_next()) {
5293
131
                begin = k;
5294
131
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
5295
131
            }
5296
            // if we want to continue scanning, the recycle_func should not return non-zero
5297
100k
            if (recycle_func(k, v) != 0) {
5298
4.00k
                err = "recycle_func error";
5299
4.00k
                ret = -1;
5300
4.00k
            }
5301
100k
        }
5302
131
        begin.push_back('\x00'); // Update to next smallest key for iteration
5303
        // if we want to continue scanning, the recycle_func should not return non-zero
5304
131
        if (loop_done && loop_done() != 0) {
5305
3
            err = "loop_done error";
5306
3
            ret = -1;
5307
3
        }
5308
131
    } while (it->more() && !stopped());
5309
236
    return ret;
5310
236
}
5311
5312
20
int InstanceRecycler::abort_timeout_txn() {
5313
20
    const std::string task_name = "abort_timeout_txn";
5314
20
    int64_t num_scanned = 0;
5315
20
    int64_t num_timeout = 0;
5316
20
    int64_t num_abort = 0;
5317
20
    int64_t num_advance = 0;
5318
20
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5319
5320
20
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
5321
20
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5322
20
    std::string begin_txn_running_key;
5323
20
    std::string end_txn_running_key;
5324
20
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
5325
20
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
5326
5327
20
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
5328
5329
20
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5330
20
    register_recycle_task(task_name, start_time);
5331
5332
20
    DORIS_CLOUD_DEFER {
5333
20
        unregister_recycle_task(task_name);
5334
20
        int64_t cost =
5335
20
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5336
20
        metrics_context.finish_report();
5337
20
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5338
20
                .tag("instance_id", instance_id_)
5339
20
                .tag("num_scanned", num_scanned)
5340
20
                .tag("num_timeout", num_timeout)
5341
20
                .tag("num_abort", num_abort)
5342
20
                .tag("num_advance", num_advance);
5343
20
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
5332
4
    DORIS_CLOUD_DEFER {
5333
4
        unregister_recycle_task(task_name);
5334
4
        int64_t cost =
5335
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5336
4
        metrics_context.finish_report();
5337
4
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5338
4
                .tag("instance_id", instance_id_)
5339
4
                .tag("num_scanned", num_scanned)
5340
4
                .tag("num_timeout", num_timeout)
5341
4
                .tag("num_abort", num_abort)
5342
4
                .tag("num_advance", num_advance);
5343
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
5332
16
    DORIS_CLOUD_DEFER {
5333
16
        unregister_recycle_task(task_name);
5334
16
        int64_t cost =
5335
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5336
16
        metrics_context.finish_report();
5337
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5338
16
                .tag("instance_id", instance_id_)
5339
16
                .tag("num_scanned", num_scanned)
5340
16
                .tag("num_timeout", num_timeout)
5341
16
                .tag("num_abort", num_abort)
5342
16
                .tag("num_advance", num_advance);
5343
16
    };
5344
5345
20
    int64_t current_time =
5346
20
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5347
5348
20
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
5349
20
                                  &current_time, &metrics_context,
5350
20
                                  this](std::string_view k, std::string_view v) -> int {
5351
10
        ++num_scanned;
5352
5353
10
        std::unique_ptr<Transaction> txn;
5354
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5355
10
        if (err != TxnErrorCode::TXN_OK) {
5356
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5357
0
            return -1;
5358
0
        }
5359
10
        std::string_view k1 = k;
5360
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5361
10
        k1.remove_prefix(1); // Remove key space
5362
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5363
10
        if (decode_key(&k1, &out) != 0) {
5364
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5365
0
            return -1;
5366
0
        }
5367
10
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5368
10
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5369
10
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5370
        // Update txn_info
5371
10
        std::string txn_inf_key, txn_inf_val;
5372
10
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5373
10
        err = txn->get(txn_inf_key, &txn_inf_val);
5374
10
        if (err != TxnErrorCode::TXN_OK) {
5375
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5376
0
            return -1;
5377
0
        }
5378
10
        TxnInfoPB txn_info;
5379
10
        if (!txn_info.ParseFromString(txn_inf_val)) {
5380
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5381
0
            return -1;
5382
0
        }
5383
5384
10
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5385
4
            txn.reset();
5386
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5387
4
            std::shared_ptr<TxnLazyCommitTask> task =
5388
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5389
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5390
4
            if (ret.first != MetaServiceCode::OK) {
5391
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5392
0
                             << "msg=" << ret.second;
5393
0
                return -1;
5394
0
            }
5395
4
            ++num_advance;
5396
4
            return 0;
5397
6
        } else {
5398
6
            TxnRunningPB txn_running_pb;
5399
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5400
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5401
0
                return -1;
5402
0
            }
5403
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5404
4
                return 0;
5405
4
            }
5406
2
            ++num_timeout;
5407
5408
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5409
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5410
2
            txn_info.set_finish_time(current_time);
5411
2
            txn_info.set_reason("timeout");
5412
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5413
2
            txn_inf_val.clear();
5414
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5415
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5416
0
                return -1;
5417
0
            }
5418
2
            txn->put(txn_inf_key, txn_inf_val);
5419
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5420
            // Put recycle txn key
5421
2
            std::string recyc_txn_key, recyc_txn_val;
5422
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5423
2
            RecycleTxnPB recycle_txn_pb;
5424
2
            recycle_txn_pb.set_creation_time(current_time);
5425
2
            recycle_txn_pb.set_label(txn_info.label());
5426
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5427
0
                LOG_WARNING("failed to serialize txn recycle info")
5428
0
                        .tag("key", hex(k))
5429
0
                        .tag("db_id", db_id)
5430
0
                        .tag("txn_id", txn_id);
5431
0
                return -1;
5432
0
            }
5433
2
            txn->put(recyc_txn_key, recyc_txn_val);
5434
            // Remove txn running key
5435
2
            txn->remove(k);
5436
2
            err = txn->commit();
5437
2
            if (err != TxnErrorCode::TXN_OK) {
5438
0
                LOG_WARNING("failed to commit txn err={}", err)
5439
0
                        .tag("key", hex(k))
5440
0
                        .tag("db_id", db_id)
5441
0
                        .tag("txn_id", txn_id);
5442
0
                return -1;
5443
0
            }
5444
2
            metrics_context.total_recycled_num = ++num_abort;
5445
2
            metrics_context.report();
5446
2
        }
5447
5448
2
        return 0;
5449
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5350
4
                                  this](std::string_view k, std::string_view v) -> int {
5351
4
        ++num_scanned;
5352
5353
4
        std::unique_ptr<Transaction> txn;
5354
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5355
4
        if (err != TxnErrorCode::TXN_OK) {
5356
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5357
0
            return -1;
5358
0
        }
5359
4
        std::string_view k1 = k;
5360
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5361
4
        k1.remove_prefix(1); // Remove key space
5362
4
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5363
4
        if (decode_key(&k1, &out) != 0) {
5364
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5365
0
            return -1;
5366
0
        }
5367
4
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5368
4
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5369
4
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5370
        // Update txn_info
5371
4
        std::string txn_inf_key, txn_inf_val;
5372
4
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5373
4
        err = txn->get(txn_inf_key, &txn_inf_val);
5374
4
        if (err != TxnErrorCode::TXN_OK) {
5375
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5376
0
            return -1;
5377
0
        }
5378
4
        TxnInfoPB txn_info;
5379
4
        if (!txn_info.ParseFromString(txn_inf_val)) {
5380
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5381
0
            return -1;
5382
0
        }
5383
5384
4
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5385
4
            txn.reset();
5386
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5387
4
            std::shared_ptr<TxnLazyCommitTask> task =
5388
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5389
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5390
4
            if (ret.first != MetaServiceCode::OK) {
5391
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5392
0
                             << "msg=" << ret.second;
5393
0
                return -1;
5394
0
            }
5395
4
            ++num_advance;
5396
4
            return 0;
5397
4
        } else {
5398
0
            TxnRunningPB txn_running_pb;
5399
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5400
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5401
0
                return -1;
5402
0
            }
5403
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5404
0
                return 0;
5405
0
            }
5406
0
            ++num_timeout;
5407
5408
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5409
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5410
0
            txn_info.set_finish_time(current_time);
5411
0
            txn_info.set_reason("timeout");
5412
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5413
0
            txn_inf_val.clear();
5414
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5415
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5416
0
                return -1;
5417
0
            }
5418
0
            txn->put(txn_inf_key, txn_inf_val);
5419
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5420
            // Put recycle txn key
5421
0
            std::string recyc_txn_key, recyc_txn_val;
5422
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5423
0
            RecycleTxnPB recycle_txn_pb;
5424
0
            recycle_txn_pb.set_creation_time(current_time);
5425
0
            recycle_txn_pb.set_label(txn_info.label());
5426
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5427
0
                LOG_WARNING("failed to serialize txn recycle info")
5428
0
                        .tag("key", hex(k))
5429
0
                        .tag("db_id", db_id)
5430
0
                        .tag("txn_id", txn_id);
5431
0
                return -1;
5432
0
            }
5433
0
            txn->put(recyc_txn_key, recyc_txn_val);
5434
            // Remove txn running key
5435
0
            txn->remove(k);
5436
0
            err = txn->commit();
5437
0
            if (err != TxnErrorCode::TXN_OK) {
5438
0
                LOG_WARNING("failed to commit txn err={}", err)
5439
0
                        .tag("key", hex(k))
5440
0
                        .tag("db_id", db_id)
5441
0
                        .tag("txn_id", txn_id);
5442
0
                return -1;
5443
0
            }
5444
0
            metrics_context.total_recycled_num = ++num_abort;
5445
0
            metrics_context.report();
5446
0
        }
5447
5448
0
        return 0;
5449
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5350
6
                                  this](std::string_view k, std::string_view v) -> int {
5351
6
        ++num_scanned;
5352
5353
6
        std::unique_ptr<Transaction> txn;
5354
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5355
6
        if (err != TxnErrorCode::TXN_OK) {
5356
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5357
0
            return -1;
5358
0
        }
5359
6
        std::string_view k1 = k;
5360
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5361
6
        k1.remove_prefix(1); // Remove key space
5362
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5363
6
        if (decode_key(&k1, &out) != 0) {
5364
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5365
0
            return -1;
5366
0
        }
5367
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5368
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5369
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5370
        // Update txn_info
5371
6
        std::string txn_inf_key, txn_inf_val;
5372
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5373
6
        err = txn->get(txn_inf_key, &txn_inf_val);
5374
6
        if (err != TxnErrorCode::TXN_OK) {
5375
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5376
0
            return -1;
5377
0
        }
5378
6
        TxnInfoPB txn_info;
5379
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
5380
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5381
0
            return -1;
5382
0
        }
5383
5384
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5385
0
            txn.reset();
5386
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5387
0
            std::shared_ptr<TxnLazyCommitTask> task =
5388
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5389
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5390
0
            if (ret.first != MetaServiceCode::OK) {
5391
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5392
0
                             << "msg=" << ret.second;
5393
0
                return -1;
5394
0
            }
5395
0
            ++num_advance;
5396
0
            return 0;
5397
6
        } else {
5398
6
            TxnRunningPB txn_running_pb;
5399
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5400
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5401
0
                return -1;
5402
0
            }
5403
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5404
4
                return 0;
5405
4
            }
5406
2
            ++num_timeout;
5407
5408
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5409
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5410
2
            txn_info.set_finish_time(current_time);
5411
2
            txn_info.set_reason("timeout");
5412
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5413
2
            txn_inf_val.clear();
5414
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5415
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5416
0
                return -1;
5417
0
            }
5418
2
            txn->put(txn_inf_key, txn_inf_val);
5419
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5420
            // Put recycle txn key
5421
2
            std::string recyc_txn_key, recyc_txn_val;
5422
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5423
2
            RecycleTxnPB recycle_txn_pb;
5424
2
            recycle_txn_pb.set_creation_time(current_time);
5425
2
            recycle_txn_pb.set_label(txn_info.label());
5426
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5427
0
                LOG_WARNING("failed to serialize txn recycle info")
5428
0
                        .tag("key", hex(k))
5429
0
                        .tag("db_id", db_id)
5430
0
                        .tag("txn_id", txn_id);
5431
0
                return -1;
5432
0
            }
5433
2
            txn->put(recyc_txn_key, recyc_txn_val);
5434
            // Remove txn running key
5435
2
            txn->remove(k);
5436
2
            err = txn->commit();
5437
2
            if (err != TxnErrorCode::TXN_OK) {
5438
0
                LOG_WARNING("failed to commit txn err={}", err)
5439
0
                        .tag("key", hex(k))
5440
0
                        .tag("db_id", db_id)
5441
0
                        .tag("txn_id", txn_id);
5442
0
                return -1;
5443
0
            }
5444
2
            metrics_context.total_recycled_num = ++num_abort;
5445
2
            metrics_context.report();
5446
2
        }
5447
5448
2
        return 0;
5449
6
    };
5450
5451
20
    if (config::enable_recycler_stats_metrics) {
5452
0
        scan_and_statistics_abort_timeout_txn();
5453
0
    }
5454
    // recycle_func and loop_done for scan and recycle
5455
20
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
5456
20
                            std::move(handle_txn_running_kv));
5457
20
}
5458
5459
21
int InstanceRecycler::recycle_expired_txn_label() {
5460
21
    const std::string task_name = "recycle_expired_txn_label";
5461
21
    int64_t num_scanned = 0;
5462
21
    int64_t num_expired = 0;
5463
21
    int64_t num_recycled = 0;
5464
21
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5465
21
    int ret = 0;
5466
5467
21
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
5468
21
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5469
21
    std::string begin_recycle_txn_key;
5470
21
    std::string end_recycle_txn_key;
5471
21
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
5472
21
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
5473
21
    std::vector<std::string> recycle_txn_info_keys;
5474
5475
21
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
5476
5477
21
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5478
21
    register_recycle_task(task_name, start_time);
5479
21
    DORIS_CLOUD_DEFER {
5480
21
        unregister_recycle_task(task_name);
5481
21
        int64_t cost =
5482
21
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5483
21
        metrics_context.finish_report();
5484
21
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5485
21
                .tag("instance_id", instance_id_)
5486
21
                .tag("num_scanned", num_scanned)
5487
21
                .tag("num_expired", num_expired)
5488
21
                .tag("num_recycled", num_recycled);
5489
21
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5479
3
    DORIS_CLOUD_DEFER {
5480
3
        unregister_recycle_task(task_name);
5481
3
        int64_t cost =
5482
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5483
3
        metrics_context.finish_report();
5484
3
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5485
3
                .tag("instance_id", instance_id_)
5486
3
                .tag("num_scanned", num_scanned)
5487
3
                .tag("num_expired", num_expired)
5488
3
                .tag("num_recycled", num_recycled);
5489
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5479
18
    DORIS_CLOUD_DEFER {
5480
18
        unregister_recycle_task(task_name);
5481
18
        int64_t cost =
5482
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5483
18
        metrics_context.finish_report();
5484
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5485
18
                .tag("instance_id", instance_id_)
5486
18
                .tag("num_scanned", num_scanned)
5487
18
                .tag("num_expired", num_expired)
5488
18
                .tag("num_recycled", num_recycled);
5489
18
    };
5490
5491
21
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5492
5493
21
    SyncExecutor<int> concurrent_delete_executor(
5494
21
            _thread_pool_group.s3_producer_pool,
5495
21
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
5496
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5496
3
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5496
23.0k
            [](const int& ret) { return ret != 0; });
5497
5498
21
    int64_t current_time_ms =
5499
21
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5500
5501
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5502
30.0k
        ++num_scanned;
5503
30.0k
        RecycleTxnPB recycle_txn_pb;
5504
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5505
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5506
0
            return -1;
5507
0
        }
5508
30.0k
        if ((config::force_immediate_recycle) ||
5509
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5510
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5511
30.0k
             current_time_ms)) {
5512
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5513
23.0k
            num_expired++;
5514
23.0k
            recycle_txn_info_keys.emplace_back(k);
5515
23.0k
        }
5516
30.0k
        return 0;
5517
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5501
3
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5502
3
        ++num_scanned;
5503
3
        RecycleTxnPB recycle_txn_pb;
5504
3
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5505
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5506
0
            return -1;
5507
0
        }
5508
3
        if ((config::force_immediate_recycle) ||
5509
3
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5510
3
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5511
3
             current_time_ms)) {
5512
3
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5513
3
            num_expired++;
5514
3
            recycle_txn_info_keys.emplace_back(k);
5515
3
        }
5516
3
        return 0;
5517
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5501
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5502
30.0k
        ++num_scanned;
5503
30.0k
        RecycleTxnPB recycle_txn_pb;
5504
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5505
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5506
0
            return -1;
5507
0
        }
5508
30.0k
        if ((config::force_immediate_recycle) ||
5509
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5510
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5511
30.0k
             current_time_ms)) {
5512
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5513
23.0k
            num_expired++;
5514
23.0k
            recycle_txn_info_keys.emplace_back(k);
5515
23.0k
        }
5516
30.0k
        return 0;
5517
30.0k
    };
5518
5519
    // Returns 0 on success, 1 on txn conflict (caller may retry), -1 on error
5520
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5521
23.0k
        std::string_view k1 = k;
5522
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5523
23.0k
        k1.remove_prefix(1); // Remove key space
5524
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5525
23.0k
        int ret = decode_key(&k1, &out);
5526
23.0k
        if (ret != 0) {
5527
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5528
0
            return -1;
5529
0
        }
5530
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5531
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5532
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5533
23.0k
        std::unique_ptr<Transaction> txn;
5534
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5535
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5536
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5537
0
            return -1;
5538
0
        }
5539
        // Remove txn index kv
5540
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
5541
23.0k
        txn->remove(index_key);
5542
        // Remove txn info kv
5543
23.0k
        std::string info_key, info_val;
5544
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
5545
23.0k
        err = txn->get(info_key, &info_val);
5546
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5547
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
5548
0
            return -1;
5549
0
        }
5550
23.0k
        TxnInfoPB txn_info;
5551
23.0k
        if (!txn_info.ParseFromString(info_val)) {
5552
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
5553
0
            return -1;
5554
0
        }
5555
23.0k
        txn->remove(info_key);
5556
        // Remove sub txn index kvs
5557
23.0k
        std::vector<std::string> sub_txn_index_keys;
5558
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
5559
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
5560
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
5561
22.9k
        }
5562
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
5563
22.9k
            txn->remove(sub_txn_index_key);
5564
22.9k
        }
5565
        // Update txn label
5566
23.0k
        std::string label_key, label_val;
5567
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
5568
23.0k
        err = txn->get(label_key, &label_val);
5569
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5570
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
5571
0
                         << " err=" << err;
5572
0
            return -1;
5573
0
        }
5574
23.0k
        TxnLabelPB txn_label;
5575
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
5576
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
5577
0
            return -1;
5578
0
        }
5579
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
5580
23.0k
        if (it != txn_label.txn_ids().end()) {
5581
23.0k
            txn_label.mutable_txn_ids()->erase(it);
5582
23.0k
        }
5583
23.0k
        if (txn_label.txn_ids().empty()) {
5584
23.0k
            txn->remove(label_key);
5585
23.0k
            TEST_SYNC_POINT_CALLBACK(
5586
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
5587
23.0k
        } else {
5588
73
            if (!txn_label.SerializeToString(&label_val)) {
5589
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
5590
0
                return -1;
5591
0
            }
5592
73
            TEST_SYNC_POINT_CALLBACK(
5593
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
5594
73
            txn->atomic_set_ver_value(label_key, label_val);
5595
73
            TEST_SYNC_POINT_CALLBACK(
5596
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
5597
73
        }
5598
        // Remove recycle txn kv
5599
23.0k
        txn->remove(k);
5600
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
5601
23.0k
        err = txn->commit();
5602
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5603
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
5604
62
                TEST_SYNC_POINT_CALLBACK(
5605
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
5606
                // log the txn_id and label
5607
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
5608
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
5609
62
                             << " txn_label=" << txn_info.label();
5610
62
                return 1;
5611
62
            }
5612
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
5613
0
            return -1;
5614
62
        }
5615
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
5616
23.0k
        metrics_context.report();
5617
5618
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
5619
23.0k
        return 0;
5620
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5520
3
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5521
3
        std::string_view k1 = k;
5522
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5523
3
        k1.remove_prefix(1); // Remove key space
5524
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5525
3
        int ret = decode_key(&k1, &out);
5526
3
        if (ret != 0) {
5527
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5528
0
            return -1;
5529
0
        }
5530
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5531
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5532
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5533
3
        std::unique_ptr<Transaction> txn;
5534
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5535
3
        if (err != TxnErrorCode::TXN_OK) {
5536
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5537
0
            return -1;
5538
0
        }
5539
        // Remove txn index kv
5540
3
        auto index_key = txn_index_key({instance_id_, txn_id});
5541
3
        txn->remove(index_key);
5542
        // Remove txn info kv
5543
3
        std::string info_key, info_val;
5544
3
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
5545
3
        err = txn->get(info_key, &info_val);
5546
3
        if (err != TxnErrorCode::TXN_OK) {
5547
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
5548
0
            return -1;
5549
0
        }
5550
3
        TxnInfoPB txn_info;
5551
3
        if (!txn_info.ParseFromString(info_val)) {
5552
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
5553
0
            return -1;
5554
0
        }
5555
3
        txn->remove(info_key);
5556
        // Remove sub txn index kvs
5557
3
        std::vector<std::string> sub_txn_index_keys;
5558
3
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
5559
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
5560
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
5561
0
        }
5562
3
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
5563
0
            txn->remove(sub_txn_index_key);
5564
0
        }
5565
        // Update txn label
5566
3
        std::string label_key, label_val;
5567
3
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
5568
3
        err = txn->get(label_key, &label_val);
5569
3
        if (err != TxnErrorCode::TXN_OK) {
5570
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
5571
0
                         << " err=" << err;
5572
0
            return -1;
5573
0
        }
5574
3
        TxnLabelPB txn_label;
5575
3
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
5576
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
5577
0
            return -1;
5578
0
        }
5579
3
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
5580
3
        if (it != txn_label.txn_ids().end()) {
5581
3
            txn_label.mutable_txn_ids()->erase(it);
5582
3
        }
5583
3
        if (txn_label.txn_ids().empty()) {
5584
3
            txn->remove(label_key);
5585
3
            TEST_SYNC_POINT_CALLBACK(
5586
3
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
5587
3
        } else {
5588
0
            if (!txn_label.SerializeToString(&label_val)) {
5589
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
5590
0
                return -1;
5591
0
            }
5592
0
            TEST_SYNC_POINT_CALLBACK(
5593
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
5594
0
            txn->atomic_set_ver_value(label_key, label_val);
5595
0
            TEST_SYNC_POINT_CALLBACK(
5596
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
5597
0
        }
5598
        // Remove recycle txn kv
5599
3
        txn->remove(k);
5600
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
5601
3
        err = txn->commit();
5602
3
        if (err != TxnErrorCode::TXN_OK) {
5603
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
5604
0
                TEST_SYNC_POINT_CALLBACK(
5605
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
5606
                // log the txn_id and label
5607
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
5608
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
5609
0
                             << " txn_label=" << txn_info.label();
5610
0
                return 1;
5611
0
            }
5612
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
5613
0
            return -1;
5614
0
        }
5615
3
        metrics_context.total_recycled_num = ++num_recycled;
5616
3
        metrics_context.report();
5617
5618
3
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
5619
3
        return 0;
5620
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5520
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5521
23.0k
        std::string_view k1 = k;
5522
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5523
23.0k
        k1.remove_prefix(1); // Remove key space
5524
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5525
23.0k
        int ret = decode_key(&k1, &out);
5526
23.0k
        if (ret != 0) {
5527
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5528
0
            return -1;
5529
0
        }
5530
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5531
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5532
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5533
23.0k
        std::unique_ptr<Transaction> txn;
5534
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5535
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5536
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5537
0
            return -1;
5538
0
        }
5539
        // Remove txn index kv
5540
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
5541
23.0k
        txn->remove(index_key);
5542
        // Remove txn info kv
5543
23.0k
        std::string info_key, info_val;
5544
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
5545
23.0k
        err = txn->get(info_key, &info_val);
5546
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5547
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
5548
0
            return -1;
5549
0
        }
5550
23.0k
        TxnInfoPB txn_info;
5551
23.0k
        if (!txn_info.ParseFromString(info_val)) {
5552
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
5553
0
            return -1;
5554
0
        }
5555
23.0k
        txn->remove(info_key);
5556
        // Remove sub txn index kvs
5557
23.0k
        std::vector<std::string> sub_txn_index_keys;
5558
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
5559
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
5560
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
5561
22.9k
        }
5562
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
5563
22.9k
            txn->remove(sub_txn_index_key);
5564
22.9k
        }
5565
        // Update txn label
5566
23.0k
        std::string label_key, label_val;
5567
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
5568
23.0k
        err = txn->get(label_key, &label_val);
5569
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5570
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
5571
0
                         << " err=" << err;
5572
0
            return -1;
5573
0
        }
5574
23.0k
        TxnLabelPB txn_label;
5575
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
5576
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
5577
0
            return -1;
5578
0
        }
5579
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
5580
23.0k
        if (it != txn_label.txn_ids().end()) {
5581
23.0k
            txn_label.mutable_txn_ids()->erase(it);
5582
23.0k
        }
5583
23.0k
        if (txn_label.txn_ids().empty()) {
5584
23.0k
            txn->remove(label_key);
5585
23.0k
            TEST_SYNC_POINT_CALLBACK(
5586
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
5587
23.0k
        } else {
5588
73
            if (!txn_label.SerializeToString(&label_val)) {
5589
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
5590
0
                return -1;
5591
0
            }
5592
73
            TEST_SYNC_POINT_CALLBACK(
5593
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
5594
73
            txn->atomic_set_ver_value(label_key, label_val);
5595
73
            TEST_SYNC_POINT_CALLBACK(
5596
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
5597
73
        }
5598
        // Remove recycle txn kv
5599
23.0k
        txn->remove(k);
5600
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
5601
23.0k
        err = txn->commit();
5602
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5603
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
5604
62
                TEST_SYNC_POINT_CALLBACK(
5605
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
5606
                // log the txn_id and label
5607
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
5608
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
5609
62
                             << " txn_label=" << txn_info.label();
5610
62
                return 1;
5611
62
            }
5612
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
5613
0
            return -1;
5614
62
        }
5615
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
5616
23.0k
        metrics_context.report();
5617
5618
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
5619
23.0k
        return 0;
5620
23.0k
    };
5621
5622
21
    auto loop_done = [&]() -> int {
5623
12
        DORIS_CLOUD_DEFER {
5624
12
            recycle_txn_info_keys.clear();
5625
12
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5623
3
        DORIS_CLOUD_DEFER {
5624
3
            recycle_txn_info_keys.clear();
5625
3
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5623
9
        DORIS_CLOUD_DEFER {
5624
9
            recycle_txn_info_keys.clear();
5625
9
        };
5626
12
        TEST_SYNC_POINT_CALLBACK(
5627
12
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
5628
12
                &recycle_txn_info_keys);
5629
23.0k
        for (const auto& k : recycle_txn_info_keys) {
5630
23.0k
            concurrent_delete_executor.add([&]() {
5631
23.0k
                int ret = delete_recycle_txn_kv(k);
5632
23.0k
                if (ret == 1) {
5633
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5634
54
                    for (int i = 1; i <= max_retry; ++i) {
5635
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5636
54
                        ret = delete_recycle_txn_kv(k);
5637
                        // clang-format off
5638
54
                        TEST_SYNC_POINT_CALLBACK(
5639
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5640
                        // clang-format on
5641
54
                        if (ret != 1) {
5642
18
                            break;
5643
18
                        }
5644
                        // random sleep 0-100 ms to retry
5645
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5646
36
                    }
5647
18
                }
5648
23.0k
                if (ret != 0) {
5649
9
                    LOG_WARNING("failed to delete recycle txn kv")
5650
9
                            .tag("instance id", instance_id_)
5651
9
                            .tag("key", hex(k));
5652
9
                    return -1;
5653
9
                }
5654
23.0k
                return 0;
5655
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5630
3
            concurrent_delete_executor.add([&]() {
5631
3
                int ret = delete_recycle_txn_kv(k);
5632
3
                if (ret == 1) {
5633
0
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5634
0
                    for (int i = 1; i <= max_retry; ++i) {
5635
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5636
0
                        ret = delete_recycle_txn_kv(k);
5637
                        // clang-format off
5638
0
                        TEST_SYNC_POINT_CALLBACK(
5639
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5640
                        // clang-format on
5641
0
                        if (ret != 1) {
5642
0
                            break;
5643
0
                        }
5644
                        // random sleep 0-100 ms to retry
5645
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5646
0
                    }
5647
0
                }
5648
3
                if (ret != 0) {
5649
0
                    LOG_WARNING("failed to delete recycle txn kv")
5650
0
                            .tag("instance id", instance_id_)
5651
0
                            .tag("key", hex(k));
5652
0
                    return -1;
5653
0
                }
5654
3
                return 0;
5655
3
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5630
23.0k
            concurrent_delete_executor.add([&]() {
5631
23.0k
                int ret = delete_recycle_txn_kv(k);
5632
23.0k
                if (ret == 1) {
5633
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5634
54
                    for (int i = 1; i <= max_retry; ++i) {
5635
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5636
54
                        ret = delete_recycle_txn_kv(k);
5637
                        // clang-format off
5638
54
                        TEST_SYNC_POINT_CALLBACK(
5639
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5640
                        // clang-format on
5641
54
                        if (ret != 1) {
5642
18
                            break;
5643
18
                        }
5644
                        // random sleep 0-100 ms to retry
5645
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5646
36
                    }
5647
18
                }
5648
23.0k
                if (ret != 0) {
5649
9
                    LOG_WARNING("failed to delete recycle txn kv")
5650
9
                            .tag("instance id", instance_id_)
5651
9
                            .tag("key", hex(k));
5652
9
                    return -1;
5653
9
                }
5654
23.0k
                return 0;
5655
23.0k
            });
5656
23.0k
        }
5657
12
        bool finished = true;
5658
12
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
5659
23.0k
        for (int r : rets) {
5660
23.0k
            if (r != 0) {
5661
9
                ret = -1;
5662
9
            }
5663
23.0k
        }
5664
5665
12
        ret = finished ? ret : -1;
5666
5667
12
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
5668
5669
12
        if (ret != 0) {
5670
3
            LOG_WARNING("recycle txn kv ret!=0")
5671
3
                    .tag("finished", finished)
5672
3
                    .tag("ret", ret)
5673
3
                    .tag("instance_id", instance_id_);
5674
3
            return ret;
5675
3
        }
5676
9
        return ret;
5677
12
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
5622
3
    auto loop_done = [&]() -> int {
5623
3
        DORIS_CLOUD_DEFER {
5624
3
            recycle_txn_info_keys.clear();
5625
3
        };
5626
3
        TEST_SYNC_POINT_CALLBACK(
5627
3
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
5628
3
                &recycle_txn_info_keys);
5629
3
        for (const auto& k : recycle_txn_info_keys) {
5630
3
            concurrent_delete_executor.add([&]() {
5631
3
                int ret = delete_recycle_txn_kv(k);
5632
3
                if (ret == 1) {
5633
3
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5634
3
                    for (int i = 1; i <= max_retry; ++i) {
5635
3
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5636
3
                        ret = delete_recycle_txn_kv(k);
5637
                        // clang-format off
5638
3
                        TEST_SYNC_POINT_CALLBACK(
5639
3
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5640
                        // clang-format on
5641
3
                        if (ret != 1) {
5642
3
                            break;
5643
3
                        }
5644
                        // random sleep 0-100 ms to retry
5645
3
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5646
3
                    }
5647
3
                }
5648
3
                if (ret != 0) {
5649
3
                    LOG_WARNING("failed to delete recycle txn kv")
5650
3
                            .tag("instance id", instance_id_)
5651
3
                            .tag("key", hex(k));
5652
3
                    return -1;
5653
3
                }
5654
3
                return 0;
5655
3
            });
5656
3
        }
5657
3
        bool finished = true;
5658
3
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
5659
3
        for (int r : rets) {
5660
3
            if (r != 0) {
5661
0
                ret = -1;
5662
0
            }
5663
3
        }
5664
5665
3
        ret = finished ? ret : -1;
5666
5667
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
5668
5669
3
        if (ret != 0) {
5670
0
            LOG_WARNING("recycle txn kv ret!=0")
5671
0
                    .tag("finished", finished)
5672
0
                    .tag("ret", ret)
5673
0
                    .tag("instance_id", instance_id_);
5674
0
            return ret;
5675
0
        }
5676
3
        return ret;
5677
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
5622
9
    auto loop_done = [&]() -> int {
5623
9
        DORIS_CLOUD_DEFER {
5624
9
            recycle_txn_info_keys.clear();
5625
9
        };
5626
9
        TEST_SYNC_POINT_CALLBACK(
5627
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
5628
9
                &recycle_txn_info_keys);
5629
23.0k
        for (const auto& k : recycle_txn_info_keys) {
5630
23.0k
            concurrent_delete_executor.add([&]() {
5631
23.0k
                int ret = delete_recycle_txn_kv(k);
5632
23.0k
                if (ret == 1) {
5633
23.0k
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5634
23.0k
                    for (int i = 1; i <= max_retry; ++i) {
5635
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5636
23.0k
                        ret = delete_recycle_txn_kv(k);
5637
                        // clang-format off
5638
23.0k
                        TEST_SYNC_POINT_CALLBACK(
5639
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5640
                        // clang-format on
5641
23.0k
                        if (ret != 1) {
5642
23.0k
                            break;
5643
23.0k
                        }
5644
                        // random sleep 0-100 ms to retry
5645
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5646
23.0k
                    }
5647
23.0k
                }
5648
23.0k
                if (ret != 0) {
5649
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
5650
23.0k
                            .tag("instance id", instance_id_)
5651
23.0k
                            .tag("key", hex(k));
5652
23.0k
                    return -1;
5653
23.0k
                }
5654
23.0k
                return 0;
5655
23.0k
            });
5656
23.0k
        }
5657
9
        bool finished = true;
5658
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
5659
23.0k
        for (int r : rets) {
5660
23.0k
            if (r != 0) {
5661
9
                ret = -1;
5662
9
            }
5663
23.0k
        }
5664
5665
9
        ret = finished ? ret : -1;
5666
5667
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
5668
5669
9
        if (ret != 0) {
5670
3
            LOG_WARNING("recycle txn kv ret!=0")
5671
3
                    .tag("finished", finished)
5672
3
                    .tag("ret", ret)
5673
3
                    .tag("instance_id", instance_id_);
5674
3
            return ret;
5675
3
        }
5676
6
        return ret;
5677
9
    };
5678
5679
21
    if (config::enable_recycler_stats_metrics) {
5680
0
        scan_and_statistics_expired_txn_label();
5681
0
    }
5682
    // recycle_func and loop_done for scan and recycle
5683
21
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
5684
21
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
5685
21
}
5686
5687
struct CopyJobIdTuple {
5688
    std::string instance_id;
5689
    std::string stage_id;
5690
    long table_id;
5691
    std::string copy_id;
5692
    std::string stage_path;
5693
};
5694
struct BatchObjStoreAccessor {
5695
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
5696
                          TxnKv* txn_kv)
5697
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
5698
3
    ~BatchObjStoreAccessor() {
5699
3
        if (!paths_.empty()) {
5700
3
            consume();
5701
3
        }
5702
3
    }
5703
5704
    /**
5705
    * Implicitly batches the work and submits the batch delete task to s3
5706
    * The s3 delete operations are done in batches, and then the CopyJobPB keys are deleted one by one
5707
    *
5708
    * @param copy_job The protobuf struct that contains the copy job files.
5709
    * @param key The copy job's key on fdb. The key is originally owned by the fdb range iterator; to make sure
5710
    *            it lasts until we finish the delete task, it is passed here by value as a string
5711
    * @param cope_job_id_tuple One tuple {instance_id, stage_id, table_id, query_id, stage_path} used to build the log trace
5712
    */
5713
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
5714
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
5715
5
        auto& file_keys = copy_file_keys_[key];
5716
5
        file_keys.log_trace =
5717
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
5718
5
                            instance_id, stage_id, table_id, copy_id, path);
5719
5
        std::string_view log_trace = file_keys.log_trace;
5720
2.03k
        for (const auto& file : copy_job.object_files()) {
5721
2.03k
            auto relative_path = file.relative_path();
5722
2.03k
            paths_.push_back(relative_path);
5723
2.03k
            file_keys.keys.push_back(copy_file_key(
5724
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
5725
2.03k
            LOG_INFO(log_trace)
5726
2.03k
                    .tag("relative_path", relative_path)
5727
2.03k
                    .tag("batch_count", batch_count_);
5728
2.03k
        }
5729
5
        LOG_INFO(log_trace)
5730
5
                .tag("objects_num", copy_job.object_files().size())
5731
5
                .tag("batch_count", batch_count_);
5732
        // TODO(AlexYue): If the size is 1001, it would be one delete request with 1000 objects and one delete request with only one object (**ATTN**: it is NOT
5733
        // recommended to use delete objects when the number of objects is less than 10)
5734
5
        if (paths_.size() < 1000) {
5735
3
            return;
5736
3
        }
5737
2
        consume();
5738
2
    }
5739
5740
private:
5741
5
    void consume() {
5742
5
        DORIS_CLOUD_DEFER {
5743
5
            paths_.clear();
5744
5
            copy_file_keys_.clear();
5745
5
            batch_count_++;
5746
5747
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
5748
5
                        batch_count_);
5749
5
        };
5750
5751
5
        StopWatch sw;
5752
        // TODO(yuejing): in the accessor's delete_objects implementation, consider that if the number of _paths does not exceed 10, it could directly send 10 delete object operations instead of sending a post
5753
5
        if (0 != accessor_->delete_files(paths_)) {
5754
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
5755
2
                        paths_.size(), batch_count_, sw.elapsed_us());
5756
2
            return;
5757
2
        }
5758
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
5759
3
                    paths_.size(), batch_count_, sw.elapsed_us());
5760
        // delete fdb's keys
5761
3
        for (auto& file_keys : copy_file_keys_) {
5762
3
            auto& [log_trace, keys] = file_keys.second;
5763
3
            std::unique_ptr<Transaction> txn;
5764
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
5765
0
                LOG(WARNING) << "failed to create txn";
5766
0
                continue;
5767
0
            }
5768
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
5769
            // And if there are too many copy files, begin_copy fails to commit as well. So the copy file keys here are
5770
            // limited and should not cause the txn commit to fail.
5771
1.02k
            for (const auto& key : keys) {
5772
1.02k
                txn->remove(key);
5773
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
5774
1.02k
            }
5775
3
            txn->remove(file_keys.first);
5776
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
5777
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
5778
0
                continue;
5779
0
            }
5780
3
        }
5781
3
    }
5782
    std::shared_ptr<StorageVaultAccessor> accessor_;
5783
    // the path of the s3 files to be deleted
5784
    std::vector<std::string> paths_;
5785
    struct CopyFiles {
5786
        std::string log_trace;
5787
        std::vector<std::string> keys;
5788
    };
5789
    // pair<std::string, std::vector<std::string>>
5790
    // first: instance_id_ stage_id table_id query_id
5791
    // second: keys to be deleted
5792
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
5793
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
5794
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
5795
    // which together can uniquely identify different tasks for log tracing
5796
    uint64_t& batch_count_;
5797
    TxnKv* txn_kv_;
5798
};
5799
5800
13
int InstanceRecycler::recycle_copy_jobs() {
5801
13
    int64_t num_scanned = 0;
5802
13
    int64_t num_finished = 0;
5803
13
    int64_t num_expired = 0;
5804
13
    int64_t num_recycled = 0;
5805
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
5806
13
    uint64_t batch_count = 0;
5807
13
    const std::string task_name = "recycle_copy_jobs";
5808
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5809
5810
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
5811
5812
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5813
13
    register_recycle_task(task_name, start_time);
5814
5815
13
    DORIS_CLOUD_DEFER {
5816
13
        unregister_recycle_task(task_name);
5817
13
        int64_t cost =
5818
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5819
13
        metrics_context.finish_report();
5820
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
5821
13
                .tag("instance_id", instance_id_)
5822
13
                .tag("num_scanned", num_scanned)
5823
13
                .tag("num_finished", num_finished)
5824
13
                .tag("num_expired", num_expired)
5825
13
                .tag("num_recycled", num_recycled);
5826
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
5815
13
    DORIS_CLOUD_DEFER {
5816
13
        unregister_recycle_task(task_name);
5817
13
        int64_t cost =
5818
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5819
13
        metrics_context.finish_report();
5820
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
5821
13
                .tag("instance_id", instance_id_)
5822
13
                .tag("num_scanned", num_scanned)
5823
13
                .tag("num_finished", num_finished)
5824
13
                .tag("num_expired", num_expired)
5825
13
                .tag("num_recycled", num_recycled);
5826
13
    };
5827
5828
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
5829
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
5830
13
    std::string key0;
5831
13
    std::string key1;
5832
13
    copy_job_key(key_info0, &key0);
5833
13
    copy_job_key(key_info1, &key1);
5834
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
5835
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
5836
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
5837
16
                         this](std::string_view k, std::string_view v) -> int {
5838
16
        ++num_scanned;
5839
16
        CopyJobPB copy_job;
5840
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
5841
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
5842
0
            return -1;
5843
0
        }
5844
5845
        // decode copy job key
5846
16
        auto k1 = k;
5847
16
        k1.remove_prefix(1);
5848
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5849
16
        decode_key(&k1, &out);
5850
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
5851
        // -> CopyJobPB
5852
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
5853
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
5854
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
5855
5856
16
        bool check_storage = true;
5857
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
5858
12
            ++num_finished;
5859
5860
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
5861
7
                auto it = stage_accessor_map.find(stage_id);
5862
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
5863
7
                std::string_view path;
5864
7
                if (it != stage_accessor_map.end()) {
5865
2
                    accessor = it->second;
5866
5
                } else {
5867
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
5868
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
5869
5
                                                      &inner_accessor);
5870
5
                    if (ret < 0) { // error
5871
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
5872
0
                        return -1;
5873
5
                    } else if (ret == 0) {
5874
3
                        path = inner_accessor->uri();
5875
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
5876
3
                                inner_accessor, batch_count, txn_kv_.get());
5877
3
                        stage_accessor_map.emplace(stage_id, accessor);
5878
3
                    } else { // stage not found, skip check storage
5879
2
                        check_storage = false;
5880
2
                    }
5881
5
                }
5882
7
                if (check_storage) {
5883
                    // TODO delete objects with key and etag is not supported
5884
5
                    accessor->add(std::move(copy_job), std::string(k),
5885
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
5886
5
                    return 0;
5887
5
                }
5888
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
5889
5
                int64_t current_time =
5890
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5891
5
                if (copy_job.finish_time_ms() > 0) {
5892
2
                    if (!config::force_immediate_recycle &&
5893
2
                        current_time < copy_job.finish_time_ms() +
5894
2
                                               config::copy_job_max_retention_second * 1000) {
5895
1
                        return 0;
5896
1
                    }
5897
3
                } else {
5898
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
5899
3
                    if (!config::force_immediate_recycle &&
5900
3
                        current_time < copy_job.start_time_ms() +
5901
3
                                               config::copy_job_max_retention_second * 1000) {
5902
1
                        return 0;
5903
1
                    }
5904
3
                }
5905
5
            }
5906
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
5907
4
            int64_t current_time =
5908
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5909
            // if the copy job has timed out: delete all copy file kvs and the copy job kv
5910
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
5911
2
                return 0;
5912
2
            }
5913
2
            ++num_expired;
5914
2
        }
5915
5916
        // delete all copy files
5917
7
        std::vector<std::string> copy_file_keys;
5918
70
        for (auto& file : copy_job.object_files()) {
5919
70
            copy_file_keys.push_back(copy_file_key(
5920
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
5921
70
        }
5922
7
        std::unique_ptr<Transaction> txn;
5923
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
5924
0
            LOG(WARNING) << "failed to create txn";
5925
0
            return -1;
5926
0
        }
5927
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
5928
        // And if there are too many copy files, begin_copy fails to commit as well. So the copy file keys here are
5929
        // limited and should not cause the txn commit to fail.
5930
70
        for (const auto& key : copy_file_keys) {
5931
70
            txn->remove(key);
5932
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
5933
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
5934
70
                      << ", query_id=" << copy_id;
5935
70
        }
5936
7
        txn->remove(k);
5937
7
        TxnErrorCode err = txn->commit();
5938
7
        if (err != TxnErrorCode::TXN_OK) {
5939
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
5940
0
            return -1;
5941
0
        }
5942
5943
7
        metrics_context.total_recycled_num = ++num_recycled;
5944
7
        metrics_context.report();
5945
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5946
7
        return 0;
5947
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5837
16
                         this](std::string_view k, std::string_view v) -> int {
5838
16
        ++num_scanned;
5839
16
        CopyJobPB copy_job;
5840
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
5841
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
5842
0
            return -1;
5843
0
        }
5844
5845
        // decode copy job key
5846
16
        auto k1 = k;
5847
16
        k1.remove_prefix(1);
5848
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5849
16
        decode_key(&k1, &out);
5850
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
5851
        // -> CopyJobPB
5852
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
5853
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
5854
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
5855
5856
16
        bool check_storage = true;
5857
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
5858
12
            ++num_finished;
5859
5860
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
5861
7
                auto it = stage_accessor_map.find(stage_id);
5862
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
5863
7
                std::string_view path;
5864
7
                if (it != stage_accessor_map.end()) {
5865
2
                    accessor = it->second;
5866
5
                } else {
5867
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
5868
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
5869
5
                                                      &inner_accessor);
5870
5
                    if (ret < 0) { // error
5871
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
5872
0
                        return -1;
5873
5
                    } else if (ret == 0) {
5874
3
                        path = inner_accessor->uri();
5875
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
5876
3
                                inner_accessor, batch_count, txn_kv_.get());
5877
3
                        stage_accessor_map.emplace(stage_id, accessor);
5878
3
                    } else { // stage not found, skip check storage
5879
2
                        check_storage = false;
5880
2
                    }
5881
5
                }
5882
7
                if (check_storage) {
5883
                    // TODO delete objects with key and etag is not supported
5884
5
                    accessor->add(std::move(copy_job), std::string(k),
5885
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
5886
5
                    return 0;
5887
5
                }
5888
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
5889
5
                int64_t current_time =
5890
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5891
5
                if (copy_job.finish_time_ms() > 0) {
5892
2
                    if (!config::force_immediate_recycle &&
5893
2
                        current_time < copy_job.finish_time_ms() +
5894
2
                                               config::copy_job_max_retention_second * 1000) {
5895
1
                        return 0;
5896
1
                    }
5897
3
                } else {
5898
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
5899
3
                    if (!config::force_immediate_recycle &&
5900
3
                        current_time < copy_job.start_time_ms() +
5901
3
                                               config::copy_job_max_retention_second * 1000) {
5902
1
                        return 0;
5903
1
                    }
5904
3
                }
5905
5
            }
5906
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
5907
4
            int64_t current_time =
5908
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5909
            // if the copy job has timed out: delete all copy file kvs and the copy job kv
5910
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
5911
2
                return 0;
5912
2
            }
5913
2
            ++num_expired;
5914
2
        }
5915
5916
        // delete all copy files
5917
7
        std::vector<std::string> copy_file_keys;
5918
70
        for (auto& file : copy_job.object_files()) {
5919
70
            copy_file_keys.push_back(copy_file_key(
5920
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
5921
70
        }
5922
7
        std::unique_ptr<Transaction> txn;
5923
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
5924
0
            LOG(WARNING) << "failed to create txn";
5925
0
            return -1;
5926
0
        }
5927
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
5928
        // And if there are too many copy files, begin_copy fails to commit as well. So the copy file keys here are
5929
        // limited and should not cause the txn commit to fail.
5930
70
        for (const auto& key : copy_file_keys) {
5931
70
            txn->remove(key);
5932
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
5933
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
5934
70
                      << ", query_id=" << copy_id;
5935
70
        }
5936
7
        txn->remove(k);
5937
7
        TxnErrorCode err = txn->commit();
5938
7
        if (err != TxnErrorCode::TXN_OK) {
5939
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
5940
0
            return -1;
5941
0
        }
5942
5943
7
        metrics_context.total_recycled_num = ++num_recycled;
5944
7
        metrics_context.report();
5945
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5946
7
        return 0;
5947
7
    };
5948
5949
13
    if (config::enable_recycler_stats_metrics) {
5950
0
        scan_and_statistics_copy_jobs();
5951
0
    }
5952
    // recycle_func and loop_done for scan and recycle
5953
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
5954
13
}
5955
5956
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
5957
                                             const StagePB::StageType& stage_type,
5958
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
5959
5
#ifdef UNIT_TEST
5960
    // In unit test, external use the same accessor as the internal stage
5961
5
    auto it = accessor_map_.find(stage_id);
5962
5
    if (it != accessor_map_.end()) {
5963
3
        *accessor = it->second;
5964
3
    } else {
5965
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
5966
2
        return 1;
5967
2
    }
5968
#else
5969
    // init s3 accessor and add to accessor map
5970
    auto stage_it =
5971
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
5972
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
5973
5974
    if (stage_it == instance_info_.stages().end()) {
5975
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
5976
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
5977
        return 1;
5978
    }
5979
5980
    const auto& object_store_info = stage_it->obj_info();
5981
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
5982
5983
    S3Conf s3_conf;
5984
    if (stage_type == StagePB::EXTERNAL) {
5985
        if (stage_access_type == StagePB::AKSK) {
5986
            auto conf = S3Conf::from_obj_store_info(object_store_info);
5987
            if (!conf) {
5988
                return -1;
5989
            }
5990
5991
            s3_conf = std::move(*conf);
5992
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
5993
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
5994
            if (!conf) {
5995
                return -1;
5996
            }
5997
5998
            s3_conf = std::move(*conf);
5999
            if (instance_info_.ram_user().has_encryption_info()) {
6000
                AkSkPair plain_ak_sk_pair;
6001
                int ret = decrypt_ak_sk_helper(
6002
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
6003
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
6004
                if (ret != 0) {
6005
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
6006
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
6007
                    return -1;
6008
                }
6009
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
6010
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
6011
            } else {
6012
                s3_conf.ak = instance_info_.ram_user().ak();
6013
                s3_conf.sk = instance_info_.ram_user().sk();
6014
            }
6015
        } else {
6016
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
6017
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
6018
            return -1;
6019
        }
6020
    } else if (stage_type == StagePB::INTERNAL) {
6021
        int idx = stoi(object_store_info.id());
6022
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6023
            LOG(WARNING) << "invalid idx: " << idx;
6024
            return -1;
6025
        }
6026
6027
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6028
        auto conf = S3Conf::from_obj_store_info(old_obj);
6029
        if (!conf) {
6030
            return -1;
6031
        }
6032
6033
        s3_conf = std::move(*conf);
6034
        s3_conf.prefix = object_store_info.prefix();
6035
    } else {
6036
        LOG(WARNING) << "unknown stage type " << stage_type;
6037
        return -1;
6038
    }
6039
6040
    std::shared_ptr<S3Accessor> s3_accessor;
6041
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
6042
    if (ret != 0) {
6043
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
6044
        return -1;
6045
    }
6046
6047
    *accessor = std::move(s3_accessor);
6048
#endif
6049
3
    return 0;
6050
5
}
6051
6052
11
int InstanceRecycler::recycle_stage() {
6053
11
    int64_t num_scanned = 0;
6054
11
    int64_t num_recycled = 0;
6055
11
    const std::string task_name = "recycle_stage";
6056
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6057
6058
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
6059
6060
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6061
11
    register_recycle_task(task_name, start_time);
6062
6063
11
    DORIS_CLOUD_DEFER {
6064
11
        unregister_recycle_task(task_name);
6065
11
        int64_t cost =
6066
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6067
11
        metrics_context.finish_report();
6068
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6069
11
                .tag("instance_id", instance_id_)
6070
11
                .tag("num_scanned", num_scanned)
6071
11
                .tag("num_recycled", num_recycled);
6072
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
6063
11
    DORIS_CLOUD_DEFER {
6064
11
        unregister_recycle_task(task_name);
6065
11
        int64_t cost =
6066
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6067
11
        metrics_context.finish_report();
6068
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6069
11
                .tag("instance_id", instance_id_)
6070
11
                .tag("num_scanned", num_scanned)
6071
11
                .tag("num_recycled", num_recycled);
6072
11
    };
6073
6074
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6075
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6076
11
    std::string key0 = recycle_stage_key(key_info0);
6077
11
    std::string key1 = recycle_stage_key(key_info1);
6078
6079
11
    std::vector<std::string_view> stage_keys;
6080
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
6081
11
                         this](std::string_view k, std::string_view v) -> int {
6082
1
        ++num_scanned;
6083
1
        RecycleStagePB recycle_stage;
6084
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6085
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6086
0
            return -1;
6087
0
        }
6088
6089
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6090
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6091
0
            LOG(WARNING) << "invalid idx: " << idx;
6092
0
            return -1;
6093
0
        }
6094
6095
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6096
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6097
1
                [&] {
6098
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6099
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6100
1
                    if (!s3_conf) {
6101
1
                        return -1;
6102
1
                    }
6103
6104
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6105
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6106
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6107
1
                    if (ret != 0) {
6108
1
                        return -1;
6109
1
                    }
6110
6111
1
                    accessor = std::move(s3_accessor);
6112
1
                    return 0;
6113
1
                }(),
6114
1
                "recycle_stage:get_accessor", &accessor);
6115
6116
1
        if (ret != 0) {
6117
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6118
0
            return ret;
6119
0
        }
6120
6121
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6122
1
                .tag("instance_id", instance_id_)
6123
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6124
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6125
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6126
1
                .tag("obj_info_id", idx)
6127
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6128
1
        ret = accessor->delete_all();
6129
1
        if (ret != 0) {
6130
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6131
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6132
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6133
0
                         << ", ret=" << ret;
6134
0
            return -1;
6135
0
        }
6136
1
        metrics_context.total_recycled_num = ++num_recycled;
6137
1
        metrics_context.report();
6138
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6139
1
        stage_keys.push_back(k);
6140
1
        return 0;
6141
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6081
1
                         this](std::string_view k, std::string_view v) -> int {
6082
1
        ++num_scanned;
6083
1
        RecycleStagePB recycle_stage;
6084
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6085
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6086
0
            return -1;
6087
0
        }
6088
6089
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6090
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6091
0
            LOG(WARNING) << "invalid idx: " << idx;
6092
0
            return -1;
6093
0
        }
6094
6095
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6096
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6097
1
                [&] {
6098
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6099
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6100
1
                    if (!s3_conf) {
6101
1
                        return -1;
6102
1
                    }
6103
6104
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6105
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6106
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6107
1
                    if (ret != 0) {
6108
1
                        return -1;
6109
1
                    }
6110
6111
1
                    accessor = std::move(s3_accessor);
6112
1
                    return 0;
6113
1
                }(),
6114
1
                "recycle_stage:get_accessor", &accessor);
6115
6116
1
        if (ret != 0) {
6117
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6118
0
            return ret;
6119
0
        }
6120
6121
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6122
1
                .tag("instance_id", instance_id_)
6123
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6124
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6125
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6126
1
                .tag("obj_info_id", idx)
6127
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6128
1
        ret = accessor->delete_all();
6129
1
        if (ret != 0) {
6130
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6131
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6132
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6133
0
                         << ", ret=" << ret;
6134
0
            return -1;
6135
0
        }
6136
1
        metrics_context.total_recycled_num = ++num_recycled;
6137
1
        metrics_context.report();
6138
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6139
1
        stage_keys.push_back(k);
6140
1
        return 0;
6141
1
    };
6142
6143
11
    auto loop_done = [&stage_keys, this]() -> int {
6144
1
        if (stage_keys.empty()) return 0;
6145
1
        DORIS_CLOUD_DEFER {
6146
1
            stage_keys.clear();
6147
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6145
1
        DORIS_CLOUD_DEFER {
6146
1
            stage_keys.clear();
6147
1
        };
6148
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6149
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6150
0
            return -1;
6151
0
        }
6152
1
        return 0;
6153
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
6143
1
    auto loop_done = [&stage_keys, this]() -> int {
6144
1
        if (stage_keys.empty()) return 0;
6145
1
        DORIS_CLOUD_DEFER {
6146
1
            stage_keys.clear();
6147
1
        };
6148
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6149
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6150
0
            return -1;
6151
0
        }
6152
1
        return 0;
6153
1
    };
6154
11
    if (config::enable_recycler_stats_metrics) {
6155
0
        scan_and_statistics_stage();
6156
0
    }
6157
    // recycle_func and loop_done for scan and recycle
6158
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
6159
11
}
6160
6161
10
int InstanceRecycler::recycle_expired_stage_objects() {
6162
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
6163
6164
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6165
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
6166
6167
10
    DORIS_CLOUD_DEFER {
6168
10
        int64_t cost =
6169
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6170
10
        metrics_context.finish_report();
6171
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6172
10
                .tag("instance_id", instance_id_);
6173
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
6167
10
    DORIS_CLOUD_DEFER {
6168
10
        int64_t cost =
6169
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6170
10
        metrics_context.finish_report();
6171
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6172
10
                .tag("instance_id", instance_id_);
6173
10
    };
6174
6175
10
    int ret = 0;
6176
6177
10
    if (config::enable_recycler_stats_metrics) {
6178
0
        scan_and_statistics_expired_stage_objects();
6179
0
    }
6180
6181
10
    for (const auto& stage : instance_info_.stages()) {
6182
0
        std::stringstream ss;
6183
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
6184
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
6185
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
6186
0
           << ", prefix=" << stage.obj_info().prefix();
6187
6188
0
        if (stopped()) {
6189
0
            break;
6190
0
        }
6191
0
        if (stage.type() == StagePB::EXTERNAL) {
6192
0
            continue;
6193
0
        }
6194
0
        int idx = stoi(stage.obj_info().id());
6195
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6196
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
6197
0
            continue;
6198
0
        }
6199
6200
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6201
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6202
0
        if (!s3_conf) {
6203
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
6204
0
            continue;
6205
0
        }
6206
6207
0
        s3_conf->prefix = stage.obj_info().prefix();
6208
0
        std::shared_ptr<S3Accessor> accessor;
6209
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
6210
0
        if (ret1 != 0) {
6211
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
6212
0
            ret = -1;
6213
0
            continue;
6214
0
        }
6215
6216
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
6217
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
6218
0
            ret = -1;
6219
0
            continue;
6220
0
        }
6221
6222
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
6223
0
        int64_t expiration_time =
6224
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
6225
0
                config::internal_stage_objects_expire_time_second;
6226
0
        if (config::force_immediate_recycle) {
6227
0
            expiration_time = INT64_MAX;
6228
0
        }
6229
0
        ret1 = accessor->delete_all(expiration_time);
6230
0
        if (ret1 != 0) {
6231
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
6232
0
                         << ss.str();
6233
0
            ret = -1;
6234
0
            continue;
6235
0
        }
6236
0
        metrics_context.total_recycled_num++;
6237
0
        metrics_context.report();
6238
0
    }
6239
10
    return ret;
6240
10
}
6241
6242
163
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
6243
163
    std::lock_guard lock(recycle_tasks_mutex);
6244
163
    running_recycle_tasks[task_name] = start_time;
6245
163
}
6246
6247
162
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
6248
162
    std::lock_guard lock(recycle_tasks_mutex);
6249
162
    DCHECK(running_recycle_tasks[task_name] > 0);
6250
162
    running_recycle_tasks.erase(task_name);
6251
162
}
6252
6253
21
bool InstanceRecycler::check_recycle_tasks() {
6254
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
6255
21
    {
6256
21
        std::lock_guard lock(recycle_tasks_mutex);
6257
21
        tmp_running_recycle_tasks = running_recycle_tasks;
6258
21
    }
6259
6260
21
    bool found = false;
6261
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6262
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
6263
20
        int64_t cost = now - start_time;
6264
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
6265
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
6266
20
                    .tag("instance_id", instance_id_)
6267
20
                    .tag("task", task_name);
6268
20
            found = true;
6269
20
        }
6270
20
    }
6271
6272
21
    return found;
6273
21
}
6274
6275
// Scan and statistics indexes that need to be recycled
6276
0
int InstanceRecycler::scan_and_statistics_indexes() {
6277
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
6278
6279
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
6280
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
6281
0
    std::string index_key0;
6282
0
    std::string index_key1;
6283
0
    recycle_index_key(index_key_info0, &index_key0);
6284
0
    recycle_index_key(index_key_info1, &index_key1);
6285
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6286
6287
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
6288
0
        RecycleIndexPB index_pb;
6289
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
6290
0
            return 0;
6291
0
        }
6292
0
        int64_t current_time = ::time(nullptr);
6293
0
        if (current_time <
6294
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
6295
0
            return 0;
6296
0
        }
6297
        // decode index_id
6298
0
        auto k1 = k;
6299
0
        k1.remove_prefix(1);
6300
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6301
0
        decode_key(&k1, &out);
6302
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
6303
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
6304
0
        std::unique_ptr<Transaction> txn;
6305
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6306
0
        if (err != TxnErrorCode::TXN_OK) {
6307
0
            return 0;
6308
0
        }
6309
0
        std::string val;
6310
0
        err = txn->get(k, &val);
6311
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
6312
0
            return 0;
6313
0
        }
6314
0
        if (err != TxnErrorCode::TXN_OK) {
6315
0
            return 0;
6316
0
        }
6317
0
        index_pb.Clear();
6318
0
        if (!index_pb.ParseFromString(val)) {
6319
0
            return 0;
6320
0
        }
6321
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
6322
0
            return 0;
6323
0
        }
6324
0
        metrics_context.total_need_recycle_num++;
6325
0
        return 0;
6326
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6327
6328
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
6329
0
    metrics_context.report(true);
6330
0
    segment_metrics_context_.report(true);
6331
0
    tablet_metrics_context_.report(true);
6332
0
    return ret;
6333
0
}
6334
6335
// Scan and statistics partitions that need to be recycled
6336
0
int InstanceRecycler::scan_and_statistics_partitions() {
6337
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
6338
6339
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
6340
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
6341
0
    std::string part_key0;
6342
0
    std::string part_key1;
6343
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6344
6345
0
    recycle_partition_key(part_key_info0, &part_key0);
6346
0
    recycle_partition_key(part_key_info1, &part_key1);
6347
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
6348
0
        RecyclePartitionPB part_pb;
6349
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
6350
0
            return 0;
6351
0
        }
6352
0
        int64_t current_time = ::time(nullptr);
6353
0
        if (current_time <
6354
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
6355
0
            return 0;
6356
0
        }
6357
        // decode partition_id
6358
0
        auto k1 = k;
6359
0
        k1.remove_prefix(1);
6360
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6361
0
        decode_key(&k1, &out);
6362
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
6363
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
6364
        // Change state to RECYCLING
6365
0
        std::unique_ptr<Transaction> txn;
6366
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6367
0
        if (err != TxnErrorCode::TXN_OK) {
6368
0
            return 0;
6369
0
        }
6370
0
        std::string val;
6371
0
        err = txn->get(k, &val);
6372
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
6373
0
            return 0;
6374
0
        }
6375
0
        if (err != TxnErrorCode::TXN_OK) {
6376
0
            return 0;
6377
0
        }
6378
0
        part_pb.Clear();
6379
0
        if (!part_pb.ParseFromString(val)) {
6380
0
            return 0;
6381
0
        }
6382
        // Partitions with PREPARED state MUST have no data
6383
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
6384
0
        int ret = 0;
6385
0
        for (int64_t index_id : part_pb.index_id()) {
6386
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
6387
0
                                            partition_id, is_empty_tablet) != 0) {
6388
0
                ret = 0;
6389
0
            }
6390
0
        }
6391
0
        metrics_context.total_need_recycle_num++;
6392
0
        return ret;
6393
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6394
6395
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
6396
0
    metrics_context.report(true);
6397
0
    segment_metrics_context_.report(true);
6398
0
    tablet_metrics_context_.report(true);
6399
0
    return ret;
6400
0
}
6401
6402
// Scan and statistics rowsets that need to be recycled
6403
0
int InstanceRecycler::scan_and_statistics_rowsets() {
6404
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
6405
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
6406
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
6407
0
    std::string recyc_rs_key0;
6408
0
    std::string recyc_rs_key1;
6409
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
6410
0
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
6411
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6412
6413
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
6414
0
        RecycleRowsetPB rowset;
6415
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6416
0
            return 0;
6417
0
        }
6418
0
        int64_t current_time = ::time(nullptr);
6419
0
        if (current_time <
6420
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
6421
0
            return 0;
6422
0
        }
6423
0
        if (!rowset.has_type()) {
6424
0
            if (!rowset.has_resource_id()) [[unlikely]] {
6425
0
                return 0;
6426
0
            }
6427
0
            if (rowset.resource_id().empty()) [[unlikely]] {
6428
0
                return 0;
6429
0
            }
6430
0
            metrics_context.total_need_recycle_num++;
6431
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6432
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
6433
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6434
0
            return 0;
6435
0
        }
6436
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
6437
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
6438
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
6439
0
                return 0;
6440
0
            }
6441
0
        }
6442
0
        metrics_context.total_need_recycle_num++;
6443
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
6444
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
6445
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
6446
0
        return 0;
6447
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6448
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
6449
0
    metrics_context.report(true);
6450
0
    segment_metrics_context_.report(true);
6451
0
    return ret;
6452
0
}
6453
6454
// Scan and statistics tmp_rowsets that need to be recycled
6455
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
6456
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
6457
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
6458
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
6459
0
    std::string tmp_rs_key0;
6460
0
    std::string tmp_rs_key1;
6461
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
6462
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
6463
6464
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6465
6466
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
6467
0
        doris::RowsetMetaCloudPB rowset;
6468
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6469
0
            return 0;
6470
0
        }
6471
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
6472
0
        int64_t current_time = ::time(nullptr);
6473
0
        if (current_time < expiration) {
6474
0
            return 0;
6475
0
        }
6476
6477
0
        DCHECK_GT(rowset.txn_id(), 0)
6478
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
6479
0
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
6480
0
            return 0;
6481
0
        }
6482
6483
0
        if (!rowset.has_resource_id()) {
6484
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
6485
0
                return 0;
6486
0
            }
6487
0
            return 0;
6488
0
        }
6489
6490
0
        metrics_context.total_need_recycle_num++;
6491
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
6492
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
6493
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
6494
0
        return 0;
6495
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6496
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
6497
0
    metrics_context.report(true);
6498
0
    segment_metrics_context_.report(true);
6499
0
    return ret;
6500
0
}
6501
6502
// Scan and statistics abort_timeout_txn that need to be recycled
6503
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
6504
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
6505
6506
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
6507
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6508
0
    std::string begin_txn_running_key;
6509
0
    std::string end_txn_running_key;
6510
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
6511
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
6512
6513
0
    int64_t current_time =
6514
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6515
6516
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
6517
0
                                               std::string_view k, std::string_view v) -> int {
6518
0
        std::unique_ptr<Transaction> txn;
6519
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6520
0
        if (err != TxnErrorCode::TXN_OK) {
6521
0
            return 0;
6522
0
        }
6523
0
        std::string_view k1 = k;
6524
0
        k1.remove_prefix(1);
6525
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6526
0
        if (decode_key(&k1, &out) != 0) {
6527
0
            return 0;
6528
0
        }
6529
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6530
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6531
        // Update txn_info
6532
0
        std::string txn_inf_key, txn_inf_val;
6533
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6534
0
        err = txn->get(txn_inf_key, &txn_inf_val);
6535
0
        if (err != TxnErrorCode::TXN_OK) {
6536
0
            return 0;
6537
0
        }
6538
0
        TxnInfoPB txn_info;
6539
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
6540
0
            return 0;
6541
0
        }
6542
6543
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
6544
0
            TxnRunningPB txn_running_pb;
6545
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6546
0
                return 0;
6547
0
            }
6548
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6549
0
                return 0;
6550
0
            }
6551
0
            metrics_context.total_need_recycle_num++;
6552
0
        }
6553
0
        return 0;
6554
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6555
6556
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
6557
0
    metrics_context.report(true);
6558
0
    return ret;
6559
0
}
6560
6561
// Scan expired txn labels and collect statistics on those that need to be recycled
6562
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
6563
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
6564
6565
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
6566
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6567
0
    std::string begin_recycle_txn_key;
6568
0
    std::string end_recycle_txn_key;
6569
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
6570
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
6571
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6572
0
    int64_t current_time_ms =
6573
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6574
6575
    // for calculate the total num or bytes of recyled objects
6576
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
6577
0
        RecycleTxnPB recycle_txn_pb;
6578
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6579
0
            return 0;
6580
0
        }
6581
0
        if ((config::force_immediate_recycle) ||
6582
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6583
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6584
0
             current_time_ms)) {
6585
0
            metrics_context.total_need_recycle_num++;
6586
0
        }
6587
0
        return 0;
6588
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6589
6590
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
6591
0
    metrics_context.report(true);
6592
0
    return ret;
6593
0
}
6594
6595
// Scan and statistics copy_jobs that need to be recycled
6596
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
6597
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
6598
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
6599
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
6600
0
    std::string key0;
6601
0
    std::string key1;
6602
0
    copy_job_key(key_info0, &key0);
6603
0
    copy_job_key(key_info1, &key1);
6604
6605
    // for calculate the total num or bytes of recyled objects
6606
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
6607
0
        CopyJobPB copy_job;
6608
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6609
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6610
0
            return 0;
6611
0
        }
6612
6613
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6614
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
6615
0
                int64_t current_time =
6616
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6617
0
                if (copy_job.finish_time_ms() > 0) {
6618
0
                    if (!config::force_immediate_recycle &&
6619
0
                        current_time < copy_job.finish_time_ms() +
6620
0
                                               config::copy_job_max_retention_second * 1000) {
6621
0
                        return 0;
6622
0
                    }
6623
0
                } else {
6624
0
                    if (!config::force_immediate_recycle &&
6625
0
                        current_time < copy_job.start_time_ms() +
6626
0
                                               config::copy_job_max_retention_second * 1000) {
6627
0
                        return 0;
6628
0
                    }
6629
0
                }
6630
0
            }
6631
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6632
0
            int64_t current_time =
6633
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6634
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6635
0
                return 0;
6636
0
            }
6637
0
        }
6638
0
        metrics_context.total_need_recycle_num++;
6639
0
        return 0;
6640
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6641
6642
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
6643
0
    metrics_context.report(true);
6644
0
    return ret;
6645
0
}
6646
6647
// Scan and statistics stage that need to be recycled
6648
0
int InstanceRecycler::scan_and_statistics_stage() {
6649
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
6650
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6651
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6652
0
    std::string key0 = recycle_stage_key(key_info0);
6653
0
    std::string key1 = recycle_stage_key(key_info1);
6654
6655
    // for calculate the total num or bytes of recyled objects
6656
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
6657
0
                                                        std::string_view v) -> int {
6658
0
        RecycleStagePB recycle_stage;
6659
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6660
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6661
0
            return 0;
6662
0
        }
6663
6664
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
6665
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6666
0
            LOG(WARNING) << "invalid idx: " << idx;
6667
0
            return 0;
6668
0
        }
6669
6670
0
        std::shared_ptr<StorageVaultAccessor> accessor;
6671
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6672
0
                [&] {
6673
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6674
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6675
0
                    if (!s3_conf) {
6676
0
                        return 0;
6677
0
                    }
6678
6679
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6680
0
                    std::shared_ptr<S3Accessor> s3_accessor;
6681
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6682
0
                    if (ret != 0) {
6683
0
                        return 0;
6684
0
                    }
6685
6686
0
                    accessor = std::move(s3_accessor);
6687
0
                    return 0;
6688
0
                }(),
6689
0
                "recycle_stage:get_accessor", &accessor);
6690
6691
0
        if (ret != 0) {
6692
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6693
0
            return 0;
6694
0
        }
6695
6696
0
        metrics_context.total_need_recycle_num++;
6697
0
        return 0;
6698
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6699
6700
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
6701
0
    metrics_context.report(true);
6702
0
    return ret;
6703
0
}
6704
6705
// Scan and statistics expired_stage_objects that need to be recycled
6706
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
6707
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
6708
6709
    // for calculate the total num or bytes of recyled objects
6710
0
    auto scan_and_statistics = [&metrics_context, this]() {
6711
0
        for (const auto& stage : instance_info_.stages()) {
6712
0
            if (stopped()) {
6713
0
                break;
6714
0
            }
6715
0
            if (stage.type() == StagePB::EXTERNAL) {
6716
0
                continue;
6717
0
            }
6718
0
            int idx = stoi(stage.obj_info().id());
6719
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
6720
0
                continue;
6721
0
            }
6722
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
6723
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6724
0
            if (!s3_conf) {
6725
0
                continue;
6726
0
            }
6727
0
            s3_conf->prefix = stage.obj_info().prefix();
6728
0
            std::shared_ptr<S3Accessor> accessor;
6729
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
6730
0
            if (ret1 != 0) {
6731
0
                continue;
6732
0
            }
6733
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
6734
0
                continue;
6735
0
            }
6736
0
            metrics_context.total_need_recycle_num++;
6737
0
        }
6738
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
6739
6740
0
    scan_and_statistics();
6741
0
    metrics_context.report(true);
6742
0
    return 0;
6743
0
}
6744
6745
// Scan and statistics versions that need to be recycled
6746
0
int InstanceRecycler::scan_and_statistics_versions() {
6747
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
6748
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
6749
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
6750
6751
0
    int64_t last_scanned_table_id = 0;
6752
0
    bool is_recycled = false; // Is last scanned kv recycled
6753
    // for calculate the total num or bytes of recyled objects
6754
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
6755
0
                                       std::string_view k, std::string_view) {
6756
0
        auto k1 = k;
6757
0
        k1.remove_prefix(1);
6758
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
6759
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6760
0
        decode_key(&k1, &out);
6761
0
        DCHECK_EQ(out.size(), 6) << k;
6762
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
6763
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
6764
0
            metrics_context.total_need_recycle_num +=
6765
0
                    is_recycled; // Version kv of this table has been recycled
6766
0
            return 0;
6767
0
        }
6768
0
        last_scanned_table_id = table_id;
6769
0
        is_recycled = false;
6770
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
6771
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
6772
0
        std::unique_ptr<Transaction> txn;
6773
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6774
0
        if (err != TxnErrorCode::TXN_OK) {
6775
0
            return 0;
6776
0
        }
6777
0
        std::unique_ptr<RangeGetIterator> iter;
6778
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
6779
0
        if (err != TxnErrorCode::TXN_OK) {
6780
0
            return 0;
6781
0
        }
6782
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
6783
0
            return 0;
6784
0
        }
6785
0
        metrics_context.total_need_recycle_num++;
6786
0
        is_recycled = true;
6787
0
        return 0;
6788
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6789
6790
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
6791
0
    metrics_context.report(true);
6792
0
    return ret;
6793
0
}
6794
6795
// Scan and statistics restore jobs that need to be recycled
6796
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
6797
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
6798
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
6799
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
6800
0
    std::string restore_job_key0;
6801
0
    std::string restore_job_key1;
6802
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
6803
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
6804
6805
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6806
6807
    // for calculate the total num or bytes of recyled objects
6808
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
6809
0
        RestoreJobCloudPB restore_job_pb;
6810
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
6811
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
6812
0
            return 0;
6813
0
        }
6814
0
        int64_t expiration =
6815
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
6816
0
        int64_t current_time = ::time(nullptr);
6817
0
        if (current_time < expiration) { // not expired
6818
0
            return 0;
6819
0
        }
6820
0
        metrics_context.total_need_recycle_num++;
6821
0
        if(restore_job_pb.need_recycle_data()) {
6822
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
6823
0
        }
6824
0
        return 0;
6825
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6826
6827
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
6828
0
    metrics_context.report(true);
6829
0
    return ret;
6830
0
}
6831
6832
int InstanceRecycler::classify_rowset_task_by_ref_count(
6833
60
        RowsetDeleteTask& task, std::vector<RowsetDeleteTask>& batch_delete_tasks) {
6834
60
    constexpr int MAX_RETRY = 10;
6835
60
    const auto& rowset_meta = task.rowset_meta;
6836
60
    int64_t tablet_id = rowset_meta.tablet_id();
6837
60
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
6838
60
    std::string_view reference_instance_id = instance_id_;
6839
60
    if (rowset_meta.has_reference_instance_id()) {
6840
5
        reference_instance_id = rowset_meta.reference_instance_id();
6841
5
    }
6842
6843
61
    for (int i = 0; i < MAX_RETRY; ++i) {
6844
61
        std::unique_ptr<Transaction> txn;
6845
61
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6846
61
        if (err != TxnErrorCode::TXN_OK) {
6847
0
            LOG_WARNING("failed to create txn when classifying rowset task")
6848
0
                    .tag("instance_id", instance_id_)
6849
0
                    .tag("tablet_id", tablet_id)
6850
0
                    .tag("rowset_id", rowset_id)
6851
0
                    .tag("err", err);
6852
0
            return -1;
6853
0
        }
6854
6855
61
        std::string rowset_ref_count_key =
6856
61
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
6857
61
        task.rowset_ref_count_key = rowset_ref_count_key;
6858
6859
61
        int64_t ref_count = 0;
6860
61
        {
6861
61
            std::string value;
6862
61
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
6863
61
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
6864
0
                ref_count = 1;
6865
61
            } else if (err != TxnErrorCode::TXN_OK) {
6866
0
                LOG_WARNING("failed to get rowset ref count key when classifying")
6867
0
                        .tag("instance_id", instance_id_)
6868
0
                        .tag("tablet_id", tablet_id)
6869
0
                        .tag("rowset_id", rowset_id)
6870
0
                        .tag("err", err);
6871
0
                return -1;
6872
61
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
6873
0
                LOG_WARNING("failed to decode rowset data ref count when classifying")
6874
0
                        .tag("instance_id", instance_id_)
6875
0
                        .tag("tablet_id", tablet_id)
6876
0
                        .tag("rowset_id", rowset_id)
6877
0
                        .tag("value", hex(value));
6878
0
                return -1;
6879
0
            }
6880
61
        }
6881
6882
61
        if (ref_count > 1) {
6883
            // ref_count > 1: decrement count, remove recycle keys, don't add to batch delete
6884
12
            txn->atomic_add(rowset_ref_count_key, -1);
6885
12
            LOG_INFO("decrease rowset data ref count in classification phase")
6886
12
                    .tag("instance_id", instance_id_)
6887
12
                    .tag("tablet_id", tablet_id)
6888
12
                    .tag("rowset_id", rowset_id)
6889
12
                    .tag("ref_count", ref_count - 1)
6890
12
                    .tag("ref_count_key", hex(rowset_ref_count_key));
6891
6892
12
            if (!task.recycle_rowset_key.empty()) {
6893
12
                txn->remove(task.recycle_rowset_key);
6894
12
                LOG_INFO("remove recycle rowset key in classification phase")
6895
12
                        .tag("key", hex(task.recycle_rowset_key));
6896
12
            }
6897
12
            if (!task.non_versioned_rowset_key.empty()) {
6898
12
                txn->remove(task.non_versioned_rowset_key);
6899
12
                LOG_INFO("remove non versioned rowset key in classification phase")
6900
12
                        .tag("key", hex(task.non_versioned_rowset_key));
6901
12
            }
6902
6903
12
            err = txn->commit();
6904
12
            if (err == TxnErrorCode::TXN_CONFLICT) {
6905
1
                VLOG_DEBUG << "decrease rowset ref count but txn conflict in classification, retry"
6906
0
                           << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
6907
0
                           << ", ref_count=" << ref_count << ", retry=" << i;
6908
1
                std::this_thread::sleep_for(std::chrono::milliseconds(500));
6909
1
                continue;
6910
11
            } else if (err != TxnErrorCode::TXN_OK) {
6911
0
                LOG_WARNING("failed to commit txn when classifying rowset task")
6912
0
                        .tag("instance_id", instance_id_)
6913
0
                        .tag("tablet_id", tablet_id)
6914
0
                        .tag("rowset_id", rowset_id)
6915
0
                        .tag("err", err);
6916
0
                return -1;
6917
0
            }
6918
11
            return 1; // handled, not added to batch delete
6919
49
        } else {
6920
            // ref_count == 1: Add to batch delete plan without modifying any KV.
6921
            // Keep recycle_rowset_key as "pending recycle" marker until data is actually deleted.
6922
49
            LOG_INFO("add rowset to batch delete plan")
6923
49
                    .tag("instance_id", instance_id_)
6924
49
                    .tag("tablet_id", tablet_id)
6925
49
                    .tag("rowset_id", rowset_id)
6926
49
                    .tag("resource_id", rowset_meta.resource_id())
6927
49
                    .tag("ref_count", ref_count);
6928
6929
49
            batch_delete_tasks.push_back(std::move(task));
6930
49
            return 0; // added to batch delete
6931
49
        }
6932
61
    }
6933
6934
0
    LOG_WARNING("failed to classify rowset task after retry")
6935
0
            .tag("instance_id", instance_id_)
6936
0
            .tag("tablet_id", tablet_id)
6937
0
            .tag("rowset_id", rowset_id)
6938
0
            .tag("retry", MAX_RETRY);
6939
0
    return -1;
6940
60
}
6941
6942
10
int InstanceRecycler::cleanup_rowset_metadata(const std::vector<RowsetDeleteTask>& tasks) {
6943
10
    int ret = 0;
6944
49
    for (const auto& task : tasks) {
6945
49
        int64_t tablet_id = task.rowset_meta.tablet_id();
6946
49
        const std::string& rowset_id = task.rowset_meta.rowset_id_v2();
6947
6948
        // Note: decrement_packed_file_ref_counts is already called in delete_rowset_data,
6949
        // so we don't need to call it again here.
6950
6951
        // Remove all metadata keys in one transaction
6952
49
        std::unique_ptr<Transaction> txn;
6953
49
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6954
49
        if (err != TxnErrorCode::TXN_OK) {
6955
0
            LOG_WARNING("failed to create txn when cleaning up metadata")
6956
0
                    .tag("instance_id", instance_id_)
6957
0
                    .tag("tablet_id", tablet_id)
6958
0
                    .tag("rowset_id", rowset_id)
6959
0
                    .tag("err", err);
6960
0
            ret = -1;
6961
0
            continue;
6962
0
        }
6963
6964
49
        std::string_view reference_instance_id = instance_id_;
6965
49
        if (task.rowset_meta.has_reference_instance_id()) {
6966
5
            reference_instance_id = task.rowset_meta.reference_instance_id();
6967
5
        }
6968
6969
49
        txn->remove(task.rowset_ref_count_key);
6970
49
        LOG_INFO("delete rowset data ref count key in cleanup phase")
6971
49
                .tag("instance_id", instance_id_)
6972
49
                .tag("tablet_id", tablet_id)
6973
49
                .tag("rowset_id", rowset_id)
6974
49
                .tag("ref_count_key", hex(task.rowset_ref_count_key));
6975
6976
49
        std::string dbm_start_key =
6977
49
                meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
6978
49
        std::string dbm_end_key = meta_delete_bitmap_key(
6979
49
                {reference_instance_id, tablet_id, rowset_id,
6980
49
                 std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
6981
49
        txn->remove(dbm_start_key, dbm_end_key);
6982
49
        LOG_INFO("remove delete bitmap kv in cleanup phase")
6983
49
                .tag("instance_id", instance_id_)
6984
49
                .tag("tablet_id", tablet_id)
6985
49
                .tag("rowset_id", rowset_id)
6986
49
                .tag("begin", hex(dbm_start_key))
6987
49
                .tag("end", hex(dbm_end_key));
6988
6989
49
        std::string versioned_dbm_start_key =
6990
49
                versioned::meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id});
6991
49
        std::string versioned_dbm_end_key = versioned_dbm_start_key;
6992
49
        encode_int64(INT64_MAX, &versioned_dbm_end_key);
6993
49
        txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
6994
49
        LOG_INFO("remove versioned delete bitmap kv in cleanup phase")
6995
49
                .tag("instance_id", instance_id_)
6996
49
                .tag("tablet_id", tablet_id)
6997
49
                .tag("rowset_id", rowset_id)
6998
49
                .tag("begin", hex(versioned_dbm_start_key))
6999
49
                .tag("end", hex(versioned_dbm_end_key));
7000
7001
        // Remove versioned meta rowset key
7002
49
        if (!task.versioned_rowset_key.empty()) {
7003
49
            std::string versioned_rowset_key_end = task.versioned_rowset_key;
7004
49
            encode_int64(INT64_MAX, &versioned_rowset_key_end);
7005
49
            txn->remove(task.versioned_rowset_key, versioned_rowset_key_end);
7006
49
            LOG_INFO("remove versioned meta rowset key in cleanup phase")
7007
49
                    .tag("instance_id", instance_id_)
7008
49
                    .tag("tablet_id", tablet_id)
7009
49
                    .tag("rowset_id", rowset_id)
7010
49
                    .tag("begin", hex(task.versioned_rowset_key))
7011
49
                    .tag("end", hex(versioned_rowset_key_end));
7012
49
        }
7013
7014
49
        if (!task.non_versioned_rowset_key.empty()) {
7015
49
            txn->remove(task.non_versioned_rowset_key);
7016
49
            LOG_INFO("remove non versioned rowset key in cleanup phase")
7017
49
                    .tag("instance_id", instance_id_)
7018
49
                    .tag("tablet_id", tablet_id)
7019
49
                    .tag("rowset_id", rowset_id)
7020
49
                    .tag("key", hex(task.non_versioned_rowset_key));
7021
49
        }
7022
7023
        // Remove recycle_rowset_key last to ensure retry safety:
7024
        // if cleanup fails, this key remains and triggers next round retry.
7025
49
        if (!task.recycle_rowset_key.empty()) {
7026
49
            txn->remove(task.recycle_rowset_key);
7027
49
            LOG_INFO("remove recycle rowset key in cleanup phase")
7028
49
                    .tag("instance_id", instance_id_)
7029
49
                    .tag("tablet_id", tablet_id)
7030
49
                    .tag("rowset_id", rowset_id)
7031
49
                    .tag("key", hex(task.recycle_rowset_key));
7032
49
        }
7033
7034
49
        err = txn->commit();
7035
49
        if (err != TxnErrorCode::TXN_OK) {
7036
            // Metadata cleanup failed. recycle_rowset_key remains, next round will retry.
7037
0
            LOG_WARNING("failed to commit cleanup metadata txn, will retry next round")
7038
0
                    .tag("instance_id", instance_id_)
7039
0
                    .tag("tablet_id", tablet_id)
7040
0
                    .tag("rowset_id", rowset_id)
7041
0
                    .tag("err", err);
7042
0
            ret = -1;
7043
0
            continue;
7044
0
        }
7045
7046
49
        LOG_INFO("cleanup rowset metadata success")
7047
49
                .tag("instance_id", instance_id_)
7048
49
                .tag("tablet_id", tablet_id)
7049
49
                .tag("rowset_id", rowset_id);
7050
49
    }
7051
10
    return ret;
7052
10
}
7053
7054
} // namespace doris::cloud