Coverage Report

Created: 2025-10-10 19:24

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <initializer_list>
36
#include <memory>
37
#include <numeric>
38
#include <string>
39
#include <string_view>
40
#include <utility>
41
42
#include "common/defer.h"
43
#include "common/stopwatch.h"
44
#include "meta-service/meta_service.h"
45
#include "meta-service/meta_service_helper.h"
46
#include "meta-service/meta_service_schema.h"
47
#include "meta-store/blob_message.h"
48
#include "meta-store/meta_reader.h"
49
#include "meta-store/txn_kv.h"
50
#include "meta-store/txn_kv_error.h"
51
#include "meta-store/versioned_value.h"
52
#include "recycler/checker.h"
53
#ifdef ENABLE_HDFS_STORAGE_VAULT
54
#include "recycler/hdfs_accessor.h"
55
#endif
56
#include "recycler/s3_accessor.h"
57
#include "recycler/storage_vault_accessor.h"
58
#ifdef UNIT_TEST
59
#include "../test/mock_accessor.h"
60
#endif
61
#include "common/bvars.h"
62
#include "common/config.h"
63
#include "common/encryption_util.h"
64
#include "common/logging.h"
65
#include "common/simple_thread_pool.h"
66
#include "common/util.h"
67
#include "cpp/sync_point.h"
68
#include "meta-store/codec.h"
69
#include "meta-store/keys.h"
70
#include "recycler/recycler_service.h"
71
#include "recycler/sync_executor.h"
72
#include "recycler/util.h"
73
74
namespace doris::cloud {
75
76
using namespace std::chrono;
77
78
// return 0 for success get a key, 1 for key not found, negative for error
79
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
80
0
    std::unique_ptr<Transaction> txn;
81
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
82
0
    if (err != TxnErrorCode::TXN_OK) {
83
0
        return -1;
84
0
    }
85
0
    switch (txn->get(key, &val, true)) {
86
0
    case TxnErrorCode::TXN_OK:
87
0
        return 0;
88
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
89
0
        return 1;
90
0
    default:
91
0
        return -1;
92
0
    };
93
0
}
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
94
95
// 0 for success, negative for error
96
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
97
270
                   std::unique_ptr<RangeGetIterator>& it) {
98
270
    std::unique_ptr<Transaction> txn;
99
270
    TxnErrorCode err = txn_kv->create_txn(&txn);
100
270
    if (err != TxnErrorCode::TXN_OK) {
101
0
        return -1;
102
0
    }
103
270
    switch (txn->get(begin, end, &it, true)) {
104
270
    case TxnErrorCode::TXN_OK:
105
270
        return 0;
106
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
107
0
        return 1;
108
0
    default:
109
0
        return -1;
110
270
    };
111
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
97
251
                   std::unique_ptr<RangeGetIterator>& it) {
98
251
    std::unique_ptr<Transaction> txn;
99
251
    TxnErrorCode err = txn_kv->create_txn(&txn);
100
251
    if (err != TxnErrorCode::TXN_OK) {
101
0
        return -1;
102
0
    }
103
251
    switch (txn->get(begin, end, &it, true)) {
104
251
    case TxnErrorCode::TXN_OK:
105
251
        return 0;
106
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
107
0
        return 1;
108
0
    default:
109
0
        return -1;
110
251
    };
111
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
97
19
                   std::unique_ptr<RangeGetIterator>& it) {
98
19
    std::unique_ptr<Transaction> txn;
99
19
    TxnErrorCode err = txn_kv->create_txn(&txn);
100
19
    if (err != TxnErrorCode::TXN_OK) {
101
0
        return -1;
102
0
    }
103
19
    switch (txn->get(begin, end, &it, true)) {
104
19
    case TxnErrorCode::TXN_OK:
105
19
        return 0;
106
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
107
0
        return 1;
108
0
    default:
109
0
        return -1;
110
19
    };
111
0
}
112
113
// return 0 for success otherwise error
114
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
115
6
    std::unique_ptr<Transaction> txn;
116
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
117
6
    if (err != TxnErrorCode::TXN_OK) {
118
0
        return -1;
119
0
    }
120
10
    for (auto k : keys) {
121
10
        txn->remove(k);
122
10
    }
123
6
    switch (txn->commit()) {
124
6
    case TxnErrorCode::TXN_OK:
125
6
        return 0;
126
0
    case TxnErrorCode::TXN_CONFLICT:
127
0
        return -1;
128
0
    default:
129
0
        return -1;
130
6
    }
131
6
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
114
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
115
5
    std::unique_ptr<Transaction> txn;
116
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
117
5
    if (err != TxnErrorCode::TXN_OK) {
118
0
        return -1;
119
0
    }
120
9
    for (auto k : keys) {
121
9
        txn->remove(k);
122
9
    }
123
5
    switch (txn->commit()) {
124
5
    case TxnErrorCode::TXN_OK:
125
5
        return 0;
126
0
    case TxnErrorCode::TXN_CONFLICT:
127
0
        return -1;
128
0
    default:
129
0
        return -1;
130
5
    }
131
5
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
114
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
115
1
    std::unique_ptr<Transaction> txn;
116
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
117
1
    if (err != TxnErrorCode::TXN_OK) {
118
0
        return -1;
119
0
    }
120
1
    for (auto k : keys) {
121
1
        txn->remove(k);
122
1
    }
123
1
    switch (txn->commit()) {
124
1
    case TxnErrorCode::TXN_OK:
125
1
        return 0;
126
0
    case TxnErrorCode::TXN_CONFLICT:
127
0
        return -1;
128
0
    default:
129
0
        return -1;
130
1
    }
131
1
}
132
133
// return 0 for success otherwise error
134
54
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
135
54
    std::unique_ptr<Transaction> txn;
136
54
    TxnErrorCode err = txn_kv->create_txn(&txn);
137
54
    if (err != TxnErrorCode::TXN_OK) {
138
0
        return -1;
139
0
    }
140
109k
    for (auto& k : keys) {
141
109k
        txn->remove(k);
142
109k
    }
143
54
    switch (txn->commit()) {
144
54
    case TxnErrorCode::TXN_OK:
145
54
        return 0;
146
0
    case TxnErrorCode::TXN_CONFLICT:
147
0
        return -1;
148
0
    default:
149
0
        return -1;
150
54
    }
151
54
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
134
48
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
135
48
    std::unique_ptr<Transaction> txn;
136
48
    TxnErrorCode err = txn_kv->create_txn(&txn);
137
48
    if (err != TxnErrorCode::TXN_OK) {
138
0
        return -1;
139
0
    }
140
105k
    for (auto& k : keys) {
141
105k
        txn->remove(k);
142
105k
    }
143
48
    switch (txn->commit()) {
144
48
    case TxnErrorCode::TXN_OK:
145
48
        return 0;
146
0
    case TxnErrorCode::TXN_CONFLICT:
147
0
        return -1;
148
0
    default:
149
0
        return -1;
150
48
    }
151
48
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
134
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
135
6
    std::unique_ptr<Transaction> txn;
136
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
137
6
    if (err != TxnErrorCode::TXN_OK) {
138
0
        return -1;
139
0
    }
140
4.00k
    for (auto& k : keys) {
141
4.00k
        txn->remove(k);
142
4.00k
    }
143
6
    switch (txn->commit()) {
144
6
    case TxnErrorCode::TXN_OK:
145
6
        return 0;
146
0
    case TxnErrorCode::TXN_CONFLICT:
147
0
        return -1;
148
0
    default:
149
0
        return -1;
150
6
    }
151
6
}
152
153
// return 0 for success otherwise error
154
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
155
106k
                                       std::string_view end) {
156
106k
    std::unique_ptr<Transaction> txn;
157
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
158
106k
    if (err != TxnErrorCode::TXN_OK) {
159
0
        return -1;
160
0
    }
161
106k
    txn->remove(begin, end);
162
106k
    switch (txn->commit()) {
163
106k
    case TxnErrorCode::TXN_OK:
164
106k
        return 0;
165
0
    case TxnErrorCode::TXN_CONFLICT:
166
0
        return -1;
167
0
    default:
168
0
        return -1;
169
106k
    }
170
106k
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
155
106k
                                       std::string_view end) {
156
106k
    std::unique_ptr<Transaction> txn;
157
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
158
106k
    if (err != TxnErrorCode::TXN_OK) {
159
0
        return -1;
160
0
    }
161
106k
    txn->remove(begin, end);
162
106k
    switch (txn->commit()) {
163
106k
    case TxnErrorCode::TXN_OK:
164
106k
        return 0;
165
0
    case TxnErrorCode::TXN_CONFLICT:
166
0
        return -1;
167
0
    default:
168
0
        return -1;
169
106k
    }
170
106k
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
171
172
void scan_restore_job_rowset(
173
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
174
        std::string& msg,
175
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
176
177
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
178
                                      int64_t num_scanned, int64_t num_recycled,
179
52
                                      int64_t start_time) {
180
52
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
181
0
        int64_t cost =
182
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
183
0
        if (cost > config::recycle_task_threshold_seconds) {
184
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
185
0
                    .tag("instance_id", instance_id)
186
0
                    .tag("task", task_name)
187
0
                    .tag("num_scanned", num_scanned)
188
0
                    .tag("num_recycled", num_recycled);
189
0
        }
190
0
    }
191
52
    return;
192
52
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
179
50
                                      int64_t start_time) {
180
50
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
181
0
        int64_t cost =
182
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
183
0
        if (cost > config::recycle_task_threshold_seconds) {
184
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
185
0
                    .tag("instance_id", instance_id)
186
0
                    .tag("task", task_name)
187
0
                    .tag("num_scanned", num_scanned)
188
0
                    .tag("num_recycled", num_recycled);
189
0
        }
190
0
    }
191
50
    return;
192
50
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
179
2
                                      int64_t start_time) {
180
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
181
0
        int64_t cost =
182
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
183
0
        if (cost > config::recycle_task_threshold_seconds) {
184
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
185
0
                    .tag("instance_id", instance_id)
186
0
                    .tag("task", task_name)
187
0
                    .tag("num_scanned", num_scanned)
188
0
                    .tag("num_recycled", num_recycled);
189
0
        }
190
0
    }
191
2
    return;
192
2
}
193
194
4
// Builds the recycler: records this node's "ip:port", creates and starts the
// three worker pools shared by all instance recyclers, and sets up the lazy
// txn committer and the snapshot manager on top of `txn_kv`.
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);

    // Every pool uses the same parallelism and is started right after creation.
    auto make_started_pool = [](const char* pool_name) {
        auto pool =
                std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism, pool_name);
        pool->start();
        return pool;
    };
    auto s3_producer_pool = make_started_pool("s3_producer_pool");
    auto recycle_tablet_pool = make_started_pool("recycle_tablet_pool");
    auto group_recycle_function_pool = make_started_pool("group_recycle_function_pool");
    _thread_pool_group =
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
                                    std::move(group_recycle_function_pool));

    txn_lazy_committer_ =
            std::make_shared<TxnLazyCommitter>(txn_kv_, std::make_shared<ResourceManager>(txn_kv_));
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);
}
214
215
4
// Makes sure the recycler is stopped before it is destroyed.
Recycler::~Recycler() {
    if (stopped()) {
        return;
    }
    stop();
}
220
221
4
void Recycler::instance_scanner_callback() {
222
    // sleep 60 seconds before scheduling for the launch procedure to complete:
223
    // some bad hdfs connection may cause some log to stdout stderr
224
    // which may pollute .out file and affect the script to check success
225
4
    std::this_thread::sleep_for(
226
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
227
8
    while (!stopped()) {
228
4
        std::vector<InstanceInfoPB> instances;
229
4
        get_all_instances(txn_kv_.get(), instances);
230
        // TODO(plat1ko): delete job recycle kv of non-existent instances
231
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
232
4
            std::stringstream ss;
233
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
234
4
            return ss.str();
235
4
        }();
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
231
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
232
4
            std::stringstream ss;
233
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
234
4
            return ss.str();
235
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
236
4
        if (!instances.empty()) {
237
            // enqueue instances
238
3
            std::lock_guard lock(mtx_);
239
30
            for (auto& instance : instances) {
240
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
241
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
242
                // skip instance already in pending queue
243
30
                if (success) {
244
30
                    pending_instance_queue_.push_back(std::move(instance));
245
30
                }
246
30
            }
247
3
            pending_instance_cond_.notify_all();
248
3
        }
249
4
        {
250
4
            std::unique_lock lock(mtx_);
251
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
252
7
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
252
7
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
253
4
        }
254
4
    }
255
4
}
256
257
8
void Recycler::recycle_callback() {
258
38
    while (!stopped()) {
259
36
        InstanceInfoPB instance;
260
36
        {
261
36
            std::unique_lock lock(mtx_);
262
36
            pending_instance_cond_.wait(
263
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
263
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
264
36
            if (stopped()) {
265
6
                return;
266
6
            }
267
30
            instance = std::move(pending_instance_queue_.front());
268
30
            pending_instance_queue_.pop_front();
269
30
            pending_instance_set_.erase(instance.instance_id());
270
30
        }
271
0
        auto& instance_id = instance.instance_id();
272
30
        {
273
30
            std::lock_guard lock(mtx_);
274
            // skip instance in recycling
275
30
            if (recycling_instance_map_.count(instance_id)) continue;
276
30
        }
277
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
278
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
279
280
30
        if (int r = instance_recycler->init(); r != 0) {
281
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
282
0
                         << " ret=" << r;
283
0
            continue;
284
0
        }
285
30
        std::string recycle_job_key;
286
30
        job_recycle_key({instance_id}, &recycle_job_key);
287
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
288
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
289
30
        if (ret != 0) { // Prepare failed
290
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
291
20
                         << " ret=" << ret;
292
20
            continue;
293
20
        } else {
294
10
            std::lock_guard lock(mtx_);
295
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
296
10
        }
297
10
        if (stopped()) return;
298
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
299
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
300
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
301
10
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
302
10
        ret = instance_recycler->do_recycle();
303
        // If instance recycler has been aborted, don't finish this job
304
305
10
        if (!instance_recycler->stopped()) {
306
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
307
10
                                        ret == 0, ctime_ms);
308
10
        }
309
10
        if (instance_recycler->stopped() || ret != 0) {
310
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
311
0
        }
312
10
        {
313
10
            std::lock_guard lock(mtx_);
314
10
            recycling_instance_map_.erase(instance_id);
315
10
        }
316
317
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
318
10
        auto elpased_ms = now - ctime_ms;
319
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
320
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
321
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
322
10
                                             now + config::recycle_interval_seconds * 1000);
323
10
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
324
10
        LOG(INFO) << "recycle instance done, "
325
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
326
10
                  << " now: " << now;
327
328
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
329
330
10
        LOG_WARNING("finish recycle instance")
331
10
                .tag("instance_id", instance_id)
332
10
                .tag("cost_ms", elpased_ms);
333
10
    }
334
8
}
335
336
4
void Recycler::lease_recycle_jobs() {
337
54
    while (!stopped()) {
338
50
        std::vector<std::string> instances;
339
50
        instances.reserve(recycling_instance_map_.size());
340
50
        {
341
50
            std::lock_guard lock(mtx_);
342
50
            for (auto& [id, _] : recycling_instance_map_) {
343
30
                instances.push_back(id);
344
30
            }
345
50
        }
346
50
        for (auto& i : instances) {
347
30
            std::string recycle_job_key;
348
30
            job_recycle_key({i}, &recycle_job_key);
349
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
350
30
            if (ret == 1) {
351
0
                std::lock_guard lock(mtx_);
352
0
                if (auto it = recycling_instance_map_.find(i);
353
0
                    it != recycling_instance_map_.end()) {
354
0
                    it->second->stop();
355
0
                }
356
0
            }
357
30
        }
358
50
        {
359
50
            std::unique_lock lock(mtx_);
360
50
            notifier_.wait_for(lock,
361
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
362
100
                               [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
362
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
363
50
        }
364
50
    }
365
4
}
366
367
4
void Recycler::check_recycle_tasks() {
368
7
    while (!stopped()) {
369
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
370
3
        {
371
3
            std::lock_guard lock(mtx_);
372
3
            recycling_instance_map = recycling_instance_map_;
373
3
        }
374
3
        for (auto& entry : recycling_instance_map) {
375
0
            entry.second->check_recycle_tasks();
376
0
        }
377
378
3
        std::unique_lock lock(mtx_);
379
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
380
6
                           [&]() { return stopped(); });
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
380
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
381
3
    }
382
4
}
383
384
4
// Starts the recycler: applies the instance white/black lists, optionally
// starts the checker, registers the recycler service on `server` (when one is
// given) and launches the background worker threads.
// Returns 0 on success, or the checker's non-zero start code if it fails to start.
int Recycler::start(brpc::Server* server) {
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
    S3Environment::getInstance();

    if (config::enable_checker) {
        checker_ = std::make_unique<Checker>(txn_kv_);
        if (int rc = checker_->start(); rc != 0) {
            std::string failure = "failed to start checker";
            LOG(ERROR) << failure;
            std::cerr << failure << std::endl;
            return rc;
        }
        std::string started = "checker started";
        LOG(INFO) << started;
        std::cout << started << std::endl;
    }

    if (server) {
        // Add service; the server takes ownership of the service object.
        server->AddService(
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_),
                brpc::SERVER_OWNS_SERVICE);
    }

    // One scanner thread, `recycle_concurrency` recycle workers, one lease
    // thread and one task-check thread.
    workers_.emplace_back([this] { instance_scanner_callback(); });
    for (int worker = 0; worker < config::recycle_concurrency; ++worker) {
        workers_.emplace_back([this] { recycle_callback(); });
    }
    workers_.emplace_back([this] { lease_recycle_jobs(); });
    workers_.emplace_back([this] { check_recycle_tasks(); });
    return 0;
}
420
421
4
void Recycler::stop() {
422
4
    stopped_ = true;
423
4
    notifier_.notify_all();
424
4
    pending_instance_cond_.notify_all();
425
4
    {
426
4
        std::lock_guard lock(mtx_);
427
4
        for (auto& [_, recycler] : recycling_instance_map_) {
428
0
            recycler->stop();
429
0
        }
430
4
    }
431
20
    for (auto& w : workers_) {
432
20
        if (w.joinable()) w.join();
433
20
    }
434
4
    if (checker_) {
435
0
        checker_->stop();
436
0
    }
437
4
}
438
439
// Per-instance cache of the inverted index information of tablet schemas,
// keyed by <index_id, schema_version>. Schemas are loaded from the kv store on
// a cache miss and remembered afterwards.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Looks up the inverted index info of <index_id, schema_version> into `res`.
    // `res` is left untouched when the schema is known to have no inverted index.
    // Returns 0 on success and -1 when the transaction cannot be created or the
    // schema value is malformed. When reading the schema fails, the TxnErrorCode
    // cast to int is returned (the original comment says 1 means "schema kv not
    // found" — presumably the value of TXN_KEY_NOT_FOUND; confirm).
    //
    // The lookup and the later insert are not one atomic step: two threads may
    // both miss the cache for the same key and both read the schema from kv.
    // This keeps the locked section small at the cost of an occasional repeated
    // read; the repeated insert does not affect correctness.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // V1 is used when the schema does not record a storage format.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            for (auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Remembers `index_info` for <index_id, schema_version>.
    // An empty index list means this schema has no inverted index.
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    std::mutex mtx_; // guards both containers below
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        size_t operator()(const Key& key) const {
            // Both halves of the key must contribute to the hash. Hashing only
            // the schema version would put every index with the same version
            // into the same bucket.
            size_t seed = std::hash<int64_t> {}(key.first);
            seed ^= std::hash<int32_t> {}(key.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
531
532
// Builds the recycler of one instance on top of the shared kv store, thread
// pools and lazy txn committer, and creates its own inverted index cache and
// snapshot manager.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)) {
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);

    // The recycler's resource manager is not notified when instance info
    // changes, so push the instance info we were given into it here to make
    // sure it works with the latest one.
    auto* resource_mgr = txn_lazy_committer_->resource_manager();
    resource_mgr->refresh_instance(instance_id_, instance);
}
547
548
101
// Defaulted here, in the file that defines InvertedIndexIdCache.
// NOTE(review): presumably out of line so the inverted_index_id_cache_ member
// can be destroyed where that class is a complete type — confirm in the header.
InstanceRecycler::~InstanceRecycler() = default;
549
550
89
int InstanceRecycler::init_obj_store_accessors() {
551
89
    for (const auto& obj_info : instance_info_.obj_info()) {
552
58
#ifdef UNIT_TEST
553
58
        auto accessor = std::make_shared<MockAccessor>();
554
#else
555
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
556
        if (!s3_conf) {
557
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
558
            return -1;
559
        }
560
561
        std::shared_ptr<S3Accessor> accessor;
562
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
563
        if (ret != 0) {
564
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
565
                         << " resource_id=" << obj_info.id();
566
            return ret;
567
        }
568
#endif
569
58
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
570
58
    }
571
572
89
    return 0;
573
89
}
574
575
89
int InstanceRecycler::init_storage_vault_accessors() {
576
89
    if (instance_info_.resource_ids().empty()) {
577
82
        return 0;
578
82
    }
579
580
7
    FullRangeGetOptions opts(txn_kv_);
581
7
    opts.prefetch = true;
582
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
583
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
584
585
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
586
18
        auto [k, v] = *kv;
587
18
        StorageVaultPB vault;
588
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
589
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
590
0
            return -1;
591
0
        }
592
18
        std::string recycler_storage_vault_white_list = accumulate(
593
18
                config::recycler_storage_vault_white_list.begin(),
594
18
                config::recycler_storage_vault_white_list.end(), std::string(),
595
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
595
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
596
18
        LOG_INFO("config::recycler_storage_vault_white_list")
597
18
                .tag("", recycler_storage_vault_white_list);
598
18
        if (!config::recycler_storage_vault_white_list.empty()) {
599
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
600
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
601
8
                it == config::recycler_storage_vault_white_list.end()) {
602
2
                LOG_WARNING(
603
2
                        "failed to init accessor for vault because this vault is not in "
604
2
                        "config::recycler_storage_vault_white_list. ")
605
2
                        .tag(" vault name:", vault.name())
606
2
                        .tag(" config::recycler_storage_vault_white_list:",
607
2
                             recycler_storage_vault_white_list);
608
2
                continue;
609
2
            }
610
8
        }
611
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
612
16
                                 &accessor_map_, &vault);
613
16
        if (vault.has_hdfs_info()) {
614
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
615
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
616
9
            int ret = accessor->init();
617
9
            if (ret != 0) {
618
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
619
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
620
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
621
4
                continue;
622
4
            }
623
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
624
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
625
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
626
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
627
#else
628
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
629
                       << "but HDFS storage vaults were detected";
630
#endif
631
7
        } else if (vault.has_obj_info()) {
632
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
633
7
            if (!s3_conf) {
634
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
635
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
636
1
                continue;
637
1
            }
638
639
6
            std::shared_ptr<S3Accessor> accessor;
640
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
641
6
            if (ret != 0) {
642
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
643
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
644
0
                             << " ret=" << ret
645
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
646
0
                continue;
647
0
            }
648
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
649
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
650
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
651
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
652
6
        }
653
16
    }
654
655
7
    if (!it->is_valid()) {
656
0
        LOG_WARNING("failed to get storage vault kv");
657
0
        return -1;
658
0
    }
659
660
7
    if (accessor_map_.empty()) {
661
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
662
1
        return -2;
663
1
    }
664
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
665
6
             instance_id_);
666
667
6
    return 0;
668
7
}
669
670
89
int InstanceRecycler::init() {
671
89
    int ret = init_obj_store_accessors();
672
89
    if (ret != 0) {
673
0
        return ret;
674
0
    }
675
676
89
    return init_storage_vault_accessors();
677
89
}
678
679
// Bundles several int-returning callables into one task.
//
// When the returned task is invoked, every callable runs, in the order given,
// regardless of earlier failures. The task's result is the last non-zero value
// produced, or 0 when every callable returned 0 (or none were given).
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() -> int {
        int last_failure = 0;
        auto remember = [&last_failure](int rc) {
            if (rc != 0) {
                last_failure = rc;
            }
        };
        // Comma fold: callables are evaluated strictly left to right.
        (remember(funcs()), ...);
        return last_failure;
    };
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
680
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
681
10
    return [funcs...]() {
682
10
        return [](std::initializer_list<int> ret_vals) {
683
10
            int i = 0;
684
10
            for (int ret : ret_vals) {
685
10
                if (ret != 0) {
686
10
                    i = ret;
687
10
                }
688
10
            }
689
10
            return i;
690
10
        }({funcs()...});
691
10
    };
692
10
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8ZNS2_10do_recycleEvE3$_9EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
693
694
10
int InstanceRecycler::do_recycle() {
695
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
696
10
    tablet_metrics_context_.reset();
697
10
    segment_metrics_context_.reset();
698
10
    DORIS_CLOUD_DEFER {
699
10
        tablet_metrics_context_.finish_report();
700
10
        segment_metrics_context_.finish_report();
701
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
698
10
    DORIS_CLOUD_DEFER {
699
10
        tablet_metrics_context_.finish_report();
700
10
        segment_metrics_context_.finish_report();
701
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
702
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
703
0
        int res = recycle_cluster_snapshots();
704
0
        if (res != 0) {
705
0
            return -1;
706
0
        }
707
0
        return recycle_deleted_instance();
708
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
709
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
710
10
                                        fmt::format("instance id {}", instance_id_),
711
109
                                        [](int r) { return r != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
711
109
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
712
10
        sync_executor
713
10
                .add(task_wrapper(
714
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
714
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
715
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
715
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
716
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
717
                                   // becase they may both recycle the same set of tablets
718
                        // recycle dropped table or idexes(mv, rollup)
719
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
719
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
720
                        // recycle dropped partitions
721
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
721
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
722
10
                .add(task_wrapper(
723
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
723
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
724
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
724
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
725
10
                .add(task_wrapper(
726
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
726
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
727
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
727
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
728
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
728
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
729
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
729
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
730
10
                .add(task_wrapper(
731
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
731
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
732
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
732
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
733
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
733
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
734
10
        bool finished = true;
735
10
        std::vector<int> rets = sync_executor.when_all(&finished);
736
110
        for (int ret : rets) {
737
110
            if (ret != 0) {
738
0
                return ret;
739
0
            }
740
110
        }
741
10
        return finished ? 0 : -1;
742
10
    } else {
743
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
744
0
                     << " instance_id=" << instance_id_;
745
0
        return -1;
746
0
    }
747
10
}
748
749
/**
750
* 1. delete all remote data
751
* 2. delete all kv
752
* 3. remove instance kv
753
*/
754
4
int InstanceRecycler::recycle_deleted_instance() {
755
4
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
756
757
4
    int ret = 0;
758
4
    auto start_time = steady_clock::now();
759
760
4
    DORIS_CLOUD_DEFER {
761
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
762
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
763
4
                     << " recycle deleted instance, cost=" << cost
764
4
                     << "s, instance_id=" << instance_id_;
765
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
760
4
    DORIS_CLOUD_DEFER {
761
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
762
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
763
4
                     << " recycle deleted instance, cost=" << cost
764
4
                     << "s, instance_id=" << instance_id_;
765
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
766
767
4
    bool has_snapshots = false;
768
4
    if (has_cluster_snapshots(&has_snapshots) != 0) {
769
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
770
0
        return -1;
771
4
    } else if (has_snapshots) {
772
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
773
1
        return 0;
774
1
    }
775
776
    // delete all remote data
777
3
    for (auto& [_, accessor] : accessor_map_) {
778
3
        if (stopped()) {
779
0
            return ret;
780
0
        }
781
782
3
        LOG(INFO) << "begin to delete all objects in " << accessor->uri();
783
3
        int del_ret = accessor->delete_all();
784
3
        if (del_ret == 0) {
785
3
            LOG(INFO) << "successfully delete all objects in " << accessor->uri();
786
3
        } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
787
            // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
788
            // so the recycling has been successful.
789
0
            ret = -1;
790
0
        }
791
3
    }
792
793
3
    if (ret != 0) {
794
0
        LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
795
0
        return ret;
796
0
    }
797
798
    // delete all kv
799
3
    std::unique_ptr<Transaction> txn;
800
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
801
3
    if (err != TxnErrorCode::TXN_OK) {
802
0
        LOG(WARNING) << "failed to create txn";
803
0
        ret = -1;
804
0
        return -1;
805
0
    }
806
3
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
807
    // delete kv before deleting objects to prevent the checker from misjudging data loss
808
3
    std::string start_txn_key = txn_key_prefix(instance_id_);
809
3
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
810
3
    txn->remove(start_txn_key, end_txn_key);
811
3
    std::string start_version_key = version_key_prefix(instance_id_);
812
3
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
813
3
    txn->remove(start_version_key, end_version_key);
814
3
    std::string start_meta_key = meta_key_prefix(instance_id_);
815
3
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
816
3
    txn->remove(start_meta_key, end_meta_key);
817
3
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
818
3
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
819
3
    txn->remove(start_recycle_key, end_recycle_key);
820
3
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
821
3
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
822
3
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
823
3
    std::string start_copy_key = copy_key_prefix(instance_id_);
824
3
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
825
3
    txn->remove(start_copy_key, end_copy_key);
826
    // should not remove job key range, because we need to reserve job recycle kv
827
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
828
3
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
829
3
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
830
3
    txn->remove(start_job_tablet_key, end_job_tablet_key);
831
3
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
832
3
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
833
3
    std::string start_vault_key = storage_vault_key(key_info0);
834
3
    std::string end_vault_key = storage_vault_key(key_info1);
835
3
    txn->remove(start_vault_key, end_vault_key);
836
3
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, 0, ""});
837
3
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, INT64_MAX, ""});
838
3
    txn->remove(dbm_start_key, dbm_end_key);
839
3
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
840
3
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
841
3
    txn->remove(versioned_version_key_start, versioned_version_key_end);
842
3
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
843
3
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
844
3
    txn->remove(versioned_index_key_start, versioned_index_key_end);
845
3
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
846
3
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
847
3
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
848
3
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
849
3
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
850
3
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
851
3
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
852
3
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
853
3
    txn->remove(versioned_data_key_start, versioned_data_key_end);
854
3
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
855
3
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
856
3
    txn->remove(versioned_log_key_start, versioned_log_key_end);
857
3
    err = txn->commit();
858
3
    if (err != TxnErrorCode::TXN_OK) {
859
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
860
0
        ret = -1;
861
0
    }
862
863
3
    if (ret == 0) {
864
        // remove instance kv
865
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
866
3
        err = txn_kv_->create_txn(&txn);
867
3
        if (err != TxnErrorCode::TXN_OK) {
868
0
            LOG(WARNING) << "failed to create txn";
869
0
            ret = -1;
870
0
            return ret;
871
0
        }
872
3
        std::string key;
873
3
        instance_key({instance_id_}, &key);
874
3
        txn->remove(key);
875
3
        err = txn->commit();
876
3
        if (err != TxnErrorCode::TXN_OK) {
877
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
878
0
                         << " err=" << err;
879
0
            ret = -1;
880
0
        }
881
3
    }
882
3
    return ret;
883
3
}
884
885
bool is_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id,
886
57.0k
                     int64_t txn_id) {
887
57.0k
    std::unique_ptr<Transaction> txn;
888
57.0k
    TxnErrorCode err = txn_kv->create_txn(&txn);
889
57.0k
    if (err != TxnErrorCode::TXN_OK) {
890
0
        LOG(WARNING) << "failed to create txn, txn_id=" << txn_id << " instance_id=" << instance_id;
891
0
        return false;
892
0
    }
893
894
57.0k
    std::string index_val;
895
57.0k
    const std::string index_key = txn_index_key({instance_id, txn_id});
896
57.0k
    err = txn->get(index_key, &index_val);
897
57.0k
    if (err != TxnErrorCode::TXN_OK) {
898
53.0k
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
899
53.0k
            TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_recycled");
900
            // txn has been recycled;
901
53.0k
            LOG(INFO) << "txn index key has been recycled, txn_id=" << txn_id
902
53.0k
                      << " instance_id=" << instance_id;
903
53.0k
            return true;
904
53.0k
        }
905
0
        LOG(WARNING) << "failed to get txn index key, txn_id=" << txn_id
906
0
                     << " instance_id=" << instance_id << " key=" << hex(index_key)
907
0
                     << " err=" << err;
908
0
        return false;
909
53.0k
    }
910
911
4.00k
    TxnIndexPB index_pb;
912
4.00k
    if (!index_pb.ParseFromString(index_val)) {
913
0
        LOG(WARNING) << "failed to parse txn_index_pb, txn_id=" << txn_id
914
0
                     << " instance_id=" << instance_id;
915
0
        return false;
916
0
    }
917
918
4.00k
    DCHECK(index_pb.has_tablet_index() == true);
919
4.00k
    if (!index_pb.tablet_index().has_db_id()) {
920
        // In the previous version, the db_id was not set in the index_pb.
921
        // If updating to the version which enable txn lazy commit, the db_id will be set.
922
0
        LOG(INFO) << "txn index has no db_id, txn_id=" << txn_id << " instance_id=" << instance_id
923
0
                  << " index=" << index_pb.ShortDebugString();
924
0
        return true;
925
0
    }
926
927
4.00k
    int64_t db_id = index_pb.tablet_index().db_id();
928
4.00k
    DCHECK_GT(db_id, 0) << "db_id=" << db_id << " txn_id=" << txn_id
929
0
                        << " instance_id=" << instance_id;
930
931
4.00k
    std::string info_val;
932
4.00k
    const std::string info_key = txn_info_key({instance_id, db_id, txn_id});
933
4.00k
    err = txn->get(info_key, &info_val);
934
4.00k
    if (err != TxnErrorCode::TXN_OK) {
935
0
        if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
936
            // txn info has been recycled;
937
0
            LOG(INFO) << "txn info key has been recycled, db_id=" << db_id << " txn_id=" << txn_id
938
0
                      << " instance_id=" << instance_id;
939
0
            return true;
940
0
        }
941
942
0
        DCHECK(err != TxnErrorCode::TXN_KEY_NOT_FOUND);
943
0
        LOG(WARNING) << "failed to get txn info key, txn_id=" << txn_id
944
0
                     << " instance_id=" << instance_id << " key=" << hex(info_key)
945
0
                     << " err=" << err;
946
0
        return false;
947
0
    }
948
949
4.00k
    TxnInfoPB txn_info;
950
4.00k
    if (!txn_info.ParseFromString(info_val)) {
951
0
        LOG(WARNING) << "failed to parse txn_info, txn_id=" << txn_id
952
0
                     << " instance_id=" << instance_id;
953
0
        return false;
954
0
    }
955
956
4.00k
    DCHECK(txn_info.txn_id() == txn_id) << "txn_id=" << txn_id << " instance_id=" << instance_id
957
0
                                        << " txn_info=" << txn_info.ShortDebugString();
958
959
4.00k
    if (TxnStatusPB::TXN_STATUS_ABORTED == txn_info.status() ||
960
4.00k
        TxnStatusPB::TXN_STATUS_VISIBLE == txn_info.status()) {
961
2.00k
        TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_has_been_aborted", &txn_info);
962
2.00k
        return true;
963
2.00k
    }
964
965
2.00k
    TEST_SYNC_POINT_CALLBACK("is_txn_finished::txn_not_finished", &txn_info);
966
2.00k
    return false;
967
4.00k
}
968
969
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
970
4.01k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
971
4.01k
    if (config::force_immediate_recycle) {
972
8
        return 0L;
973
8
    }
974
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
975
4.00k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
976
4.00k
    int64_t retention_seconds = config::retention_seconds;
977
4.00k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
978
3.10k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
979
3.10k
    }
980
4.00k
    int64_t final_expiration = expiration + retention_seconds;
981
4.00k
    if (*earlest_ts > final_expiration) {
982
3
        *earlest_ts = final_expiration;
983
3
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
984
3
    }
985
4.00k
    return final_expiration;
986
4.01k
}
987
988
int64_t calculate_partition_expired_time(
989
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
990
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
991
9
    if (config::force_immediate_recycle) {
992
3
        return 0L;
993
3
    }
994
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
995
6
                                                            : partition_meta_pb.creation_time();
996
6
    int64_t retention_seconds = config::retention_seconds;
997
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
998
6
        retention_seconds =
999
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1000
6
    }
1001
6
    int64_t final_expiration = expiration + retention_seconds;
1002
6
    if (*earlest_ts > final_expiration) {
1003
2
        *earlest_ts = final_expiration;
1004
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1005
2
    }
1006
6
    return final_expiration;
1007
9
}
1008
1009
int64_t calculate_index_expired_time(const std::string& instance_id_,
1010
                                     const RecycleIndexPB& index_meta_pb,
1011
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1012
10
    if (config::force_immediate_recycle) {
1013
4
        return 0L;
1014
4
    }
1015
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1016
6
                                                        : index_meta_pb.creation_time();
1017
6
    int64_t retention_seconds = config::retention_seconds;
1018
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1019
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1020
6
    }
1021
6
    int64_t final_expiration = expiration + retention_seconds;
1022
6
    if (*earlest_ts > final_expiration) {
1023
2
        *earlest_ts = final_expiration;
1024
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1025
2
    }
1026
6
    return final_expiration;
1027
10
}
1028
1029
int64_t calculate_tmp_rowset_expired_time(
1030
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1031
57.0k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1032
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1033
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1034
    //  duration or timeout always < `retention_time` in practice.
1035
57.0k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1036
57.0k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1037
57.0k
                                 : tmp_rowset_meta_pb.creation_time();
1038
57.0k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1039
57.0k
    int64_t final_expiration = expiration + config::retention_seconds;
1040
57.0k
    if (*earlest_ts > final_expiration) {
1041
6
        *earlest_ts = final_expiration;
1042
6
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1043
6
    }
1044
57.0k
    return final_expiration;
1045
57.0k
}
1046
1047
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1048
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1049
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1050
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1051
8
        *earlest_ts = final_expiration / 1000;
1052
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1053
8
    }
1054
30.0k
    return final_expiration;
1055
30.0k
}
1056
1057
int64_t calculate_restore_job_expired_time(
1058
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1059
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1060
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1061
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1062
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1063
        // final state, recycle immediately
1064
41
        return 0L;
1065
41
    }
1066
    // not final state, wait much longer than the FE's timeout(1 day)
1067
0
    int64_t last_modified_s =
1068
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1069
0
    int64_t expiration = restore_job.expired_at_s() > 0
1070
0
                                 ? last_modified_s + restore_job.expired_at_s()
1071
0
                                 : last_modified_s;
1072
0
    int64_t final_expiration = expiration + config::retention_seconds;
1073
0
    if (*earlest_ts > final_expiration) {
1074
0
        *earlest_ts = final_expiration;
1075
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1076
0
    }
1077
0
    return final_expiration;
1078
41
}
1079
1080
17
int InstanceRecycler::recycle_indexes() {
1081
17
    const std::string task_name = "recycle_indexes";
1082
17
    int64_t num_scanned = 0;
1083
17
    int64_t num_expired = 0;
1084
17
    int64_t num_recycled = 0;
1085
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1086
1087
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
1088
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
1089
17
    std::string index_key0;
1090
17
    std::string index_key1;
1091
17
    recycle_index_key(index_key_info0, &index_key0);
1092
17
    recycle_index_key(index_key_info1, &index_key1);
1093
1094
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
1095
1096
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1097
17
    register_recycle_task(task_name, start_time);
1098
1099
17
    DORIS_CLOUD_DEFER {
1100
17
        unregister_recycle_task(task_name);
1101
17
        int64_t cost =
1102
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1103
17
        metrics_context.finish_report();
1104
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1105
17
                .tag("instance_id", instance_id_)
1106
17
                .tag("num_scanned", num_scanned)
1107
17
                .tag("num_expired", num_expired)
1108
17
                .tag("num_recycled", num_recycled);
1109
17
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1099
15
    DORIS_CLOUD_DEFER {
1100
15
        unregister_recycle_task(task_name);
1101
15
        int64_t cost =
1102
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1103
15
        metrics_context.finish_report();
1104
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1105
15
                .tag("instance_id", instance_id_)
1106
15
                .tag("num_scanned", num_scanned)
1107
15
                .tag("num_expired", num_expired)
1108
15
                .tag("num_recycled", num_recycled);
1109
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
1099
2
    DORIS_CLOUD_DEFER {
1100
2
        unregister_recycle_task(task_name);
1101
2
        int64_t cost =
1102
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1103
2
        metrics_context.finish_report();
1104
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
1105
2
                .tag("instance_id", instance_id_)
1106
2
                .tag("num_scanned", num_scanned)
1107
2
                .tag("num_expired", num_expired)
1108
2
                .tag("num_recycled", num_recycled);
1109
2
    };
1110
1111
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1112
1113
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
1114
17
    std::vector<std::string_view> index_keys;
1115
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1116
10
        ++num_scanned;
1117
10
        RecycleIndexPB index_pb;
1118
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1119
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1120
0
            return -1;
1121
0
        }
1122
10
        int64_t current_time = ::time(nullptr);
1123
10
        if (current_time <
1124
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1125
0
            return 0;
1126
0
        }
1127
10
        ++num_expired;
1128
        // decode index_id
1129
10
        auto k1 = k;
1130
10
        k1.remove_prefix(1);
1131
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1132
10
        decode_key(&k1, &out);
1133
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1134
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1135
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1136
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1137
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1138
        // Change state to RECYCLING
1139
10
        std::unique_ptr<Transaction> txn;
1140
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1141
10
        if (err != TxnErrorCode::TXN_OK) {
1142
0
            LOG_WARNING("failed to create txn").tag("err", err);
1143
0
            return -1;
1144
0
        }
1145
10
        std::string val;
1146
10
        err = txn->get(k, &val);
1147
10
        if (err ==
1148
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1149
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1150
0
            return 0;
1151
0
        }
1152
10
        if (err != TxnErrorCode::TXN_OK) {
1153
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1154
0
            return -1;
1155
0
        }
1156
10
        index_pb.Clear();
1157
10
        if (!index_pb.ParseFromString(val)) {
1158
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1159
0
            return -1;
1160
0
        }
1161
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1162
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1163
9
            txn->put(k, index_pb.SerializeAsString());
1164
9
            err = txn->commit();
1165
9
            if (err != TxnErrorCode::TXN_OK) {
1166
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1167
0
                return -1;
1168
0
            }
1169
9
        }
1170
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1171
1
            LOG_WARNING("failed to recycle tablets under index")
1172
1
                    .tag("table_id", index_pb.table_id())
1173
1
                    .tag("instance_id", instance_id_)
1174
1
                    .tag("index_id", index_id);
1175
1
            return -1;
1176
1
        }
1177
1178
9
        if (index_pb.has_db_id()) {
1179
            // Recycle the versioned keys
1180
3
            std::unique_ptr<Transaction> txn;
1181
3
            err = txn_kv_->create_txn(&txn);
1182
3
            if (err != TxnErrorCode::TXN_OK) {
1183
0
                LOG_WARNING("failed to create txn").tag("err", err);
1184
0
                return -1;
1185
0
            }
1186
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1187
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1188
3
            std::string index_inverted_key = versioned::index_inverted_key(
1189
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1190
3
            versioned_remove_all(txn.get(), meta_key);
1191
3
            txn->remove(index_key);
1192
3
            txn->remove(index_inverted_key);
1193
3
            err = txn->commit();
1194
3
            if (err != TxnErrorCode::TXN_OK) {
1195
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1196
0
                return -1;
1197
0
            }
1198
3
        }
1199
1200
9
        metrics_context.total_recycled_num = ++num_recycled;
1201
9
        metrics_context.report();
1202
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1203
9
        index_keys.push_back(k);
1204
9
        return 0;
1205
9
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1115
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1116
8
        ++num_scanned;
1117
8
        RecycleIndexPB index_pb;
1118
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1119
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1120
0
            return -1;
1121
0
        }
1122
8
        int64_t current_time = ::time(nullptr);
1123
8
        if (current_time <
1124
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1125
0
            return 0;
1126
0
        }
1127
8
        ++num_expired;
1128
        // decode index_id
1129
8
        auto k1 = k;
1130
8
        k1.remove_prefix(1);
1131
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1132
8
        decode_key(&k1, &out);
1133
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1134
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1135
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1136
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1137
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1138
        // Change state to RECYCLING
1139
8
        std::unique_ptr<Transaction> txn;
1140
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1141
8
        if (err != TxnErrorCode::TXN_OK) {
1142
0
            LOG_WARNING("failed to create txn").tag("err", err);
1143
0
            return -1;
1144
0
        }
1145
8
        std::string val;
1146
8
        err = txn->get(k, &val);
1147
8
        if (err ==
1148
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1149
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1150
0
            return 0;
1151
0
        }
1152
8
        if (err != TxnErrorCode::TXN_OK) {
1153
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1154
0
            return -1;
1155
0
        }
1156
8
        index_pb.Clear();
1157
8
        if (!index_pb.ParseFromString(val)) {
1158
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1159
0
            return -1;
1160
0
        }
1161
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1162
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1163
8
            txn->put(k, index_pb.SerializeAsString());
1164
8
            err = txn->commit();
1165
8
            if (err != TxnErrorCode::TXN_OK) {
1166
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1167
0
                return -1;
1168
0
            }
1169
8
        }
1170
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1171
0
            LOG_WARNING("failed to recycle tablets under index")
1172
0
                    .tag("table_id", index_pb.table_id())
1173
0
                    .tag("instance_id", instance_id_)
1174
0
                    .tag("index_id", index_id);
1175
0
            return -1;
1176
0
        }
1177
1178
8
        if (index_pb.has_db_id()) {
1179
            // Recycle the versioned keys
1180
2
            std::unique_ptr<Transaction> txn;
1181
2
            err = txn_kv_->create_txn(&txn);
1182
2
            if (err != TxnErrorCode::TXN_OK) {
1183
0
                LOG_WARNING("failed to create txn").tag("err", err);
1184
0
                return -1;
1185
0
            }
1186
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1187
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1188
2
            std::string index_inverted_key = versioned::index_inverted_key(
1189
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1190
2
            versioned_remove_all(txn.get(), meta_key);
1191
2
            txn->remove(index_key);
1192
2
            txn->remove(index_inverted_key);
1193
2
            err = txn->commit();
1194
2
            if (err != TxnErrorCode::TXN_OK) {
1195
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1196
0
                return -1;
1197
0
            }
1198
2
        }
1199
1200
8
        metrics_context.total_recycled_num = ++num_recycled;
1201
8
        metrics_context.report();
1202
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1203
8
        index_keys.push_back(k);
1204
8
        return 0;
1205
8
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1115
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1116
2
        ++num_scanned;
1117
2
        RecycleIndexPB index_pb;
1118
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
1119
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1120
0
            return -1;
1121
0
        }
1122
2
        int64_t current_time = ::time(nullptr);
1123
2
        if (current_time <
1124
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
1125
0
            return 0;
1126
0
        }
1127
2
        ++num_expired;
1128
        // decode index_id
1129
2
        auto k1 = k;
1130
2
        k1.remove_prefix(1);
1131
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1132
2
        decode_key(&k1, &out);
1133
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
1134
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
1135
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
1136
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
1137
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
1138
        // Change state to RECYCLING
1139
2
        std::unique_ptr<Transaction> txn;
1140
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1141
2
        if (err != TxnErrorCode::TXN_OK) {
1142
0
            LOG_WARNING("failed to create txn").tag("err", err);
1143
0
            return -1;
1144
0
        }
1145
2
        std::string val;
1146
2
        err = txn->get(k, &val);
1147
2
        if (err ==
1148
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1149
0
            LOG_INFO("index {} has been recycled or committed", index_id);
1150
0
            return 0;
1151
0
        }
1152
2
        if (err != TxnErrorCode::TXN_OK) {
1153
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
1154
0
            return -1;
1155
0
        }
1156
2
        index_pb.Clear();
1157
2
        if (!index_pb.ParseFromString(val)) {
1158
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
1159
0
            return -1;
1160
0
        }
1161
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
1162
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
1163
1
            txn->put(k, index_pb.SerializeAsString());
1164
1
            err = txn->commit();
1165
1
            if (err != TxnErrorCode::TXN_OK) {
1166
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1167
0
                return -1;
1168
0
            }
1169
1
        }
1170
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
1171
1
            LOG_WARNING("failed to recycle tablets under index")
1172
1
                    .tag("table_id", index_pb.table_id())
1173
1
                    .tag("instance_id", instance_id_)
1174
1
                    .tag("index_id", index_id);
1175
1
            return -1;
1176
1
        }
1177
1178
1
        if (index_pb.has_db_id()) {
1179
            // Recycle the versioned keys
1180
1
            std::unique_ptr<Transaction> txn;
1181
1
            err = txn_kv_->create_txn(&txn);
1182
1
            if (err != TxnErrorCode::TXN_OK) {
1183
0
                LOG_WARNING("failed to create txn").tag("err", err);
1184
0
                return -1;
1185
0
            }
1186
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
1187
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
1188
1
            std::string index_inverted_key = versioned::index_inverted_key(
1189
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
1190
1
            versioned_remove_all(txn.get(), meta_key);
1191
1
            txn->remove(index_key);
1192
1
            txn->remove(index_inverted_key);
1193
1
            err = txn->commit();
1194
1
            if (err != TxnErrorCode::TXN_OK) {
1195
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1196
0
                return -1;
1197
0
            }
1198
1
        }
1199
1200
1
        metrics_context.total_recycled_num = ++num_recycled;
1201
1
        metrics_context.report();
1202
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1203
1
        index_keys.push_back(k);
1204
1
        return 0;
1205
1
    };
1206
1207
17
    auto loop_done = [&index_keys, this]() -> int {
1208
6
        if (index_keys.empty()) return 0;
1209
5
        DORIS_CLOUD_DEFER {
1210
5
            index_keys.clear();
1211
5
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1209
4
        DORIS_CLOUD_DEFER {
1210
4
            index_keys.clear();
1211
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1209
1
        DORIS_CLOUD_DEFER {
1210
1
            index_keys.clear();
1211
1
        };
1212
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1213
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1214
0
            return -1;
1215
0
        }
1216
5
        return 0;
1217
5
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1207
4
    auto loop_done = [&index_keys, this]() -> int {
1208
4
        if (index_keys.empty()) return 0;
1209
4
        DORIS_CLOUD_DEFER {
1210
4
            index_keys.clear();
1211
4
        };
1212
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1213
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1214
0
            return -1;
1215
0
        }
1216
4
        return 0;
1217
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
1207
2
    auto loop_done = [&index_keys, this]() -> int {
1208
2
        if (index_keys.empty()) return 0;
1209
1
        DORIS_CLOUD_DEFER {
1210
1
            index_keys.clear();
1211
1
        };
1212
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
1213
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
1214
0
            return -1;
1215
0
        }
1216
1
        return 0;
1217
1
    };
1218
1219
17
    if (config::enable_recycler_stats_metrics) {
1220
0
        scan_and_statistics_indexes();
1221
0
    }
1222
    // recycle_func and loop_done for scan and recycle
1223
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
1224
17
}
1225
1226
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
1227
8.24k
                             int64_t tablet_id) {
1228
8.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
1229
1230
8.23k
    std::unique_ptr<Transaction> txn;
1231
8.23k
    TxnErrorCode err = txn_kv->create_txn(&txn);
1232
8.23k
    if (err != TxnErrorCode::TXN_OK) {
1233
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
1234
0
                     << " tablet_id=" << tablet_id << " err=" << err;
1235
0
        return false;
1236
0
    }
1237
1238
8.23k
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
1239
8.23k
    std::string tablet_idx_val;
1240
8.23k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
1241
8.23k
    if (TxnErrorCode::TXN_OK != err) {
1242
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
1243
0
                     << " tablet_id=" << tablet_id << " err=" << err
1244
0
                     << " key=" << hex(tablet_idx_key);
1245
0
        return false;
1246
0
    }
1247
1248
8.23k
    TabletIndexPB tablet_idx_pb;
1249
8.23k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
1250
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
1251
0
                     << " tablet_id=" << tablet_id;
1252
0
        return false;
1253
0
    }
1254
1255
8.23k
    if (!tablet_idx_pb.has_db_id()) {
1256
        // In the previous version, the db_id was not set in the index_pb.
1257
        // If updating to the version which enable txn lazy commit, the db_id will be set.
1258
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
1259
0
                  << " instance_id=" << instance_id
1260
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
1261
0
        return true;
1262
0
    }
1263
1264
8.23k
    std::string ver_val;
1265
8.23k
    std::string ver_key =
1266
8.23k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
1267
8.23k
                                   tablet_idx_pb.partition_id()});
1268
8.23k
    err = txn->get(ver_key, &ver_val);
1269
1270
8.23k
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
1271
201
        LOG(INFO) << ""
1272
201
                     "partition version not found, instance_id="
1273
201
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
1274
201
                  << " table_id=" << tablet_idx_pb.table_id()
1275
201
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
1276
201
                  << " key=" << hex(ver_key);
1277
201
        return true;
1278
201
    }
1279
1280
8.03k
    if (TxnErrorCode::TXN_OK != err) {
1281
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
1282
0
                     << " db_id=" << tablet_idx_pb.db_id()
1283
0
                     << " table_id=" << tablet_idx_pb.table_id()
1284
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1285
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
1286
0
        return false;
1287
0
    }
1288
1289
8.03k
    VersionPB version_pb;
1290
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
1291
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
1292
0
                     << " db_id=" << tablet_idx_pb.db_id()
1293
0
                     << " table_id=" << tablet_idx_pb.table_id()
1294
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
1295
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
1296
0
        return false;
1297
0
    }
1298
1299
8.03k
    if (version_pb.pending_txn_ids_size() > 0) {
1300
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
1301
4.00k
        DCHECK(version_pb.pending_txn_ids_size() == 1);
1302
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
1303
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
1304
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
1305
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
1306
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
1307
4.00k
                     << " key=" << hex(ver_key);
1308
4.00k
        return false;
1309
4.00k
    }
1310
4.03k
    return true;
1311
8.03k
}
1312
1313
15
int InstanceRecycler::recycle_partitions() {
1314
15
    const std::string task_name = "recycle_partitions";
1315
15
    int64_t num_scanned = 0;
1316
15
    int64_t num_expired = 0;
1317
15
    int64_t num_recycled = 0;
1318
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1319
1320
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
1321
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
1322
15
    std::string part_key0;
1323
15
    std::string part_key1;
1324
15
    recycle_partition_key(part_key_info0, &part_key0);
1325
15
    recycle_partition_key(part_key_info1, &part_key1);
1326
1327
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
1328
1329
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1330
15
    register_recycle_task(task_name, start_time);
1331
1332
15
    DORIS_CLOUD_DEFER {
1333
15
        unregister_recycle_task(task_name);
1334
15
        int64_t cost =
1335
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1336
15
        metrics_context.finish_report();
1337
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1338
15
                .tag("instance_id", instance_id_)
1339
15
                .tag("num_scanned", num_scanned)
1340
15
                .tag("num_expired", num_expired)
1341
15
                .tag("num_recycled", num_recycled);
1342
15
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1332
13
    DORIS_CLOUD_DEFER {
1333
13
        unregister_recycle_task(task_name);
1334
13
        int64_t cost =
1335
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1336
13
        metrics_context.finish_report();
1337
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1338
13
                .tag("instance_id", instance_id_)
1339
13
                .tag("num_scanned", num_scanned)
1340
13
                .tag("num_expired", num_expired)
1341
13
                .tag("num_recycled", num_recycled);
1342
13
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
1332
2
    DORIS_CLOUD_DEFER {
1333
2
        unregister_recycle_task(task_name);
1334
2
        int64_t cost =
1335
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1336
2
        metrics_context.finish_report();
1337
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
1338
2
                .tag("instance_id", instance_id_)
1339
2
                .tag("num_scanned", num_scanned)
1340
2
                .tag("num_expired", num_expired)
1341
2
                .tag("num_recycled", num_recycled);
1342
2
    };
1343
1344
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
1345
1346
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
1347
15
    std::vector<std::string_view> partition_keys;
1348
15
    std::vector<std::string> partition_version_keys;
1349
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1350
9
        ++num_scanned;
1351
9
        RecyclePartitionPB part_pb;
1352
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1353
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1354
0
            return -1;
1355
0
        }
1356
9
        int64_t current_time = ::time(nullptr);
1357
9
        if (current_time <
1358
9
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1359
0
            return 0;
1360
0
        }
1361
9
        ++num_expired;
1362
        // decode partition_id
1363
9
        auto k1 = k;
1364
9
        k1.remove_prefix(1);
1365
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1366
9
        decode_key(&k1, &out);
1367
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1368
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1369
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1370
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1371
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1372
        // Change state to RECYCLING
1373
9
        std::unique_ptr<Transaction> txn;
1374
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1375
9
        if (err != TxnErrorCode::TXN_OK) {
1376
0
            LOG_WARNING("failed to create txn").tag("err", err);
1377
0
            return -1;
1378
0
        }
1379
9
        std::string val;
1380
9
        err = txn->get(k, &val);
1381
9
        if (err ==
1382
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1383
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1384
0
            return 0;
1385
0
        }
1386
9
        if (err != TxnErrorCode::TXN_OK) {
1387
0
            LOG_WARNING("failed to get kv");
1388
0
            return -1;
1389
0
        }
1390
9
        part_pb.Clear();
1391
9
        if (!part_pb.ParseFromString(val)) {
1392
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1393
0
            return -1;
1394
0
        }
1395
        // Partitions with PREPARED state MUST have no data
1396
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1397
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1398
8
            txn->put(k, part_pb.SerializeAsString());
1399
8
            err = txn->commit();
1400
8
            if (err != TxnErrorCode::TXN_OK) {
1401
0
                LOG_WARNING("failed to commit txn: {}", err);
1402
0
                return -1;
1403
0
            }
1404
8
        }
1405
1406
9
        int ret = 0;
1407
33
        for (int64_t index_id : part_pb.index_id()) {
1408
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1409
1
                LOG_WARNING("failed to recycle tablets under partition")
1410
1
                        .tag("table_id", part_pb.table_id())
1411
1
                        .tag("instance_id", instance_id_)
1412
1
                        .tag("index_id", index_id)
1413
1
                        .tag("partition_id", partition_id);
1414
1
                ret = -1;
1415
1
            }
1416
33
        }
1417
9
        if (ret == 0 && part_pb.has_db_id()) {
1418
            // Recycle the versioned keys
1419
8
            std::unique_ptr<Transaction> txn;
1420
8
            err = txn_kv_->create_txn(&txn);
1421
8
            if (err != TxnErrorCode::TXN_OK) {
1422
0
                LOG_WARNING("failed to create txn").tag("err", err);
1423
0
                return -1;
1424
0
            }
1425
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1426
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1427
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1428
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1429
8
            std::string partition_version_key =
1430
8
                    versioned::partition_version_key({instance_id_, partition_id});
1431
8
            versioned_remove_all(txn.get(), meta_key);
1432
8
            txn->remove(index_key);
1433
8
            txn->remove(inverted_index_key);
1434
8
            versioned_remove_all(txn.get(), partition_version_key);
1435
8
            err = txn->commit();
1436
8
            if (err != TxnErrorCode::TXN_OK) {
1437
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1438
0
                return -1;
1439
0
            }
1440
8
        }
1441
1442
9
        if (ret == 0) {
1443
8
            ++num_recycled;
1444
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1445
8
            partition_keys.push_back(k);
1446
8
            if (part_pb.db_id() > 0) {
1447
8
                partition_version_keys.push_back(partition_version_key(
1448
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1449
8
            }
1450
8
            metrics_context.total_recycled_num = num_recycled;
1451
8
            metrics_context.report();
1452
8
        }
1453
9
        return ret;
1454
9
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1349
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1350
7
        ++num_scanned;
1351
7
        RecyclePartitionPB part_pb;
1352
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1353
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1354
0
            return -1;
1355
0
        }
1356
7
        int64_t current_time = ::time(nullptr);
1357
7
        if (current_time <
1358
7
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1359
0
            return 0;
1360
0
        }
1361
7
        ++num_expired;
1362
        // decode partition_id
1363
7
        auto k1 = k;
1364
7
        k1.remove_prefix(1);
1365
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1366
7
        decode_key(&k1, &out);
1367
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1368
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1369
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1370
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1371
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1372
        // Change state to RECYCLING
1373
7
        std::unique_ptr<Transaction> txn;
1374
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1375
7
        if (err != TxnErrorCode::TXN_OK) {
1376
0
            LOG_WARNING("failed to create txn").tag("err", err);
1377
0
            return -1;
1378
0
        }
1379
7
        std::string val;
1380
7
        err = txn->get(k, &val);
1381
7
        if (err ==
1382
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1383
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1384
0
            return 0;
1385
0
        }
1386
7
        if (err != TxnErrorCode::TXN_OK) {
1387
0
            LOG_WARNING("failed to get kv");
1388
0
            return -1;
1389
0
        }
1390
7
        part_pb.Clear();
1391
7
        if (!part_pb.ParseFromString(val)) {
1392
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1393
0
            return -1;
1394
0
        }
1395
        // Partitions with PREPARED state MUST have no data
1396
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1397
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1398
7
            txn->put(k, part_pb.SerializeAsString());
1399
7
            err = txn->commit();
1400
7
            if (err != TxnErrorCode::TXN_OK) {
1401
0
                LOG_WARNING("failed to commit txn: {}", err);
1402
0
                return -1;
1403
0
            }
1404
7
        }
1405
1406
7
        int ret = 0;
1407
31
        for (int64_t index_id : part_pb.index_id()) {
1408
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1409
0
                LOG_WARNING("failed to recycle tablets under partition")
1410
0
                        .tag("table_id", part_pb.table_id())
1411
0
                        .tag("instance_id", instance_id_)
1412
0
                        .tag("index_id", index_id)
1413
0
                        .tag("partition_id", partition_id);
1414
0
                ret = -1;
1415
0
            }
1416
31
        }
1417
7
        if (ret == 0 && part_pb.has_db_id()) {
1418
            // Recycle the versioned keys
1419
7
            std::unique_ptr<Transaction> txn;
1420
7
            err = txn_kv_->create_txn(&txn);
1421
7
            if (err != TxnErrorCode::TXN_OK) {
1422
0
                LOG_WARNING("failed to create txn").tag("err", err);
1423
0
                return -1;
1424
0
            }
1425
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1426
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1427
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1428
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1429
7
            std::string partition_version_key =
1430
7
                    versioned::partition_version_key({instance_id_, partition_id});
1431
7
            versioned_remove_all(txn.get(), meta_key);
1432
7
            txn->remove(index_key);
1433
7
            txn->remove(inverted_index_key);
1434
7
            versioned_remove_all(txn.get(), partition_version_key);
1435
7
            err = txn->commit();
1436
7
            if (err != TxnErrorCode::TXN_OK) {
1437
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1438
0
                return -1;
1439
0
            }
1440
7
        }
1441
1442
7
        if (ret == 0) {
1443
7
            ++num_recycled;
1444
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1445
7
            partition_keys.push_back(k);
1446
7
            if (part_pb.db_id() > 0) {
1447
7
                partition_version_keys.push_back(partition_version_key(
1448
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1449
7
            }
1450
7
            metrics_context.total_recycled_num = num_recycled;
1451
7
            metrics_context.report();
1452
7
        }
1453
7
        return ret;
1454
7
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1349
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1350
2
        ++num_scanned;
1351
2
        RecyclePartitionPB part_pb;
1352
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
1353
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1354
0
            return -1;
1355
0
        }
1356
2
        int64_t current_time = ::time(nullptr);
1357
2
        if (current_time <
1358
2
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
1359
0
            return 0;
1360
0
        }
1361
2
        ++num_expired;
1362
        // decode partition_id
1363
2
        auto k1 = k;
1364
2
        k1.remove_prefix(1);
1365
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1366
2
        decode_key(&k1, &out);
1367
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
1368
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
1369
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
1370
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
1371
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
1372
        // Change state to RECYCLING
1373
2
        std::unique_ptr<Transaction> txn;
1374
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1375
2
        if (err != TxnErrorCode::TXN_OK) {
1376
0
            LOG_WARNING("failed to create txn").tag("err", err);
1377
0
            return -1;
1378
0
        }
1379
2
        std::string val;
1380
2
        err = txn->get(k, &val);
1381
2
        if (err ==
1382
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
1383
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
1384
0
            return 0;
1385
0
        }
1386
2
        if (err != TxnErrorCode::TXN_OK) {
1387
0
            LOG_WARNING("failed to get kv");
1388
0
            return -1;
1389
0
        }
1390
2
        part_pb.Clear();
1391
2
        if (!part_pb.ParseFromString(val)) {
1392
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
1393
0
            return -1;
1394
0
        }
1395
        // Partitions with PREPARED state MUST have no data
1396
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
1397
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
1398
1
            txn->put(k, part_pb.SerializeAsString());
1399
1
            err = txn->commit();
1400
1
            if (err != TxnErrorCode::TXN_OK) {
1401
0
                LOG_WARNING("failed to commit txn: {}", err);
1402
0
                return -1;
1403
0
            }
1404
1
        }
1405
1406
2
        int ret = 0;
1407
2
        for (int64_t index_id : part_pb.index_id()) {
1408
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
1409
1
                LOG_WARNING("failed to recycle tablets under partition")
1410
1
                        .tag("table_id", part_pb.table_id())
1411
1
                        .tag("instance_id", instance_id_)
1412
1
                        .tag("index_id", index_id)
1413
1
                        .tag("partition_id", partition_id);
1414
1
                ret = -1;
1415
1
            }
1416
2
        }
1417
2
        if (ret == 0 && part_pb.has_db_id()) {
1418
            // Recycle the versioned keys
1419
1
            std::unique_ptr<Transaction> txn;
1420
1
            err = txn_kv_->create_txn(&txn);
1421
1
            if (err != TxnErrorCode::TXN_OK) {
1422
0
                LOG_WARNING("failed to create txn").tag("err", err);
1423
0
                return -1;
1424
0
            }
1425
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
1426
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
1427
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
1428
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
1429
1
            std::string partition_version_key =
1430
1
                    versioned::partition_version_key({instance_id_, partition_id});
1431
1
            versioned_remove_all(txn.get(), meta_key);
1432
1
            txn->remove(index_key);
1433
1
            txn->remove(inverted_index_key);
1434
1
            versioned_remove_all(txn.get(), partition_version_key);
1435
1
            err = txn->commit();
1436
1
            if (err != TxnErrorCode::TXN_OK) {
1437
0
                LOG_WARNING("failed to commit txn").tag("err", err);
1438
0
                return -1;
1439
0
            }
1440
1
        }
1441
1442
2
        if (ret == 0) {
1443
1
            ++num_recycled;
1444
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
1445
1
            partition_keys.push_back(k);
1446
1
            if (part_pb.db_id() > 0) {
1447
1
                partition_version_keys.push_back(partition_version_key(
1448
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
1449
1
            }
1450
1
            metrics_context.total_recycled_num = num_recycled;
1451
1
            metrics_context.report();
1452
1
        }
1453
2
        return ret;
1454
2
    };
1455
1456
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1457
5
        if (partition_keys.empty()) return 0;
1458
4
        DORIS_CLOUD_DEFER {
1459
4
            partition_keys.clear();
1460
4
            partition_version_keys.clear();
1461
4
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1458
3
        DORIS_CLOUD_DEFER {
1459
3
            partition_keys.clear();
1460
3
            partition_version_keys.clear();
1461
3
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1458
1
        DORIS_CLOUD_DEFER {
1459
1
            partition_keys.clear();
1460
1
            partition_version_keys.clear();
1461
1
        };
1462
4
        std::unique_ptr<Transaction> txn;
1463
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1464
4
        if (err != TxnErrorCode::TXN_OK) {
1465
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1466
0
            return -1;
1467
0
        }
1468
8
        for (auto& k : partition_keys) {
1469
8
            txn->remove(k);
1470
8
        }
1471
8
        for (auto& k : partition_version_keys) {
1472
8
            txn->remove(k);
1473
8
        }
1474
4
        err = txn->commit();
1475
4
        if (err != TxnErrorCode::TXN_OK) {
1476
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1477
0
                         << " err=" << err;
1478
0
            return -1;
1479
0
        }
1480
4
        return 0;
1481
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1456
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1457
3
        if (partition_keys.empty()) return 0;
1458
3
        DORIS_CLOUD_DEFER {
1459
3
            partition_keys.clear();
1460
3
            partition_version_keys.clear();
1461
3
        };
1462
3
        std::unique_ptr<Transaction> txn;
1463
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1464
3
        if (err != TxnErrorCode::TXN_OK) {
1465
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1466
0
            return -1;
1467
0
        }
1468
7
        for (auto& k : partition_keys) {
1469
7
            txn->remove(k);
1470
7
        }
1471
7
        for (auto& k : partition_version_keys) {
1472
7
            txn->remove(k);
1473
7
        }
1474
3
        err = txn->commit();
1475
3
        if (err != TxnErrorCode::TXN_OK) {
1476
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1477
0
                         << " err=" << err;
1478
0
            return -1;
1479
0
        }
1480
3
        return 0;
1481
3
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
1456
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
1457
2
        if (partition_keys.empty()) return 0;
1458
1
        DORIS_CLOUD_DEFER {
1459
1
            partition_keys.clear();
1460
1
            partition_version_keys.clear();
1461
1
        };
1462
1
        std::unique_ptr<Transaction> txn;
1463
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1464
1
        if (err != TxnErrorCode::TXN_OK) {
1465
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
1466
0
            return -1;
1467
0
        }
1468
1
        for (auto& k : partition_keys) {
1469
1
            txn->remove(k);
1470
1
        }
1471
1
        for (auto& k : partition_version_keys) {
1472
1
            txn->remove(k);
1473
1
        }
1474
1
        err = txn->commit();
1475
1
        if (err != TxnErrorCode::TXN_OK) {
1476
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
1477
0
                         << " err=" << err;
1478
0
            return -1;
1479
0
        }
1480
1
        return 0;
1481
1
    };
1482
1483
15
    if (config::enable_recycler_stats_metrics) {
1484
0
        scan_and_statistics_partitions();
1485
0
    }
1486
    // recycle_func and loop_done for scan and recycle
1487
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
1488
15
}
1489
1490
14
int InstanceRecycler::recycle_versions() {
1491
14
    if (instance_info_.has_multi_version_status() &&
1492
14
        instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED) {
1493
2
        return recycle_orphan_partitions();
1494
2
    }
1495
1496
12
    int64_t num_scanned = 0;
1497
12
    int64_t num_recycled = 0;
1498
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
1499
1500
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
1501
1502
12
    auto start_time = steady_clock::now();
1503
1504
12
    DORIS_CLOUD_DEFER {
1505
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1506
12
        metrics_context.finish_report();
1507
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
1508
12
                .tag("instance_id", instance_id_)
1509
12
                .tag("num_scanned", num_scanned)
1510
12
                .tag("num_recycled", num_recycled);
1511
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
1504
12
    DORIS_CLOUD_DEFER {
1505
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1506
12
        metrics_context.finish_report();
1507
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
1508
12
                .tag("instance_id", instance_id_)
1509
12
                .tag("num_scanned", num_scanned)
1510
12
                .tag("num_recycled", num_recycled);
1511
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
1512
1513
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
1514
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
1515
12
    int64_t last_scanned_table_id = 0;
1516
12
    bool is_recycled = false; // Is last scanned kv recycled
1517
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
1518
12
                         &metrics_context, this](std::string_view k, std::string_view) {
1519
2
        ++num_scanned;
1520
2
        auto k1 = k;
1521
2
        k1.remove_prefix(1);
1522
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1523
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1524
2
        decode_key(&k1, &out);
1525
2
        DCHECK_EQ(out.size(), 6) << k;
1526
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1527
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1528
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1529
0
            return 0;
1530
0
        }
1531
2
        last_scanned_table_id = table_id;
1532
2
        is_recycled = false;
1533
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1534
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1535
2
        std::unique_ptr<Transaction> txn;
1536
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1537
2
        if (err != TxnErrorCode::TXN_OK) {
1538
0
            return -1;
1539
0
        }
1540
2
        std::unique_ptr<RangeGetIterator> iter;
1541
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1542
2
        if (err != TxnErrorCode::TXN_OK) {
1543
0
            return -1;
1544
0
        }
1545
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1546
1
            return 0;
1547
1
        }
1548
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1549
        // 1. Remove all partition version kvs of this table
1550
1
        auto partition_version_key_begin =
1551
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1552
1
        auto partition_version_key_end =
1553
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1554
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1555
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1556
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1557
1
                     << " table_id=" << table_id;
1558
        // 2. Remove the table version kv of this table
1559
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1560
1
        txn->remove(tbl_version_key);
1561
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1562
        // 3. Remove mow delete bitmap update lock and tablet job lock
1563
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1564
1
        txn->remove(lock_key);
1565
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1566
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1567
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1568
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
1569
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1570
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1571
1
                     << " table_id=" << table_id;
1572
1
        err = txn->commit();
1573
1
        if (err != TxnErrorCode::TXN_OK) {
1574
0
            return -1;
1575
0
        }
1576
1
        metrics_context.total_recycled_num = ++num_recycled;
1577
1
        metrics_context.report();
1578
1
        is_recycled = true;
1579
1
        return 0;
1580
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1518
2
                         &metrics_context, this](std::string_view k, std::string_view) {
1519
2
        ++num_scanned;
1520
2
        auto k1 = k;
1521
2
        k1.remove_prefix(1);
1522
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
1523
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
1524
2
        decode_key(&k1, &out);
1525
2
        DCHECK_EQ(out.size(), 6) << k;
1526
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
1527
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
1528
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
1529
0
            return 0;
1530
0
        }
1531
2
        last_scanned_table_id = table_id;
1532
2
        is_recycled = false;
1533
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
1534
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
1535
2
        std::unique_ptr<Transaction> txn;
1536
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1537
2
        if (err != TxnErrorCode::TXN_OK) {
1538
0
            return -1;
1539
0
        }
1540
2
        std::unique_ptr<RangeGetIterator> iter;
1541
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
1542
2
        if (err != TxnErrorCode::TXN_OK) {
1543
0
            return -1;
1544
0
        }
1545
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
1546
1
            return 0;
1547
1
        }
1548
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
1549
        // 1. Remove all partition version kvs of this table
1550
1
        auto partition_version_key_begin =
1551
1
                partition_version_key({instance_id_, db_id, table_id, 0});
1552
1
        auto partition_version_key_end =
1553
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
1554
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
1555
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
1556
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
1557
1
                     << " table_id=" << table_id;
1558
        // 2. Remove the table version kv of this table
1559
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
1560
1
        txn->remove(tbl_version_key);
1561
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
1562
        // 3. Remove mow delete bitmap update lock and tablet job lock
1563
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1564
1
        txn->remove(lock_key);
1565
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1566
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1567
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1568
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
1569
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1570
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1571
1
                     << " table_id=" << table_id;
1572
1
        err = txn->commit();
1573
1
        if (err != TxnErrorCode::TXN_OK) {
1574
0
            return -1;
1575
0
        }
1576
1
        metrics_context.total_recycled_num = ++num_recycled;
1577
1
        metrics_context.report();
1578
1
        is_recycled = true;
1579
1
        return 0;
1580
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
1581
1582
12
    if (config::enable_recycler_stats_metrics) {
1583
0
        scan_and_statistics_versions();
1584
0
    }
1585
    // recycle_func and loop_done for scan and recycle
1586
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
1587
14
}
1588
1589
3
int InstanceRecycler::recycle_orphan_partitions() {
1590
3
    int64_t num_scanned = 0;
1591
3
    int64_t num_recycled = 0;
1592
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
1593
1594
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
1595
3
            .tag("instance_id", instance_id_);
1596
1597
3
    auto start_time = steady_clock::now();
1598
1599
3
    DORIS_CLOUD_DEFER {
1600
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1601
3
        metrics_context.finish_report();
1602
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
1603
3
                .tag("instance_id", instance_id_)
1604
3
                .tag("num_scanned", num_scanned)
1605
3
                .tag("num_recycled", num_recycled);
1606
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
1599
3
    DORIS_CLOUD_DEFER {
1600
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1601
3
        metrics_context.finish_report();
1602
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
1603
3
                .tag("instance_id", instance_id_)
1604
3
                .tag("num_scanned", num_scanned)
1605
3
                .tag("num_recycled", num_recycled);
1606
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
1607
1608
3
    bool is_empty_table = false;        // whether the table has no indexes
1609
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
1610
3
    int64_t current_table_id = 0;       // current scanning table id
1611
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
1612
3
                         &current_table_id, &is_table_kvs_recycled,
1613
3
                         this](std::string_view k, std::string_view) {
1614
2
        ++num_scanned;
1615
1616
2
        std::string_view k1(k);
1617
2
        int64_t db_id, table_id, partition_id;
1618
2
        if (versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id, &partition_id) !=
1619
2
            0) {
1620
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
1621
0
            return -1;
1622
2
        } else if (table_id != current_table_id) {
1623
2
            current_table_id = table_id;
1624
2
            is_table_kvs_recycled = false;
1625
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
1626
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
1627
2
            if (err != TxnErrorCode::TXN_OK) {
1628
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
1629
0
                             << " table_id=" << table_id << " err=" << err;
1630
0
                return -1;
1631
0
            }
1632
2
        }
1633
1634
2
        if (!is_empty_table) {
1635
            // table is not empty, skip recycle
1636
1
            return 0;
1637
1
        }
1638
1639
1
        std::unique_ptr<Transaction> txn;
1640
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1641
1
        if (err != TxnErrorCode::TXN_OK) {
1642
0
            return -1;
1643
0
        }
1644
1645
        // 1. Remove all partition related kvs
1646
1
        std::string partition_meta_key =
1647
1
                versioned::meta_partition_key({instance_id_, partition_id});
1648
1
        std::string partition_index_key =
1649
1
                versioned::partition_index_key({instance_id_, partition_id});
1650
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
1651
1
                {instance_id_, db_id, table_id, partition_id});
1652
1
        std::string partition_version_key =
1653
1
                versioned::partition_version_key({instance_id_, partition_id});
1654
1
        txn->remove(partition_index_key);
1655
1
        txn->remove(partition_inverted_key);
1656
1
        versioned_remove_all(txn.get(), partition_meta_key);
1657
1
        versioned_remove_all(txn.get(), partition_version_key);
1658
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
1659
1
                     << " table_id=" << table_id << " db_id=" << db_id
1660
1
                     << " partition_meta_key=" << hex(partition_meta_key)
1661
1
                     << " partition_version_key=" << hex(partition_version_key);
1662
1663
1
        if (!is_table_kvs_recycled) {
1664
1
            is_table_kvs_recycled = true;
1665
1666
            // 2. Remove the table version kv of this table
1667
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
1668
1
            versioned_remove_all(txn.get(), table_version_key);
1669
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
1670
            // 3. Remove mow delete bitmap update lock and tablet job lock
1671
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1672
1
            txn->remove(lock_key);
1673
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1674
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1675
1
            std::string tablet_job_key_end =
1676
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1677
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
1678
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1679
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1680
1
                         << " table_id=" << table_id;
1681
1
        }
1682
1683
1
        err = txn->commit();
1684
1
        if (err != TxnErrorCode::TXN_OK) {
1685
0
            return -1;
1686
0
        }
1687
1
        metrics_context.total_recycled_num = ++num_recycled;
1688
1
        metrics_context.report();
1689
1
        return 0;
1690
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
1613
2
                         this](std::string_view k, std::string_view) {
1614
2
        ++num_scanned;
1615
1616
2
        std::string_view k1(k);
1617
2
        int64_t db_id, table_id, partition_id;
1618
2
        if (versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id, &partition_id) !=
1619
2
            0) {
1620
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
1621
0
            return -1;
1622
2
        } else if (table_id != current_table_id) {
1623
2
            current_table_id = table_id;
1624
2
            is_table_kvs_recycled = false;
1625
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
1626
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
1627
2
            if (err != TxnErrorCode::TXN_OK) {
1628
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
1629
0
                             << " table_id=" << table_id << " err=" << err;
1630
0
                return -1;
1631
0
            }
1632
2
        }
1633
1634
2
        if (!is_empty_table) {
1635
            // table is not empty, skip recycle
1636
1
            return 0;
1637
1
        }
1638
1639
1
        std::unique_ptr<Transaction> txn;
1640
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1641
1
        if (err != TxnErrorCode::TXN_OK) {
1642
0
            return -1;
1643
0
        }
1644
1645
        // 1. Remove all partition related kvs
1646
1
        std::string partition_meta_key =
1647
1
                versioned::meta_partition_key({instance_id_, partition_id});
1648
1
        std::string partition_index_key =
1649
1
                versioned::partition_index_key({instance_id_, partition_id});
1650
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
1651
1
                {instance_id_, db_id, table_id, partition_id});
1652
1
        std::string partition_version_key =
1653
1
                versioned::partition_version_key({instance_id_, partition_id});
1654
1
        txn->remove(partition_index_key);
1655
1
        txn->remove(partition_inverted_key);
1656
1
        versioned_remove_all(txn.get(), partition_meta_key);
1657
1
        versioned_remove_all(txn.get(), partition_version_key);
1658
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
1659
1
                     << " table_id=" << table_id << " db_id=" << db_id
1660
1
                     << " partition_meta_key=" << hex(partition_meta_key)
1661
1
                     << " partition_version_key=" << hex(partition_version_key);
1662
1663
1
        if (!is_table_kvs_recycled) {
1664
1
            is_table_kvs_recycled = true;
1665
1666
            // 2. Remove the table version kv of this table
1667
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
1668
1
            versioned_remove_all(txn.get(), table_version_key);
1669
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
1670
            // 3. Remove mow delete bitmap update lock and tablet job lock
1671
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
1672
1
            txn->remove(lock_key);
1673
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
1674
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
1675
1
            std::string tablet_job_key_end =
1676
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
1677
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
1678
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
1679
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
1680
1
                         << " table_id=" << table_id;
1681
1
        }
1682
1683
1
        err = txn->commit();
1684
1
        if (err != TxnErrorCode::TXN_OK) {
1685
0
            return -1;
1686
0
        }
1687
1
        metrics_context.total_recycled_num = ++num_recycled;
1688
1
        metrics_context.report();
1689
1
        return 0;
1690
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
1691
1692
    // recycle_func and loop_done for scan and recycle
1693
3
    return scan_and_recycle(
1694
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
1695
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
1696
3
            std::move(recycle_func));
1697
3
}
1698
1699
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
1700
                                      RecyclerMetricsContext& metrics_context,
1701
46
                                      int64_t partition_id) {
1702
46
    bool is_multi_version =
1703
46
            instance_info_.has_multi_version_status() &&
1704
46
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
1705
46
    int64_t num_scanned = 0;
1706
46
    std::atomic_long num_recycled = 0;
1707
1708
46
    std::string tablet_key_begin, tablet_key_end;
1709
46
    std::string stats_key_begin, stats_key_end;
1710
46
    std::string job_key_begin, job_key_end;
1711
1712
46
    std::string tablet_belongs;
1713
46
    if (partition_id > 0) {
1714
        // recycle tablets in a partition belonging to the index
1715
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
1716
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
1717
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
1718
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
1719
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
1720
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
1721
33
        tablet_belongs = "partition";
1722
33
    } else {
1723
        // recycle tablets in the index
1724
13
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
1725
13
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
1726
13
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
1727
13
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
1728
13
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
1729
13
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
1730
13
        tablet_belongs = "index";
1731
13
    }
1732
1733
46
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
1734
46
            .tag("table_id", table_id)
1735
46
            .tag("index_id", index_id)
1736
46
            .tag("partition_id", partition_id);
1737
1738
46
    auto start_time = steady_clock::now();
1739
1740
46
    DORIS_CLOUD_DEFER {
1741
46
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1742
46
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1743
46
                .tag("instance_id", instance_id_)
1744
46
                .tag("table_id", table_id)
1745
46
                .tag("index_id", index_id)
1746
46
                .tag("partition_id", partition_id)
1747
46
                .tag("num_scanned", num_scanned)
1748
46
                .tag("num_recycled", num_recycled);
1749
46
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
1740
42
    DORIS_CLOUD_DEFER {
1741
42
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1742
42
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1743
42
                .tag("instance_id", instance_id_)
1744
42
                .tag("table_id", table_id)
1745
42
                .tag("index_id", index_id)
1746
42
                .tag("partition_id", partition_id)
1747
42
                .tag("num_scanned", num_scanned)
1748
42
                .tag("num_recycled", num_recycled);
1749
42
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
1740
4
    DORIS_CLOUD_DEFER {
1741
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
1742
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
1743
4
                .tag("instance_id", instance_id_)
1744
4
                .tag("table_id", table_id)
1745
4
                .tag("index_id", index_id)
1746
4
                .tag("partition_id", partition_id)
1747
4
                .tag("num_scanned", num_scanned)
1748
4
                .tag("num_recycled", num_recycled);
1749
4
    };
1750
1751
    // The first string_view represents the tablet key which has been recycled
1752
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
1753
46
    using TabletKeyPair = std::pair<std::string_view, bool>;
1754
46
    SyncExecutor<TabletKeyPair> sync_executor(
1755
46
            _thread_pool_group.recycle_tablet_pool,
1756
46
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
1757
46
                        index_id, partition_id),
1758
4.23k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1758
234
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
1758
4.00k
            [](const TabletKeyPair& k) { return k.first.empty(); });
1759
1760
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
1761
46
    std::vector<std::string> tablet_idx_keys;
1762
46
    std::vector<std::string> restore_job_keys;
1763
46
    std::vector<std::string> init_rs_keys;
1764
46
    std::vector<std::string> tablet_compact_stats_keys;
1765
46
    std::vector<std::string> tablet_load_stats_keys;
1766
46
    std::vector<std::string> versioned_meta_tablet_keys;
1767
8.24k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1768
8.24k
        bool use_range_remove = true;
1769
8.24k
        ++num_scanned;
1770
8.24k
        doris::TabletMetaCloudPB tablet_meta_pb;
1771
8.24k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1772
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1773
0
            use_range_remove = false;
1774
0
            return -1;
1775
0
        }
1776
8.24k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1777
1778
8.24k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1779
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
1780
4.00k
            return -1;
1781
4.00k
        }
1782
1783
4.23k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1784
4.23k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
1785
4.23k
        if (is_multi_version) {
1786
6
            tablet_compact_stats_keys.push_back(
1787
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
1788
6
            tablet_load_stats_keys.push_back(
1789
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
1790
6
            versioned_meta_tablet_keys.push_back(
1791
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
1792
6
        }
1793
4.23k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
1794
4.23k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1795
4.23k
                           &metrics_context, k]() mutable -> TabletKeyPair {
1796
4.23k
            if (recycle_tablet(tid, metrics_context) != 0) {
1797
0
                LOG_WARNING("failed to recycle tablet")
1798
0
                        .tag("instance_id", instance_id_)
1799
0
                        .tag("tablet_id", tid);
1800
0
                range_move = false;
1801
0
                return {std::string_view(), range_move};
1802
0
            }
1803
4.23k
            ++num_recycled;
1804
4.23k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1805
4.23k
            return {k, range_move};
1806
4.23k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
1795
234
                           &metrics_context, k]() mutable -> TabletKeyPair {
1796
234
            if (recycle_tablet(tid, metrics_context) != 0) {
1797
0
                LOG_WARNING("failed to recycle tablet")
1798
0
                        .tag("instance_id", instance_id_)
1799
0
                        .tag("tablet_id", tid);
1800
0
                range_move = false;
1801
0
                return {std::string_view(), range_move};
1802
0
            }
1803
234
            ++num_recycled;
1804
234
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1805
234
            return {k, range_move};
1806
234
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
1795
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
1796
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
1797
0
                LOG_WARNING("failed to recycle tablet")
1798
0
                        .tag("instance_id", instance_id_)
1799
0
                        .tag("tablet_id", tid);
1800
0
                range_move = false;
1801
0
                return {std::string_view(), range_move};
1802
0
            }
1803
4.00k
            ++num_recycled;
1804
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1805
4.00k
            return {k, range_move};
1806
4.00k
        });
1807
4.23k
        return 0;
1808
4.23k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
1767
237
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1768
237
        bool use_range_remove = true;
1769
237
        ++num_scanned;
1770
237
        doris::TabletMetaCloudPB tablet_meta_pb;
1771
237
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1772
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1773
0
            use_range_remove = false;
1774
0
            return -1;
1775
0
        }
1776
237
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1777
1778
237
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1779
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
1780
0
            return -1;
1781
0
        }
1782
1783
237
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1784
237
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
1785
237
        if (is_multi_version) {
1786
6
            tablet_compact_stats_keys.push_back(
1787
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
1788
6
            tablet_load_stats_keys.push_back(
1789
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
1790
6
            versioned_meta_tablet_keys.push_back(
1791
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
1792
6
        }
1793
237
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
1794
234
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1795
234
                           &metrics_context, k]() mutable -> TabletKeyPair {
1796
234
            if (recycle_tablet(tid, metrics_context) != 0) {
1797
234
                LOG_WARNING("failed to recycle tablet")
1798
234
                        .tag("instance_id", instance_id_)
1799
234
                        .tag("tablet_id", tid);
1800
234
                range_move = false;
1801
234
                return {std::string_view(), range_move};
1802
234
            }
1803
234
            ++num_recycled;
1804
234
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1805
234
            return {k, range_move};
1806
234
        });
1807
234
        return 0;
1808
237
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
1767
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1768
8.00k
        bool use_range_remove = true;
1769
8.00k
        ++num_scanned;
1770
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
1771
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
1772
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
1773
0
            use_range_remove = false;
1774
0
            return -1;
1775
0
        }
1776
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
1777
1778
8.00k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
1779
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
1780
4.00k
            return -1;
1781
4.00k
        }
1782
1783
4.00k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
1784
4.00k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
1785
4.00k
        if (is_multi_version) {
1786
0
            tablet_compact_stats_keys.push_back(
1787
0
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
1788
0
            tablet_load_stats_keys.push_back(
1789
0
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
1790
0
            versioned_meta_tablet_keys.push_back(
1791
0
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
1792
0
        }
1793
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
1794
4.00k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
1795
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
1796
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
1797
4.00k
                LOG_WARNING("failed to recycle tablet")
1798
4.00k
                        .tag("instance_id", instance_id_)
1799
4.00k
                        .tag("tablet_id", tid);
1800
4.00k
                range_move = false;
1801
4.00k
                return {std::string_view(), range_move};
1802
4.00k
            }
1803
4.00k
            ++num_recycled;
1804
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
1805
4.00k
            return {k, range_move};
1806
4.00k
        });
1807
4.00k
        return 0;
1808
4.00k
    };
1809
1810
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
1811
46
    auto loop_done = [&, this]() -> int {
1812
46
        bool finished = true;
1813
46
        auto tablet_keys = sync_executor.when_all(&finished);
1814
46
        if (!finished) {
1815
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1816
0
            return -1;
1817
0
        }
1818
46
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1819
        // sort the vector using key's order
1820
44
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1821
49.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
1821
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
1821
48.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1822
44
        bool use_range_remove = true;
1823
4.23k
        for (auto& [_, remove] : tablet_keys) {
1824
4.23k
            if (!remove) {
1825
0
                use_range_remove = remove;
1826
0
                break;
1827
0
            }
1828
4.23k
        }
1829
44
        DORIS_CLOUD_DEFER {
1830
44
            tablet_idx_keys.clear();
1831
44
            restore_job_keys.clear();
1832
44
            init_rs_keys.clear();
1833
44
            tablet_compact_stats_keys.clear();
1834
44
            tablet_load_stats_keys.clear();
1835
44
            versioned_meta_tablet_keys.clear();
1836
44
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1829
42
        DORIS_CLOUD_DEFER {
1830
42
            tablet_idx_keys.clear();
1831
42
            restore_job_keys.clear();
1832
42
            init_rs_keys.clear();
1833
42
            tablet_compact_stats_keys.clear();
1834
42
            tablet_load_stats_keys.clear();
1835
42
            versioned_meta_tablet_keys.clear();
1836
42
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
1829
2
        DORIS_CLOUD_DEFER {
1830
2
            tablet_idx_keys.clear();
1831
2
            restore_job_keys.clear();
1832
2
            init_rs_keys.clear();
1833
2
            tablet_compact_stats_keys.clear();
1834
2
            tablet_load_stats_keys.clear();
1835
2
            versioned_meta_tablet_keys.clear();
1836
2
        };
1837
44
        std::unique_ptr<Transaction> txn;
1838
44
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1839
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1840
0
            return -1;
1841
0
        }
1842
44
        std::string tablet_key_end;
1843
44
        if (!tablet_keys.empty()) {
1844
42
            if (use_range_remove) {
1845
42
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1846
42
                txn->remove(tablet_keys.front().first, tablet_key_end);
1847
42
            } else {
1848
0
                for (auto& [k, _] : tablet_keys) {
1849
0
                    txn->remove(k);
1850
0
                }
1851
0
            }
1852
42
        }
1853
44
        if (is_multi_version) {
1854
6
            for (auto& k : tablet_compact_stats_keys) {
1855
                // Remove all versions of tablet compact stats for recycled tablet
1856
6
                LOG_INFO("remove versioned tablet compact stats key")
1857
6
                        .tag("compact_stats_key", hex(k));
1858
6
                versioned_remove_all(txn.get(), k);
1859
6
            }
1860
6
            for (auto& k : tablet_load_stats_keys) {
1861
                // Remove all versions of tablet load stats for recycled tablet
1862
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
1863
6
                versioned_remove_all(txn.get(), k);
1864
6
            }
1865
6
            for (auto& k : versioned_meta_tablet_keys) {
1866
                // Remove all versions of meta tablet for recycled tablet
1867
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
1868
6
                versioned_remove_all(txn.get(), k);
1869
6
            }
1870
5
        }
1871
4.23k
        for (auto& k : tablet_idx_keys) {
1872
4.23k
            txn->remove(k);
1873
4.23k
        }
1874
4.23k
        for (auto& k : restore_job_keys) {
1875
4.23k
            txn->remove(k);
1876
4.23k
        }
1877
44
        for (auto& k : init_rs_keys) {
1878
0
            txn->remove(k);
1879
0
        }
1880
44
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1881
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1882
0
                         << ", err=" << err;
1883
0
            return -1;
1884
0
        }
1885
44
        return 0;
1886
44
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
1811
42
    auto loop_done = [&, this]() -> int {
1812
42
        bool finished = true;
1813
42
        auto tablet_keys = sync_executor.when_all(&finished);
1814
42
        if (!finished) {
1815
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1816
0
            return -1;
1817
0
        }
1818
42
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1819
        // sort the vector using key's order
1820
42
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1821
42
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1822
42
        bool use_range_remove = true;
1823
234
        for (auto& [_, remove] : tablet_keys) {
1824
234
            if (!remove) {
1825
0
                use_range_remove = remove;
1826
0
                break;
1827
0
            }
1828
234
        }
1829
42
        DORIS_CLOUD_DEFER {
1830
42
            tablet_idx_keys.clear();
1831
42
            restore_job_keys.clear();
1832
42
            init_rs_keys.clear();
1833
42
            tablet_compact_stats_keys.clear();
1834
42
            tablet_load_stats_keys.clear();
1835
42
            versioned_meta_tablet_keys.clear();
1836
42
        };
1837
42
        std::unique_ptr<Transaction> txn;
1838
42
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1839
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1840
0
            return -1;
1841
0
        }
1842
42
        std::string tablet_key_end;
1843
42
        if (!tablet_keys.empty()) {
1844
40
            if (use_range_remove) {
1845
40
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1846
40
                txn->remove(tablet_keys.front().first, tablet_key_end);
1847
40
            } else {
1848
0
                for (auto& [k, _] : tablet_keys) {
1849
0
                    txn->remove(k);
1850
0
                }
1851
0
            }
1852
40
        }
1853
42
        if (is_multi_version) {
1854
6
            for (auto& k : tablet_compact_stats_keys) {
1855
                // Remove all versions of tablet compact stats for recycled tablet
1856
6
                LOG_INFO("remove versioned tablet compact stats key")
1857
6
                        .tag("compact_stats_key", hex(k));
1858
6
                versioned_remove_all(txn.get(), k);
1859
6
            }
1860
6
            for (auto& k : tablet_load_stats_keys) {
1861
                // Remove all versions of tablet load stats for recycled tablet
1862
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
1863
6
                versioned_remove_all(txn.get(), k);
1864
6
            }
1865
6
            for (auto& k : versioned_meta_tablet_keys) {
1866
                // Remove all versions of meta tablet for recycled tablet
1867
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
1868
6
                versioned_remove_all(txn.get(), k);
1869
6
            }
1870
5
        }
1871
237
        for (auto& k : tablet_idx_keys) {
1872
237
            txn->remove(k);
1873
237
        }
1874
237
        for (auto& k : restore_job_keys) {
1875
237
            txn->remove(k);
1876
237
        }
1877
42
        for (auto& k : init_rs_keys) {
1878
0
            txn->remove(k);
1879
0
        }
1880
42
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1881
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1882
0
                         << ", err=" << err;
1883
0
            return -1;
1884
0
        }
1885
42
        return 0;
1886
42
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
1811
4
    auto loop_done = [&, this]() -> int {
1812
4
        bool finished = true;
1813
4
        auto tablet_keys = sync_executor.when_all(&finished);
1814
4
        if (!finished) {
1815
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
1816
0
            return -1;
1817
0
        }
1818
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
1819
        // sort the vector using key's order
1820
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
1821
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
1822
2
        bool use_range_remove = true;
1823
4.00k
        for (auto& [_, remove] : tablet_keys) {
1824
4.00k
            if (!remove) {
1825
0
                use_range_remove = remove;
1826
0
                break;
1827
0
            }
1828
4.00k
        }
1829
2
        DORIS_CLOUD_DEFER {
1830
2
            tablet_idx_keys.clear();
1831
2
            restore_job_keys.clear();
1832
2
            init_rs_keys.clear();
1833
2
            tablet_compact_stats_keys.clear();
1834
2
            tablet_load_stats_keys.clear();
1835
2
            versioned_meta_tablet_keys.clear();
1836
2
        };
1837
2
        std::unique_ptr<Transaction> txn;
1838
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1839
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
1840
0
            return -1;
1841
0
        }
1842
2
        std::string tablet_key_end;
1843
2
        if (!tablet_keys.empty()) {
1844
2
            if (use_range_remove) {
1845
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
1846
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
1847
2
            } else {
1848
0
                for (auto& [k, _] : tablet_keys) {
1849
0
                    txn->remove(k);
1850
0
                }
1851
0
            }
1852
2
        }
1853
2
        if (is_multi_version) {
1854
0
            for (auto& k : tablet_compact_stats_keys) {
1855
                // Remove all versions of tablet compact stats for recycled tablet
1856
0
                LOG_INFO("remove versioned tablet compact stats key")
1857
0
                        .tag("compact_stats_key", hex(k));
1858
0
                versioned_remove_all(txn.get(), k);
1859
0
            }
1860
0
            for (auto& k : tablet_load_stats_keys) {
1861
                // Remove all versions of tablet load stats for recycled tablet
1862
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
1863
0
                versioned_remove_all(txn.get(), k);
1864
0
            }
1865
0
            for (auto& k : versioned_meta_tablet_keys) {
1866
                // Remove all versions of meta tablet for recycled tablet
1867
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
1868
0
                versioned_remove_all(txn.get(), k);
1869
0
            }
1870
0
        }
1871
4.00k
        for (auto& k : tablet_idx_keys) {
1872
4.00k
            txn->remove(k);
1873
4.00k
        }
1874
4.00k
        for (auto& k : restore_job_keys) {
1875
4.00k
            txn->remove(k);
1876
4.00k
        }
1877
2
        for (auto& k : init_rs_keys) {
1878
0
            txn->remove(k);
1879
0
        }
1880
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
1881
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
1882
0
                         << ", err=" << err;
1883
0
            return -1;
1884
0
        }
1885
2
        return 0;
1886
2
    };
1887
1888
46
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
1889
46
                               std::move(loop_done));
1890
46
    if (ret != 0) {
1891
2
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
1892
2
        return ret;
1893
2
    }
1894
1895
    // directly remove tablet stats and tablet jobs of these dropped index or partition
1896
44
    std::unique_ptr<Transaction> txn;
1897
44
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
1898
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
1899
0
        return -1;
1900
0
    }
1901
44
    txn->remove(stats_key_begin, stats_key_end);
1902
44
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
1903
44
                 << " end=" << hex(stats_key_end);
1904
44
    txn->remove(job_key_begin, job_key_end);
1905
44
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
1906
44
    std::string schema_key_begin, schema_key_end;
1907
44
    std::string schema_dict_key;
1908
44
    std::string versioned_schema_key_begin, versioned_schema_key_end;
1909
44
    if (partition_id <= 0) {
1910
        // Delete schema kv of this index
1911
12
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
1912
12
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
1913
12
        txn->remove(schema_key_begin, schema_key_end);
1914
12
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
1915
12
                     << " end=" << hex(schema_key_end);
1916
12
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
1917
12
        txn->remove(schema_dict_key);
1918
12
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
1919
12
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
1920
12
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
1921
12
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
1922
12
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
1923
12
                     << " end=" << hex(versioned_schema_key_end);
1924
12
    }
1925
1926
44
    TxnErrorCode err = txn->commit();
1927
44
    if (err != TxnErrorCode::TXN_OK) {
1928
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
1929
0
                     << " err=" << err;
1930
0
        return -1;
1931
0
    }
1932
1933
44
    return ret;
1934
44
}
1935
1936
4.03k
// Deletes the object files that belong to a single rowset: its segment files,
// the inverted index files that can be determined for it, and its delete
// bitmap file.
//
// Inverted index information is taken from the tablet schema embedded in
// `rs_meta_pb` when present; otherwise it is looked up in
// `inverted_index_id_cache_` by (index_id, schema_version). The call is
// delegated to the (resource_id, tablet_id, rowset_id) overload -- referred to
// below as "delete by prefix" -- when the rowset state is BEGIN_PARTIAL_UPDATE,
// when index_id or schema_version is missing, or when the cache lookup returns
// a code other than 0 or 1.
//
// Returns 0 when the rowset has no segments, -1 when `accessor_map_` has no
// entry for the rowset's resource id, and otherwise whatever the delegated
// overload or `accessor->delete_files()` returns.
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
1937
4.03k
    // Test-only sync point; presumably lets tests return early from here --
    // confirm against the macro definition.
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
1938
4.03k
    int64_t num_segments = rs_meta_pb.num_segments();
1939
4.03k
    if (num_segments <= 0) return 0;
1940
1941
    // Process inverted indexes
1942
4.02k
    std::vector<std::pair<int64_t, std::string>> index_ids;
1943
    // default format as v1.
1944
4.02k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
1945
4.02k
    bool delete_rowset_data_by_prefix = false;
1946
4.02k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
1947
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
1948
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
1949
0
        delete_rowset_data_by_prefix = true;
1950
4.02k
    } else if (rs_meta_pb.has_tablet_schema()) {
1951
8.00k
        // Collect (index_id, index_suffix_name) for every INVERTED index in the embedded schema.
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
1952
8.00k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
1953
8.00k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
1954
8.00k
            }
1955
8.00k
        }
1956
4.00k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
1957
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
1958
2.00k
        }
1959
4.00k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
1960
        // schema version and index id are not found, delete rowset data by prefix directly.
1961
0
        delete_rowset_data_by_prefix = true;
1962
28
    } else {
1963
        // otherwise, try to get schema kv
1964
28
        InvertedIndexInfo index_info;
1965
28
        int inverted_index_get_ret = inverted_index_id_cache_->get(
1966
28
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
1967
28
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
1968
28
                                 &inverted_index_get_ret);
1969
28
        if (inverted_index_get_ret == 0) {
1970
28
            index_format = index_info.first;
1971
28
            index_ids = index_info.second;
1972
28
        } else if (inverted_index_get_ret == 1) {
1973
            // 1. Schema kv not found means tablet has been recycled
1974
            // Maybe some tablet recycle failed by some bugs
1975
            // We need to delete again to double check
1976
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
1977
            // because we are uncertain about the inverted index information.
1978
            // If there are inverted indexes, some data might not be deleted,
1979
            // but this is acceptable as we have made our best effort to delete the data.
1980
0
            LOG_INFO(
1981
0
                    "delete rowset data schema kv not found, need to delete again to double "
1982
0
                    "check")
1983
0
                    .tag("instance_id", instance_id_)
1984
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
1985
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
1986
            // Currently index_ids is guaranteed to be empty,
1987
            // but we clear it again here as a safeguard against future code changes
1988
            // that might cause index_ids to no longer be empty
1989
0
            index_format = InvertedIndexStorageFormatPB::V2;
1990
0
            index_ids.clear();
1991
0
        } else {
1992
            // failed to get schema kv, delete rowset data by prefix directly.
1993
0
            delete_rowset_data_by_prefix = true;
1994
0
        }
1995
28
    }
1996
1997
4.02k
    // Hand over to the (resource_id, tablet_id, rowset_id) overload instead of listing files.
    if (delete_rowset_data_by_prefix) {
1998
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
1999
0
                                  rs_meta_pb.rowset_id_v2());
2000
0
    }
2001
2002
4.02k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
2003
4.02k
    if (it == accessor_map_.end()) {
2004
0
        LOG_WARNING("instance has no such resource id")
2005
0
                .tag("instance_id", instance_id_)
2006
0
                .tag("resource_id", rs_meta_pb.resource_id());
2007
0
        return -1;
2008
0
    }
2009
4.02k
    auto& accessor = it->second;
2010
4.02k
    int64_t tablet_id = rs_meta_pb.tablet_id();
2011
4.02k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
2012
4.02k
    std::vector<std::string> file_paths;
2013
24.0k
    // One segment path per segment, followed by that segment's inverted index path(s).
    for (int64_t i = 0; i < num_segments; ++i) {
2014
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
2015
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
2016
40.0k
            // V1: one index path per (segment, index id) pair.
            for (const auto& index_id : index_ids) {
2017
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
2018
40.0k
                                                            index_id.second));
2019
40.0k
            }
2020
20.0k
        } else if (!index_ids.empty()) {
2021
0
            // Non-V1: a single index path per segment, added only when some index id is known.
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
2022
0
        }
2023
20.0k
    }
2024
2025
    // Process delete bitmap
2026
4.02k
    file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
2027
    // TODO(AlexYue): seems this could be done in batch
2028
4.02k
    return accessor->delete_files(file_paths);
2029
4.02k
}
2030
2031
int InstanceRecycler::delete_rowset_data(
2032
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
2033
36
        RecyclerMetricsContext& metrics_context) {
2034
36
    int ret = 0;
2035
    // resource_id -> file_paths
2036
36
    std::map<std::string, std::vector<std::string>> resource_file_paths;
2037
    // (resource_id, tablet_id, rowset_id)
2038
36
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
2039
36
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
2040
2041
54.1k
    for (const auto& [_, rs] : rowsets) {
2042
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
2043
        // due to aborted schema change.
2044
54.1k
        if (is_formal_rowset) {
2045
3.12k
            std::lock_guard lock(recycled_tablets_mtx_);
2046
3.12k
            if (recycled_tablets_.count(rs.tablet_id())) {
2047
0
                continue; // Rowset data has already been deleted
2048
0
            }
2049
3.12k
        }
2050
2051
54.1k
        auto it = accessor_map_.find(rs.resource_id());
2052
        // possible if the accessor is not initialized correctly
2053
54.1k
        if (it == accessor_map_.end()) [[unlikely]] {
2054
1
            LOG_WARNING("instance has no such resource id")
2055
1
                    .tag("instance_id", instance_id_)
2056
1
                    .tag("resource_id", rs.resource_id());
2057
1
            ret = -1;
2058
1
            continue;
2059
1
        }
2060
2061
54.1k
        auto& file_paths = resource_file_paths[rs.resource_id()];
2062
54.1k
        const auto& rowset_id = rs.rowset_id_v2();
2063
54.1k
        int64_t tablet_id = rs.tablet_id();
2064
54.1k
        int64_t num_segments = rs.num_segments();
2065
54.1k
        if (num_segments <= 0) {
2066
0
            metrics_context.total_recycled_num++;
2067
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
2068
0
            continue;
2069
0
        }
2070
2071
        // Process delete bitmap
2072
54.1k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
2073
2074
        // Process inverted indexes
2075
54.1k
        std::vector<std::pair<int64_t, std::string>> index_ids;
2076
        // default format as v1.
2077
54.1k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
2078
54.1k
        int inverted_index_get_ret = 0;
2079
54.1k
        if (rs.has_tablet_schema()) {
2080
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
2081
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
2082
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
2083
53.5k
                }
2084
53.5k
            }
2085
26.5k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
2086
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
2087
26.5k
            }
2088
27.5k
        } else {
2089
27.5k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
2090
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
2091
0
                                "instance_id="
2092
0
                             << instance_id_ << " tablet_id=" << tablet_id
2093
0
                             << " rowset_id=" << rowset_id;
2094
0
                ret = -1;
2095
0
                continue;
2096
0
            }
2097
27.5k
            InvertedIndexInfo index_info;
2098
27.5k
            inverted_index_get_ret =
2099
27.5k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
2100
27.5k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
2101
27.5k
                                     &inverted_index_get_ret);
2102
27.5k
            if (inverted_index_get_ret == 0) {
2103
27.0k
                index_format = index_info.first;
2104
27.0k
                index_ids = index_info.second;
2105
27.0k
            } else if (inverted_index_get_ret == 1) {
2106
                // 1. Schema kv not found means tablet has been recycled
2107
                // Maybe some tablet recycle failed by some bugs
2108
                // We need to delete again to double check
2109
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
2110
                // because we are uncertain about the inverted index information.
2111
                // If there are inverted indexes, some data might not be deleted,
2112
                // but this is acceptable as we have made our best effort to delete the data.
2113
503
                LOG_INFO(
2114
503
                        "delete rowset data schema kv not found, need to delete again to double "
2115
503
                        "check")
2116
503
                        .tag("instance_id", instance_id_)
2117
503
                        .tag("tablet_id", tablet_id)
2118
503
                        .tag("rowset", rs.ShortDebugString());
2119
                // Currently index_ids is guaranteed to be empty,
2120
                // but we clear it again here as a safeguard against future code changes
2121
                // that might cause index_ids to no longer be empty
2122
503
                index_format = InvertedIndexStorageFormatPB::V2;
2123
503
                index_ids.clear();
2124
18.4E
            } else {
2125
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
2126
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
2127
18.4E
                ret = -1;
2128
18.4E
                continue;
2129
18.4E
            }
2130
27.5k
        }
2131
54.1k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
2132
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
2133
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
2134
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
2135
5
            continue;
2136
5
        }
2137
324k
        for (int64_t i = 0; i < num_segments; ++i) {
2138
270k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
2139
270k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
2140
539k
                for (const auto& index_id : index_ids) {
2141
539k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
2142
539k
                                                                index_id.first, index_id.second));
2143
539k
                }
2144
268k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
2145
                // try to recycle inverted index v2 when get_ret == 1
2146
                // we treat schema not found as if it has a v2 format inverted index
2147
                // to reduce chance of data leakage
2148
2.50k
                if (inverted_index_get_ret == 1) {
2149
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
2150
2.50k
                            .tag("instance_id", instance_id_)
2151
2.50k
                            .tag("inverted index v2 path",
2152
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
2153
2.50k
                }
2154
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
2155
2.50k
            }
2156
270k
        }
2157
54.1k
    }
2158
2159
36
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
2160
36
                                                 "delete_rowset_data",
2161
38
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
2161
38
                                                 [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
2162
36
    for (auto& [resource_id, file_paths] : resource_file_paths) {
2163
33
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
2164
33
            DCHECK(accessor_map_.count(*rid))
2165
0
                    << "uninitilized accessor, instance_id=" << instance_id_
2166
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
2167
33
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
2168
33
                                     &accessor_map_);
2169
33
            if (!accessor_map_.contains(*rid)) {
2170
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
2171
0
                        .tag("resource_id", resource_id)
2172
0
                        .tag("instance_id", instance_id_);
2173
0
                return -1;
2174
0
            }
2175
33
            auto& accessor = accessor_map_[*rid];
2176
33
            int ret = accessor->delete_files(*paths);
2177
33
            if (!ret) {
2178
                // deduplication of different files with the same rowset id
2179
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
2180
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
2181
33
                std::set<std::string> deleted_rowset_id;
2182
2183
33
                std::for_each(paths->begin(), paths->end(),
2184
33
                              [&metrics_context, &rowsets, &deleted_rowset_id,
2185
862k
                               this](const std::string& path) {
2186
862k
                                  std::vector<std::string> str;
2187
862k
                                  butil::SplitString(path, '/', &str);
2188
862k
                                  std::string rowset_id;
2189
862k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
2190
859k
                                      rowset_id = str.back().substr(0, pos);
2191
859k
                                  } else {
2192
2.94k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
2193
2.94k
                                      return;
2194
2.94k
                                  }
2195
859k
                                  auto rs_meta = rowsets.find(rowset_id);
2196
859k
                                  if (rs_meta != rowsets.end() &&
2197
862k
                                      !deleted_rowset_id.contains(rowset_id)) {
2198
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
2199
54.1k
                                      metrics_context.total_recycled_data_size +=
2200
54.1k
                                              rs_meta->second.total_disk_size();
2201
54.1k
                                      segment_metrics_context_.total_recycled_num +=
2202
54.1k
                                              rs_meta->second.num_segments();
2203
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
2204
54.1k
                                              rs_meta->second.total_disk_size();
2205
54.1k
                                      metrics_context.total_recycled_num++;
2206
54.1k
                                  }
2207
859k
                              });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
2185
862k
                               this](const std::string& path) {
2186
862k
                                  std::vector<std::string> str;
2187
862k
                                  butil::SplitString(path, '/', &str);
2188
862k
                                  std::string rowset_id;
2189
862k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
2190
859k
                                      rowset_id = str.back().substr(0, pos);
2191
859k
                                  } else {
2192
2.94k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
2193
2.94k
                                      return;
2194
2.94k
                                  }
2195
859k
                                  auto rs_meta = rowsets.find(rowset_id);
2196
859k
                                  if (rs_meta != rowsets.end() &&
2197
862k
                                      !deleted_rowset_id.contains(rowset_id)) {
2198
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
2199
54.1k
                                      metrics_context.total_recycled_data_size +=
2200
54.1k
                                              rs_meta->second.total_disk_size();
2201
54.1k
                                      segment_metrics_context_.total_recycled_num +=
2202
54.1k
                                              rs_meta->second.num_segments();
2203
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
2204
54.1k
                                              rs_meta->second.total_disk_size();
2205
54.1k
                                      metrics_context.total_recycled_num++;
2206
54.1k
                                  }
2207
859k
                              });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
2208
33
                segment_metrics_context_.report();
2209
33
                metrics_context.report();
2210
33
            }
2211
33
            return ret;
2212
33
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
2163
33
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
2164
33
            DCHECK(accessor_map_.count(*rid))
2165
0
                    << "uninitilized accessor, instance_id=" << instance_id_
2166
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
2167
33
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
2168
33
                                     &accessor_map_);
2169
33
            if (!accessor_map_.contains(*rid)) {
2170
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
2171
0
                        .tag("resource_id", resource_id)
2172
0
                        .tag("instance_id", instance_id_);
2173
0
                return -1;
2174
0
            }
2175
33
            auto& accessor = accessor_map_[*rid];
2176
33
            int ret = accessor->delete_files(*paths);
2177
33
            if (!ret) {
2178
                // deduplication of different files with the same rowset id
2179
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
2180
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
2181
33
                std::set<std::string> deleted_rowset_id;
2182
2183
33
                std::for_each(paths->begin(), paths->end(),
2184
33
                              [&metrics_context, &rowsets, &deleted_rowset_id,
2185
33
                               this](const std::string& path) {
2186
33
                                  std::vector<std::string> str;
2187
33
                                  butil::SplitString(path, '/', &str);
2188
33
                                  std::string rowset_id;
2189
33
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
2190
33
                                      rowset_id = str.back().substr(0, pos);
2191
33
                                  } else {
2192
33
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
2193
33
                                      return;
2194
33
                                  }
2195
33
                                  auto rs_meta = rowsets.find(rowset_id);
2196
33
                                  if (rs_meta != rowsets.end() &&
2197
33
                                      !deleted_rowset_id.contains(rowset_id)) {
2198
33
                                      deleted_rowset_id.emplace(rowset_id);
2199
33
                                      metrics_context.total_recycled_data_size +=
2200
33
                                              rs_meta->second.total_disk_size();
2201
33
                                      segment_metrics_context_.total_recycled_num +=
2202
33
                                              rs_meta->second.num_segments();
2203
33
                                      segment_metrics_context_.total_recycled_data_size +=
2204
33
                                              rs_meta->second.total_disk_size();
2205
33
                                      metrics_context.total_recycled_num++;
2206
33
                                  }
2207
33
                              });
2208
33
                segment_metrics_context_.report();
2209
33
                metrics_context.report();
2210
33
            }
2211
33
            return ret;
2212
33
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
2213
33
    }
2214
36
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
2215
5
        LOG_INFO(
2216
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
2217
5
                "resource_id={}, tablet_id={}, instance_id={}",
2218
5
                rowset_id, resource_id, tablet_id, instance_id_);
2219
5
        concurrent_delete_executor.add([&]() -> int {
2220
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
2221
5
            if (!ret) {
2222
5
                auto rs = rowsets.at(rowset_id);
2223
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
2224
5
                metrics_context.total_recycled_num++;
2225
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
2226
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
2227
5
                metrics_context.report();
2228
5
                segment_metrics_context_.report();
2229
5
            }
2230
5
            return ret;
2231
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
2219
5
        concurrent_delete_executor.add([&]() -> int {
2220
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
2221
5
            if (!ret) {
2222
5
                auto rs = rowsets.at(rowset_id);
2223
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
2224
5
                metrics_context.total_recycled_num++;
2225
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
2226
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
2227
5
                metrics_context.report();
2228
5
                segment_metrics_context_.report();
2229
5
            }
2230
5
            return ret;
2231
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
2232
5
    }
2233
2234
36
    bool finished = true;
2235
36
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2236
38
    for (int r : rets) {
2237
38
        if (r != 0) {
2238
0
            ret = -1;
2239
0
            break;
2240
0
        }
2241
38
    }
2242
36
    ret = finished ? ret : -1;
2243
36
    return ret;
2244
36
}
2245
2246
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
2247
2.90k
                                         const std::string& rowset_id) {
2248
2.90k
    auto it = accessor_map_.find(resource_id);
2249
2.90k
    if (it == accessor_map_.end()) {
2250
0
        LOG_WARNING("instance has no such resource id")
2251
0
                .tag("instance_id", instance_id_)
2252
0
                .tag("resource_id", resource_id)
2253
0
                .tag("tablet_id", tablet_id)
2254
0
                .tag("rowset_id", rowset_id);
2255
0
        return -1;
2256
0
    }
2257
2.90k
    auto& accessor = it->second;
2258
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
2259
2.90k
}
2260
2261
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
2262
                                                  RecyclerMetricsContext& metrics_context,
2263
0
                                                  int64_t partition_id, bool is_empty_tablet) {
2264
0
    std::string tablet_key_begin, tablet_key_end;
2265
2266
0
    if (partition_id > 0) {
2267
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2268
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2269
0
    } else {
2270
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2271
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2272
0
    }
2273
    // for calculate the total num or bytes of recyled objects
2274
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
2275
0
                                                          std::string_view v) -> int {
2276
0
        doris::TabletMetaCloudPB tablet_meta_pb;
2277
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2278
0
            return 0;
2279
0
        }
2280
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2281
2282
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2283
0
            return 0;
2284
0
        }
2285
2286
0
        if (!is_empty_tablet) {
2287
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
2288
0
                return 0;
2289
0
            }
2290
0
            tablet_metrics_context_.total_need_recycle_num++;
2291
0
        }
2292
0
        return 0;
2293
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
2294
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
2295
0
    metrics_context.report(true);
2296
0
    tablet_metrics_context_.report(true);
2297
0
    segment_metrics_context_.report(true);
2298
0
    return ret;
2299
0
}
2300
2301
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
2302
0
                                                 RecyclerMetricsContext& metrics_context) {
2303
0
    int ret = 0;
2304
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
2305
0
    std::unique_ptr<Transaction> txn;
2306
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2307
0
        LOG_WARNING("failed to recycle tablet ")
2308
0
                .tag("tablet id", tablet_id)
2309
0
                .tag("instance_id", instance_id_)
2310
0
                .tag("reason", "failed to create txn");
2311
0
        ret = -1;
2312
0
    }
2313
0
    GetRowsetResponse resp;
2314
0
    std::string msg;
2315
0
    MetaServiceCode code = MetaServiceCode::OK;
2316
    // get rowsets in tablet
2317
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
2318
0
                        tablet_id, code, msg, &resp);
2319
0
    if (code != MetaServiceCode::OK) {
2320
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2321
0
                .tag("tablet id", tablet_id)
2322
0
                .tag("msg", msg)
2323
0
                .tag("code", code)
2324
0
                .tag("instance id", instance_id_);
2325
0
        ret = -1;
2326
0
    }
2327
0
    for (const auto& rs_meta : resp.rowset_meta()) {
2328
        /*
2329
        * For compatibility, we skip the loop for [0-1] here.
2330
        * The purpose of this loop is to delete object files,
2331
        * and since [0-1] only has meta and doesn't have object files,
2332
        * skipping it doesn't affect system correctness.
2333
        *
2334
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
2335
        * would return error -1 directly, causing the recycle operation to fail.
2336
        *
2337
        * [0-1] doesn't have resource id is a bug.
2338
        * In the future, we will fix this problem, after that,
2339
        * we can remove this if statement.
2340
        *
2341
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
2342
        */
2343
2344
0
        if (rs_meta.end_version() == 1) {
2345
            // Assert that [0-1] has no resource_id to make sure
2346
            // this if statement will not be forgetted to remove
2347
            // when the resource id bug is fixed
2348
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2349
0
            continue;
2350
0
        }
2351
0
        if (!rs_meta.has_resource_id()) {
2352
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2353
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
2354
0
                    .tag("instance_id", instance_id_)
2355
0
                    .tag("tablet_id", tablet_id);
2356
0
            continue;
2357
0
        }
2358
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2359
0
        auto it = accessor_map_.find(rs_meta.resource_id());
2360
        // possible if the accessor is not initilized correctly
2361
0
        if (it == accessor_map_.end()) [[unlikely]] {
2362
0
            LOG_WARNING(
2363
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2364
0
                    "recycle process")
2365
0
                    .tag("tablet id", tablet_id)
2366
0
                    .tag("instance_id", instance_id_)
2367
0
                    .tag("resource_id", rs_meta.resource_id())
2368
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2369
0
            continue;
2370
0
        }
2371
2372
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
2373
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
2374
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
2375
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
2376
0
    }
2377
0
    return ret;
2378
0
}
2379
2380
4.24k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
2381
4.24k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
2382
4.24k
            .tag("instance_id", instance_id_)
2383
4.24k
            .tag("tablet_id", tablet_id);
2384
2385
4.24k
    if (instance_info_.has_multi_version_status() &&
2386
4.24k
        instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED) {
2387
6
        return recycle_versioned_tablet(tablet_id, metrics_context);
2388
6
    }
2389
2390
4.23k
    int ret = 0;
2391
4.23k
    auto start_time = steady_clock::now();
2392
2393
4.23k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
2394
2395
    // collect resource ids
2396
234
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
2397
234
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
2398
234
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
2399
234
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
2400
234
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
2401
234
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
2402
2403
234
    std::set<std::string> resource_ids;
2404
234
    int64_t recycle_rowsets_number = 0;
2405
234
    int64_t recycle_segments_number = 0;
2406
234
    int64_t recycle_rowsets_data_size = 0;
2407
234
    int64_t recycle_rowsets_index_size = 0;
2408
234
    int64_t recycle_restore_job_rowsets_number = 0;
2409
234
    int64_t recycle_restore_job_segments_number = 0;
2410
234
    int64_t recycle_restore_job_rowsets_data_size = 0;
2411
234
    int64_t recycle_restore_job_rowsets_index_size = 0;
2412
234
    int64_t max_rowset_version = 0;
2413
234
    int64_t min_rowset_creation_time = INT64_MAX;
2414
234
    int64_t max_rowset_creation_time = 0;
2415
234
    int64_t min_rowset_expiration_time = INT64_MAX;
2416
234
    int64_t max_rowset_expiration_time = 0;
2417
2418
234
    DORIS_CLOUD_DEFER {
2419
234
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2420
234
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2421
234
                .tag("instance_id", instance_id_)
2422
234
                .tag("tablet_id", tablet_id)
2423
234
                .tag("recycle rowsets number", recycle_rowsets_number)
2424
234
                .tag("recycle segments number", recycle_segments_number)
2425
234
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2426
234
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2427
234
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
2428
234
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
2429
234
                .tag("all restore job rowsets recycle data size",
2430
234
                     recycle_restore_job_rowsets_data_size)
2431
234
                .tag("all restore job rowsets recycle index size",
2432
234
                     recycle_restore_job_rowsets_index_size)
2433
234
                .tag("max rowset version", max_rowset_version)
2434
234
                .tag("min rowset creation time", min_rowset_creation_time)
2435
234
                .tag("max rowset creation time", max_rowset_creation_time)
2436
234
                .tag("min rowset expiration time", min_rowset_expiration_time)
2437
234
                .tag("max rowset expiration time", max_rowset_expiration_time)
2438
234
                .tag("ret", ret);
2439
234
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
2418
234
    DORIS_CLOUD_DEFER {
2419
234
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2420
234
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2421
234
                .tag("instance_id", instance_id_)
2422
234
                .tag("tablet_id", tablet_id)
2423
234
                .tag("recycle rowsets number", recycle_rowsets_number)
2424
234
                .tag("recycle segments number", recycle_segments_number)
2425
234
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2426
234
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2427
234
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
2428
234
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
2429
234
                .tag("all restore job rowsets recycle data size",
2430
234
                     recycle_restore_job_rowsets_data_size)
2431
234
                .tag("all restore job rowsets recycle index size",
2432
234
                     recycle_restore_job_rowsets_index_size)
2433
234
                .tag("max rowset version", max_rowset_version)
2434
234
                .tag("min rowset creation time", min_rowset_creation_time)
2435
234
                .tag("max rowset creation time", max_rowset_creation_time)
2436
234
                .tag("min rowset expiration time", min_rowset_expiration_time)
2437
234
                .tag("max rowset expiration time", max_rowset_expiration_time)
2438
234
                .tag("ret", ret);
2439
234
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
2440
2441
234
    std::unique_ptr<Transaction> txn;
2442
234
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2443
0
        LOG_WARNING("failed to recycle tablet ")
2444
0
                .tag("tablet id", tablet_id)
2445
0
                .tag("instance_id", instance_id_)
2446
0
                .tag("reason", "failed to create txn");
2447
0
        ret = -1;
2448
0
    }
2449
234
    GetRowsetResponse resp;
2450
234
    std::string msg;
2451
234
    MetaServiceCode code = MetaServiceCode::OK;
2452
    // get rowsets in tablet
2453
234
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
2454
234
                        tablet_id, code, msg, &resp);
2455
234
    if (code != MetaServiceCode::OK) {
2456
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2457
0
                .tag("tablet id", tablet_id)
2458
0
                .tag("msg", msg)
2459
0
                .tag("code", code)
2460
0
                .tag("instance id", instance_id_);
2461
0
        ret = -1;
2462
0
    }
2463
234
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
2464
2465
2.50k
    for (const auto& rs_meta : resp.rowset_meta()) {
2466
        // The rowset has no resource id and segments when it was generated by compaction
2467
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
2468
2.50k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
2469
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
2470
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
2471
0
                    .tag("instance_id", instance_id_)
2472
0
                    .tag("tablet_id", tablet_id);
2473
0
            recycle_rowsets_number += 1;
2474
0
            continue;
2475
0
        }
2476
2.50k
        if (!rs_meta.has_resource_id()) {
2477
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2478
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
2479
1
                    .tag("instance_id", instance_id_)
2480
1
                    .tag("tablet_id", tablet_id);
2481
1
            return -1;
2482
1
        }
2483
2.50k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
2484
2.50k
        auto it = accessor_map_.find(rs_meta.resource_id());
2485
        // possible if the accessor is not initilized correctly
2486
2.50k
        if (it == accessor_map_.end()) [[unlikely]] {
2487
1
            LOG_WARNING(
2488
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2489
1
                    "recycle process")
2490
1
                    .tag("tablet id", tablet_id)
2491
1
                    .tag("instance_id", instance_id_)
2492
1
                    .tag("resource_id", rs_meta.resource_id())
2493
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2494
1
            return -1;
2495
1
        }
2496
2.50k
        recycle_rowsets_number += 1;
2497
2.50k
        recycle_segments_number += rs_meta.num_segments();
2498
2.50k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
2499
2.50k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
2500
2.50k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
2501
2.50k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
2502
2.50k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
2503
2.50k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
2504
2.50k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
2505
2.50k
        resource_ids.emplace(rs_meta.resource_id());
2506
2.50k
    }
2507
2508
    // get restore job rowset in tablet
2509
232
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
2510
232
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
2511
232
    if (code != MetaServiceCode::OK) {
2512
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
2513
0
                .tag("tablet id", tablet_id)
2514
0
                .tag("msg", msg)
2515
0
                .tag("code", code)
2516
0
                .tag("instance id", instance_id_);
2517
0
        return -1;
2518
0
    }
2519
2520
232
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
2521
0
        if (!rs_meta.has_resource_id()) {
2522
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
2523
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
2524
0
                    .tag("instance_id", instance_id_)
2525
0
                    .tag("tablet_id", tablet_id);
2526
0
            return -1;
2527
0
        }
2528
2529
0
        auto it = accessor_map_.find(rs_meta.resource_id());
2530
        // possible if the accessor is not initilized correctly
2531
0
        if (it == accessor_map_.end()) [[unlikely]] {
2532
0
            LOG_WARNING(
2533
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
2534
0
                    "recycle process")
2535
0
                    .tag("tablet id", tablet_id)
2536
0
                    .tag("instance_id", instance_id_)
2537
0
                    .tag("resource_id", rs_meta.resource_id())
2538
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
2539
0
            return -1;
2540
0
        }
2541
0
        recycle_restore_job_rowsets_number += 1;
2542
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
2543
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
2544
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
2545
0
        resource_ids.emplace(rs_meta.resource_id());
2546
0
    }
2547
2548
232
    LOG_INFO("recycle tablet start to delete object")
2549
232
            .tag("instance id", instance_id_)
2550
232
            .tag("tablet id", tablet_id)
2551
232
            .tag("recycle tablet resource ids are",
2552
232
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
2553
232
                                 [](std::string rs_id, const auto& it) {
2554
203
                                     return rs_id.empty() ? it : rs_id + ", " + it;
2555
203
                                 }));
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
2553
203
                                 [](std::string rs_id, const auto& it) {
2554
203
                                     return rs_id.empty() ? it : rs_id + ", " + it;
2555
203
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
2556
2557
232
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
2558
232
            _thread_pool_group.s3_producer_pool,
2559
232
            fmt::format("delete tablet {} s3 rowset", tablet_id),
2560
232
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
2560
203
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
2561
2562
    // delete all rowset data in this tablet
2563
    // ATTN: there may be data leak if not all accessor initilized successfully
2564
    //       partial data deleted if the tablet is stored cross-storage vault
2565
    //       vault id is not attached to TabletMeta...
2566
232
    for (const auto& resource_id : resource_ids) {
2567
203
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
2568
203
        concurrent_delete_executor.add(
2569
203
                [&, rs_id = resource_id,
2570
203
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
2571
203
                    std::unique_ptr<int, std::function<void(int*)>> defer(
2572
203
                            (int*)0x01, [&](int*) { metrics_context.report(); });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
Line
Count
Source
2572
203
                            (int*)0x01, [&](int*) { metrics_context.report(); });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
2573
203
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
2574
203
                    if (res != 0) {
2575
1
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
2576
1
                                     << " path=" << accessor_ptr->uri();
2577
1
                        return std::make_pair(-1, rs_id);
2578
1
                    }
2579
202
                    return std::make_pair(0, rs_id);
2580
203
                });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
2570
203
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
2571
203
                    std::unique_ptr<int, std::function<void(int*)>> defer(
2572
203
                            (int*)0x01, [&](int*) { metrics_context.report(); });
2573
203
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
2574
203
                    if (res != 0) {
2575
1
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
2576
1
                                     << " path=" << accessor_ptr->uri();
2577
1
                        return std::make_pair(-1, rs_id);
2578
1
                    }
2579
202
                    return std::make_pair(0, rs_id);
2580
203
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
2581
203
    }
2582
2583
232
    bool finished = true;
2584
232
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
2585
232
    for (auto& r : rets) {
2586
203
        if (r.first != 0) {
2587
1
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
2588
1
            ret = -1;
2589
1
        }
2590
203
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
2591
203
    }
2592
232
    ret = finished ? ret : -1;
2593
2594
232
    if (ret != 0) { // failed recycle tablet data
2595
1
        LOG_WARNING("ret!=0")
2596
1
                .tag("finished", finished)
2597
1
                .tag("ret", ret)
2598
1
                .tag("instance_id", instance_id_)
2599
1
                .tag("tablet_id", tablet_id);
2600
1
        return ret;
2601
1
    }
2602
2603
231
    tablet_metrics_context_.total_recycled_data_size +=
2604
231
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2605
231
    tablet_metrics_context_.total_recycled_num += 1;
2606
231
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
2607
231
    segment_metrics_context_.total_recycled_data_size +=
2608
231
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2609
231
    metrics_context.total_recycled_data_size +=
2610
231
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2611
231
    tablet_metrics_context_.report();
2612
231
    segment_metrics_context_.report();
2613
231
    metrics_context.report();
2614
2615
231
    txn.reset();
2616
231
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2617
0
        LOG_WARNING("failed to recycle tablet ")
2618
0
                .tag("tablet id", tablet_id)
2619
0
                .tag("instance_id", instance_id_)
2620
0
                .tag("reason", "failed to create txn");
2621
0
        ret = -1;
2622
0
    }
2623
    // delete all rowset kv in this tablet
2624
231
    txn->remove(rs_key0, rs_key1);
2625
231
    txn->remove(recyc_rs_key0, recyc_rs_key1);
2626
231
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
2627
2628
    // remove delete bitmap for MoW table
2629
231
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
2630
231
    txn->remove(pending_key);
2631
231
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
2632
231
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
2633
231
    txn->remove(delete_bitmap_start, delete_bitmap_end);
2634
2635
231
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
2636
231
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
2637
231
    txn->remove(dbm_start_key, dbm_end_key);
2638
231
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
2639
231
              << " end=" << hex(dbm_end_key);
2640
2641
231
    TxnErrorCode err = txn->commit();
2642
231
    if (err != TxnErrorCode::TXN_OK) {
2643
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
2644
0
        ret = -1;
2645
0
    }
2646
2647
231
    if (ret == 0) {
2648
        // All object files under tablet have been deleted
2649
231
        std::lock_guard lock(recycled_tablets_mtx_);
2650
231
        recycled_tablets_.insert(tablet_id);
2651
231
    }
2652
2653
231
    return ret;
2654
232
}
2655
2656
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
2657
6
                                               RecyclerMetricsContext& metrics_context) {
2658
6
    int ret = 0;
2659
6
    auto start_time = steady_clock::now();
2660
2661
6
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
2662
2663
    // collect resource ids
2664
6
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
2665
6
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
2666
6
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
2667
6
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
2668
2669
6
    int64_t recycle_rowsets_number = 0;
2670
6
    int64_t recycle_segments_number = 0;
2671
6
    int64_t recycle_rowsets_data_size = 0;
2672
6
    int64_t recycle_rowsets_index_size = 0;
2673
6
    int64_t max_rowset_version = 0;
2674
6
    int64_t min_rowset_creation_time = INT64_MAX;
2675
6
    int64_t max_rowset_creation_time = 0;
2676
6
    int64_t min_rowset_expiration_time = INT64_MAX;
2677
6
    int64_t max_rowset_expiration_time = 0;
2678
2679
6
    DORIS_CLOUD_DEFER {
2680
6
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2681
6
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2682
6
                .tag("instance_id", instance_id_)
2683
6
                .tag("tablet_id", tablet_id)
2684
6
                .tag("recycle rowsets number", recycle_rowsets_number)
2685
6
                .tag("recycle segments number", recycle_segments_number)
2686
6
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2687
6
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2688
6
                .tag("max rowset version", max_rowset_version)
2689
6
                .tag("min rowset creation time", min_rowset_creation_time)
2690
6
                .tag("max rowset creation time", max_rowset_creation_time)
2691
6
                .tag("min rowset expiration time", min_rowset_expiration_time)
2692
6
                .tag("max rowset expiration time", max_rowset_expiration_time)
2693
6
                .tag("ret", ret);
2694
6
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
2679
6
    DORIS_CLOUD_DEFER {
2680
6
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2681
6
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
2682
6
                .tag("instance_id", instance_id_)
2683
6
                .tag("tablet_id", tablet_id)
2684
6
                .tag("recycle rowsets number", recycle_rowsets_number)
2685
6
                .tag("recycle segments number", recycle_segments_number)
2686
6
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
2687
6
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
2688
6
                .tag("max rowset version", max_rowset_version)
2689
6
                .tag("min rowset creation time", min_rowset_creation_time)
2690
6
                .tag("max rowset creation time", max_rowset_creation_time)
2691
6
                .tag("min rowset expiration time", min_rowset_expiration_time)
2692
6
                .tag("max rowset expiration time", max_rowset_expiration_time)
2693
6
                .tag("ret", ret);
2694
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
2695
2696
6
    std::unique_ptr<Transaction> txn;
2697
6
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2698
0
        LOG_WARNING("failed to recycle tablet ")
2699
0
                .tag("tablet id", tablet_id)
2700
0
                .tag("instance_id", instance_id_)
2701
0
                .tag("reason", "failed to create txn");
2702
0
        ret = -1;
2703
0
    }
2704
2705
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
2706
    // by the related operation logs.
2707
6
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
2708
6
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
2709
6
    MetaReader meta_reader(instance_id_);
2710
6
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
2711
6
    if (err == TxnErrorCode::TXN_OK) {
2712
6
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
2713
6
    }
2714
6
    if (err != TxnErrorCode::TXN_OK) {
2715
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
2716
0
                .tag("tablet id", tablet_id)
2717
0
                .tag("err", err)
2718
0
                .tag("instance id", instance_id_);
2719
0
        ret = -1;
2720
0
    }
2721
2722
6
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
2723
6
             load_rowset_metas.size(), compact_rowset_metas.size())
2724
6
            .tag("instance_id", instance_id_)
2725
6
            .tag("tablet_id", tablet_id);
2726
2727
6
    SyncExecutor<int> concurrent_delete_executor(
2728
6
            _thread_pool_group.s3_producer_pool,
2729
6
            fmt::format("delete tablet {} s3 rowset", tablet_id),
2730
30
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
2730
30
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
2731
2732
30
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
2733
30
        recycle_rowsets_number += 1;
2734
30
        recycle_segments_number += rs_meta.num_segments();
2735
30
        recycle_rowsets_data_size += rs_meta.data_disk_size();
2736
30
        recycle_rowsets_index_size += rs_meta.index_disk_size();
2737
30
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
2738
30
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
2739
30
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
2740
30
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
2741
30
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
2742
30
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
2732
30
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
2733
30
        recycle_rowsets_number += 1;
2734
30
        recycle_segments_number += rs_meta.num_segments();
2735
30
        recycle_rowsets_data_size += rs_meta.data_disk_size();
2736
30
        recycle_rowsets_index_size += rs_meta.index_disk_size();
2737
30
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
2738
30
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
2739
30
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
2740
30
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
2741
30
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
2742
30
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
2743
2744
30
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
2745
30
        update_rowset_stats(rs_meta);
2746
30
        concurrent_delete_executor.add([tablet_id, versionstamp, rs_meta_pb = rs_meta, this]() {
2747
30
            std::string rowset_key = versioned::meta_rowset_load_key(
2748
30
                    {instance_id_, tablet_id, rs_meta_pb.end_version()});
2749
30
            return recycle_rowset_meta_and_data(encode_versioned_key(rowset_key, versionstamp),
2750
30
                                                rs_meta_pb);
2751
30
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv
Line
Count
Source
2746
30
        concurrent_delete_executor.add([tablet_id, versionstamp, rs_meta_pb = rs_meta, this]() {
2747
30
            std::string rowset_key = versioned::meta_rowset_load_key(
2748
30
                    {instance_id_, tablet_id, rs_meta_pb.end_version()});
2749
30
            return recycle_rowset_meta_and_data(encode_versioned_key(rowset_key, versionstamp),
2750
30
                                                rs_meta_pb);
2751
30
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clEv
2752
30
    }
2753
2754
6
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
2755
0
        update_rowset_stats(rs_meta);
2756
0
        concurrent_delete_executor.add([tablet_id, versionstamp, rs_meta_pb = rs_meta, this]() {
2757
0
            std::string rowset_key = versioned::meta_rowset_compact_key(
2758
0
                    {instance_id_, tablet_id, rs_meta_pb.end_version()});
2759
0
            return recycle_rowset_meta_and_data(encode_versioned_key(rowset_key, versionstamp),
2760
0
                                                rs_meta_pb);
2761
0
        });
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clEv
2762
0
    }
2763
2764
6
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
2765
0
        RecycleRowsetPB recycle_rowset;
2766
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
2767
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
2768
0
            return -1;
2769
0
        }
2770
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
2771
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
2772
                // in old version, keep this key-value pair and it needs to be checked manually
2773
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
2774
0
                return -1;
2775
0
            }
2776
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
2777
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
2778
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
2779
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
2780
0
                return -1;
2781
0
            }
2782
            // decode rowset_id
2783
0
            auto k1 = k;
2784
0
            k1.remove_prefix(1);
2785
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2786
0
            decode_key(&k1, &out);
2787
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
2788
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
2789
0
            LOG_INFO("delete rowset data")
2790
0
                    .tag("instance_id", instance_id_)
2791
0
                    .tag("tablet_id", tablet_id)
2792
0
                    .tag("rowset_id", rowset_id);
2793
2794
0
            concurrent_delete_executor.add(
2795
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
2796
                        // delete by prefix, the recycle rowset key will be deleted by range later.
2797
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
2798
0
                    });
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
2799
0
        } else {
2800
0
            concurrent_delete_executor.add(
2801
0
                    [k = std::string(k), recycle_rowset = std::move(recycle_rowset), this]() {
2802
0
                        return recycle_rowset_meta_and_data(k, recycle_rowset.rowset_meta());
2803
0
                    });
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE0_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE0_clEv
2804
0
        }
2805
0
        return 0;
2806
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_5clESt17basic_string_viewIcSt11char_traitsIcEES8_
2807
2808
6
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
2809
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
2810
0
                .tag("tablet id", tablet_id)
2811
0
                .tag("instance_id", instance_id_)
2812
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
2813
0
        ret = -1;
2814
0
    }
2815
2816
6
    bool finished = true;
2817
6
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
2818
30
    for (int r : rets) {
2819
30
        if (r != 0) {
2820
0
            ret = -1;
2821
0
        }
2822
30
    }
2823
2824
6
    ret = finished ? ret : -1;
2825
2826
6
    if (ret != 0) { // failed recycle tablet data
2827
0
        LOG_WARNING("ret!=0")
2828
0
                .tag("finished", finished)
2829
0
                .tag("ret", ret)
2830
0
                .tag("instance_id", instance_id_)
2831
0
                .tag("tablet_id", tablet_id);
2832
0
        return ret;
2833
0
    }
2834
2835
6
    tablet_metrics_context_.total_recycled_data_size +=
2836
6
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2837
6
    tablet_metrics_context_.total_recycled_num += 1;
2838
6
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
2839
6
    segment_metrics_context_.total_recycled_data_size +=
2840
6
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2841
6
    metrics_context.total_recycled_data_size +=
2842
6
            recycle_rowsets_data_size + recycle_rowsets_index_size;
2843
6
    tablet_metrics_context_.report();
2844
6
    segment_metrics_context_.report();
2845
6
    metrics_context.report();
2846
2847
6
    txn.reset();
2848
6
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2849
0
        LOG_WARNING("failed to recycle tablet ")
2850
0
                .tag("tablet id", tablet_id)
2851
0
                .tag("instance_id", instance_id_)
2852
0
                .tag("reason", "failed to create txn");
2853
0
        ret = -1;
2854
0
    }
2855
    // delete all rowset kv in this tablet
2856
6
    txn->remove(rs_key0, rs_key1);
2857
6
    txn->remove(recyc_rs_key0, recyc_rs_key1);
2858
2859
    // remove delete bitmap for MoW table
2860
6
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
2861
6
    txn->remove(pending_key);
2862
6
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
2863
6
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
2864
6
    txn->remove(delete_bitmap_start, delete_bitmap_end);
2865
2866
6
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
2867
6
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
2868
6
    txn->remove(dbm_start_key, dbm_end_key);
2869
6
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
2870
6
              << " end=" << hex(dbm_end_key);
2871
2872
6
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
2873
6
    std::string tablet_index_val;
2874
6
    err = txn->get(versioned_idx_key, &tablet_index_val);
2875
6
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
2876
0
        LOG_WARNING("failed to get tablet index kv")
2877
0
                .tag("instance_id", instance_id_)
2878
0
                .tag("tablet_id", tablet_id)
2879
0
                .tag("err", err);
2880
0
        ret = -1;
2881
6
    } else if (err == TxnErrorCode::TXN_OK) {
2882
        // If the tablet index kv exists, we need to delete it
2883
5
        TabletIndexPB tablet_index_pb;
2884
5
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
2885
0
            LOG_WARNING("failed to parse tablet index pb")
2886
0
                    .tag("instance_id", instance_id_)
2887
0
                    .tag("tablet_id", tablet_id);
2888
0
            ret = -1;
2889
5
        } else {
2890
5
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
2891
5
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
2892
5
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
2893
5
            txn->remove(versioned_inverted_idx_key);
2894
5
            txn->remove(versioned_idx_key);
2895
5
        }
2896
5
    }
2897
2898
6
    err = txn->commit();
2899
6
    if (err != TxnErrorCode::TXN_OK) {
2900
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
2901
0
        ret = -1;
2902
0
    }
2903
2904
6
    if (ret == 0) {
2905
        // All object files under tablet have been deleted
2906
6
        std::lock_guard lock(recycled_tablets_mtx_);
2907
6
        recycled_tablets_.insert(tablet_id);
2908
6
    }
2909
2910
6
    return ret;
2911
6
}
2912
2913
18
int InstanceRecycler::recycle_rowsets() {
2914
18
    if (instance_info_.has_multi_version_status() &&
2915
18
        instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED) {
2916
5
        return recycle_versioned_rowsets();
2917
5
    }
2918
2919
13
    const std::string task_name = "recycle_rowsets";
2920
13
    int64_t num_scanned = 0;
2921
13
    int64_t num_expired = 0;
2922
13
    int64_t num_prepare = 0;
2923
13
    int64_t num_compacted = 0;
2924
13
    int64_t num_empty_rowset = 0;
2925
13
    size_t total_rowset_key_size = 0;
2926
13
    size_t total_rowset_value_size = 0;
2927
13
    size_t expired_rowset_size = 0;
2928
13
    std::atomic_long num_recycled = 0;
2929
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2930
2931
13
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
2932
13
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
2933
13
    std::string recyc_rs_key0;
2934
13
    std::string recyc_rs_key1;
2935
13
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
2936
13
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
2937
2938
13
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
2939
2940
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2941
13
    register_recycle_task(task_name, start_time);
2942
2943
13
    DORIS_CLOUD_DEFER {
2944
13
        unregister_recycle_task(task_name);
2945
13
        int64_t cost =
2946
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2947
13
        metrics_context.finish_report();
2948
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
2949
13
                .tag("instance_id", instance_id_)
2950
13
                .tag("num_scanned", num_scanned)
2951
13
                .tag("num_expired", num_expired)
2952
13
                .tag("num_recycled", num_recycled)
2953
13
                .tag("num_recycled.prepare", num_prepare)
2954
13
                .tag("num_recycled.compacted", num_compacted)
2955
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
2956
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2957
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2958
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
2959
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
2943
13
    DORIS_CLOUD_DEFER {
2944
13
        unregister_recycle_task(task_name);
2945
13
        int64_t cost =
2946
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2947
13
        metrics_context.finish_report();
2948
13
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
2949
13
                .tag("instance_id", instance_id_)
2950
13
                .tag("num_scanned", num_scanned)
2951
13
                .tag("num_expired", num_expired)
2952
13
                .tag("num_recycled", num_recycled)
2953
13
                .tag("num_recycled.prepare", num_prepare)
2954
13
                .tag("num_recycled.compacted", num_compacted)
2955
13
                .tag("num_recycled.empty_rowset", num_empty_rowset)
2956
13
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
2957
13
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
2958
13
                .tag("expired_rowset_meta_size", expired_rowset_size);
2959
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
2960
2961
13
    std::vector<std::string> rowset_keys;
2962
    // rowset_id -> rowset_meta
2963
    // store rowset id and meta for statistics rs size when delete
2964
13
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
2965
2966
    // Store keys of rowset recycled by background workers
2967
13
    std::mutex async_recycled_rowset_keys_mutex;
2968
13
    std::vector<std::string> async_recycled_rowset_keys;
2969
13
    auto worker_pool = std::make_unique<SimpleThreadPool>(
2970
13
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
2971
13
    worker_pool->start();
2972
    // TODO bacth delete
2973
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
2974
4.00k
        std::string dbm_start_key =
2975
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
2976
4.00k
        std::string dbm_end_key = dbm_start_key;
2977
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
2978
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
2979
4.00k
        if (ret != 0) {
2980
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
2981
0
                         << instance_id_;
2982
0
        }
2983
4.00k
        return ret;
2984
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
2973
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
2974
4.00k
        std::string dbm_start_key =
2975
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
2976
4.00k
        std::string dbm_end_key = dbm_start_key;
2977
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
2978
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
2979
4.00k
        if (ret != 0) {
2980
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
2981
0
                         << instance_id_;
2982
0
        }
2983
4.00k
        return ret;
2984
4.00k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
2985
13
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
2986
900
                                            int64_t tablet_id, const std::string& rowset_id) {
2987
        // Try to delete rowset data in background thread
2988
900
        int ret = worker_pool->submit_with_timeout(
2989
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
2990
799
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2991
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2992
0
                        return;
2993
0
                    }
2994
799
                    std::vector<std::string> keys;
2995
799
                    {
2996
799
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
2997
799
                        async_recycled_rowset_keys.push_back(std::move(key));
2998
799
                        if (async_recycled_rowset_keys.size() > 100) {
2999
7
                            keys.swap(async_recycled_rowset_keys);
3000
7
                        }
3001
799
                    }
3002
799
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
3003
799
                    if (keys.empty()) return;
3004
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3005
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3006
0
                                     << instance_id_;
3007
7
                    } else {
3008
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3009
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3010
7
                                           num_recycled, start_time);
3011
7
                    }
3012
7
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
2989
799
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
2990
799
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2991
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2992
0
                        return;
2993
0
                    }
2994
799
                    std::vector<std::string> keys;
2995
799
                    {
2996
799
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
2997
799
                        async_recycled_rowset_keys.push_back(std::move(key));
2998
799
                        if (async_recycled_rowset_keys.size() > 100) {
2999
7
                            keys.swap(async_recycled_rowset_keys);
3000
7
                        }
3001
799
                    }
3002
799
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
3003
799
                    if (keys.empty()) return;
3004
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3005
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3006
0
                                     << instance_id_;
3007
7
                    } else {
3008
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3009
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3010
7
                                           num_recycled, start_time);
3011
7
                    }
3012
7
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
3013
900
                0);
3014
900
        if (ret == 0) return 0;
3015
        // Submit task failed, delete rowset data in current thread
3016
101
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3017
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3018
0
            return -1;
3019
0
        }
3020
101
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
3021
0
            return -1;
3022
0
        }
3023
101
        rowset_keys.push_back(std::move(key));
3024
101
        return 0;
3025
101
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
2986
900
                                            int64_t tablet_id, const std::string& rowset_id) {
2987
        // Try to delete rowset data in background thread
2988
900
        int ret = worker_pool->submit_with_timeout(
2989
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
2990
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
2991
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
2992
900
                        return;
2993
900
                    }
2994
900
                    std::vector<std::string> keys;
2995
900
                    {
2996
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
2997
900
                        async_recycled_rowset_keys.push_back(std::move(key));
2998
900
                        if (async_recycled_rowset_keys.size() > 100) {
2999
900
                            keys.swap(async_recycled_rowset_keys);
3000
900
                        }
3001
900
                    }
3002
900
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
3003
900
                    if (keys.empty()) return;
3004
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3005
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3006
900
                                     << instance_id_;
3007
900
                    } else {
3008
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3009
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3010
900
                                           num_recycled, start_time);
3011
900
                    }
3012
900
                },
3013
900
                0);
3014
900
        if (ret == 0) return 0;
3015
        // Submit task failed, delete rowset data in current thread
3016
101
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3017
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3018
0
            return -1;
3019
0
        }
3020
101
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
3021
0
            return -1;
3022
0
        }
3023
101
        rowset_keys.push_back(std::move(key));
3024
101
        return 0;
3025
101
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
3026
3027
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3028
3029
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
3030
4.00k
        ++num_scanned;
3031
4.00k
        total_rowset_key_size += k.size();
3032
4.00k
        total_rowset_value_size += v.size();
3033
4.00k
        RecycleRowsetPB rowset;
3034
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3035
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3036
0
            return -1;
3037
0
        }
3038
3039
4.00k
        int64_t current_time = ::time(nullptr);
3040
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3041
3042
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3043
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3044
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3045
4.00k
        if (current_time < expiration) { // not expired
3046
0
            return 0;
3047
0
        }
3048
4.00k
        ++num_expired;
3049
4.00k
        expired_rowset_size += v.size();
3050
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3051
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3052
                // in old version, keep this key-value pair and it needs to be checked manually
3053
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3054
0
                return -1;
3055
0
            }
3056
250
            if (rowset.resource_id().empty()) [[unlikely]] {
3057
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3058
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3059
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3060
0
                rowset_keys.emplace_back(k);
3061
0
                return -1;
3062
0
            }
3063
            // decode rowset_id
3064
250
            auto k1 = k;
3065
250
            k1.remove_prefix(1);
3066
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3067
250
            decode_key(&k1, &out);
3068
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3069
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3070
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3071
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
3072
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3073
250
                                             rowset.tablet_id(), rowset_id) != 0) {
3074
0
                return -1;
3075
0
            }
3076
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
3077
250
            metrics_context.total_recycled_num++;
3078
250
            segment_metrics_context_.total_recycled_data_size +=
3079
250
                    rowset.rowset_meta().total_disk_size();
3080
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
3081
250
            segment_metrics_context_.report();
3082
250
            metrics_context.report();
3083
250
            return 0;
3084
250
        }
3085
        // TODO(plat1ko): check rowset not referenced
3086
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
3087
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3088
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3089
0
                LOG_INFO("recycle rowset that has empty resource id");
3090
0
            } else {
3091
                // other situations, keep this key-value pair and it needs to be checked manually
3092
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3093
0
                return -1;
3094
0
            }
3095
0
        }
3096
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3097
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
3098
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3099
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3100
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
3101
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3102
3.75k
                  << " rowset_meta_size=" << v.size()
3103
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
3104
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3105
            // unable to calculate file path, can only be deleted by rowset id prefix
3106
650
            num_prepare += 1;
3107
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3108
650
                                             rowset_meta->tablet_id(),
3109
650
                                             rowset_meta->rowset_id_v2()) != 0) {
3110
0
                return -1;
3111
0
            }
3112
3.10k
        } else {
3113
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
3114
3.10k
            rowset_keys.emplace_back(k);
3115
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
3116
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
3117
3.10k
                ++num_empty_rowset;
3118
3.10k
            }
3119
3.10k
        }
3120
3.75k
        return 0;
3121
3.75k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3029
4.00k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
3030
4.00k
        ++num_scanned;
3031
4.00k
        total_rowset_key_size += k.size();
3032
4.00k
        total_rowset_value_size += v.size();
3033
4.00k
        RecycleRowsetPB rowset;
3034
4.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3035
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3036
0
            return -1;
3037
0
        }
3038
3039
4.00k
        int64_t current_time = ::time(nullptr);
3040
4.00k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3041
3042
4.00k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3043
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3044
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3045
4.00k
        if (current_time < expiration) { // not expired
3046
0
            return 0;
3047
0
        }
3048
4.00k
        ++num_expired;
3049
4.00k
        expired_rowset_size += v.size();
3050
4.00k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3051
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3052
                // in old version, keep this key-value pair and it needs to be checked manually
3053
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3054
0
                return -1;
3055
0
            }
3056
250
            if (rowset.resource_id().empty()) [[unlikely]] {
3057
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3058
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3059
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3060
0
                rowset_keys.emplace_back(k);
3061
0
                return -1;
3062
0
            }
3063
            // decode rowset_id
3064
250
            auto k1 = k;
3065
250
            k1.remove_prefix(1);
3066
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3067
250
            decode_key(&k1, &out);
3068
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3069
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3070
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3071
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
3072
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3073
250
                                             rowset.tablet_id(), rowset_id) != 0) {
3074
0
                return -1;
3075
0
            }
3076
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
3077
250
            metrics_context.total_recycled_num++;
3078
250
            segment_metrics_context_.total_recycled_data_size +=
3079
250
                    rowset.rowset_meta().total_disk_size();
3080
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
3081
250
            segment_metrics_context_.report();
3082
250
            metrics_context.report();
3083
250
            return 0;
3084
250
        }
3085
        // TODO(plat1ko): check rowset not referenced
3086
3.75k
        auto rowset_meta = rowset.mutable_rowset_meta();
3087
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3088
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3089
0
                LOG_INFO("recycle rowset that has empty resource id");
3090
0
            } else {
3091
                // other situations, keep this key-value pair and it needs to be checked manually
3092
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3093
0
                return -1;
3094
0
            }
3095
0
        }
3096
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3097
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
3098
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3099
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3100
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
3101
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3102
3.75k
                  << " rowset_meta_size=" << v.size()
3103
3.75k
                  << " creation_time=" << rowset_meta->creation_time();
3104
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3105
            // unable to calculate file path, can only be deleted by rowset id prefix
3106
650
            num_prepare += 1;
3107
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3108
650
                                             rowset_meta->tablet_id(),
3109
650
                                             rowset_meta->rowset_id_v2()) != 0) {
3110
0
                return -1;
3111
0
            }
3112
3.10k
        } else {
3113
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
3114
3.10k
            rowset_keys.emplace_back(k);
3115
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
3116
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
3117
3.10k
                ++num_empty_rowset;
3118
3.10k
            }
3119
3.10k
        }
3120
3.75k
        return 0;
3121
3.75k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
3122
3123
21
    auto loop_done = [&]() -> int {
3124
21
        std::vector<std::string> rowset_keys_to_delete;
3125
        // rowset_id -> rowset_meta
3126
        // store rowset id and meta for statistics rs size when delete
3127
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
3128
21
        rowset_keys_to_delete.swap(rowset_keys);
3129
21
        rowsets_to_delete.swap(rowsets);
3130
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
3131
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
3132
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
3133
21
                                   metrics_context) != 0) {
3134
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
3135
0
                return;
3136
0
            }
3137
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
3138
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3139
0
                    return;
3140
0
                }
3141
3.10k
            }
3142
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
3143
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3144
0
                return;
3145
0
            }
3146
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
3147
21
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
3131
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
3132
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
3133
21
                                   metrics_context) != 0) {
3134
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
3135
0
                return;
3136
0
            }
3137
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
3138
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3139
0
                    return;
3140
0
                }
3141
3.10k
            }
3142
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
3143
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3144
0
                return;
3145
0
            }
3146
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
3147
21
        });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
3148
21
        return 0;
3149
21
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
3123
21
    auto loop_done = [&]() -> int {
3124
21
        std::vector<std::string> rowset_keys_to_delete;
3125
        // rowset_id -> rowset_meta
3126
        // store rowset id and meta for statistics rs size when delete
3127
21
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
3128
21
        rowset_keys_to_delete.swap(rowset_keys);
3129
21
        rowsets_to_delete.swap(rowsets);
3130
21
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
3131
21
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
3132
21
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
3133
21
                                   metrics_context) != 0) {
3134
21
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
3135
21
                return;
3136
21
            }
3137
21
            for (const auto& [_, rs] : rowsets_to_delete) {
3138
21
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3139
21
                    return;
3140
21
                }
3141
21
            }
3142
21
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
3143
21
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3144
21
                return;
3145
21
            }
3146
21
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
3147
21
        });
3148
21
        return 0;
3149
21
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
3150
3151
13
    if (config::enable_recycler_stats_metrics) {
3152
0
        scan_and_statistics_rowsets();
3153
0
    }
3154
    // recycle_func and loop_done for scan and recycle
3155
13
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
3156
13
                               std::move(loop_done));
3157
3158
13
    worker_pool->stop();
3159
3160
13
    if (!async_recycled_rowset_keys.empty()) {
3161
2
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
3162
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3163
0
            return -1;
3164
2
        } else {
3165
2
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
3166
2
        }
3167
2
    }
3168
13
    return ret;
3169
13
}
3170
3171
13
int InstanceRecycler::recycle_restore_jobs() {
3172
13
    const std::string task_name = "recycle_restore_jobs";
3173
13
    int64_t num_scanned = 0;
3174
13
    int64_t num_expired = 0;
3175
13
    int64_t num_recycled = 0;
3176
13
    int64_t num_aborted = 0;
3177
3178
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3179
3180
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
3181
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
3182
13
    std::string restore_job_key0;
3183
13
    std::string restore_job_key1;
3184
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
3185
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
3186
3187
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
3188
3189
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3190
13
    register_recycle_task(task_name, start_time);
3191
3192
13
    DORIS_CLOUD_DEFER {
3193
13
        unregister_recycle_task(task_name);
3194
13
        int64_t cost =
3195
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3196
13
        metrics_context.finish_report();
3197
3198
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
3199
13
                .tag("instance_id", instance_id_)
3200
13
                .tag("num_scanned", num_scanned)
3201
13
                .tag("num_expired", num_expired)
3202
13
                .tag("num_recycled", num_recycled)
3203
13
                .tag("num_aborted", num_aborted);
3204
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
3192
13
    DORIS_CLOUD_DEFER {
3193
13
        unregister_recycle_task(task_name);
3194
13
        int64_t cost =
3195
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3196
13
        metrics_context.finish_report();
3197
3198
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
3199
13
                .tag("instance_id", instance_id_)
3200
13
                .tag("num_scanned", num_scanned)
3201
13
                .tag("num_expired", num_expired)
3202
13
                .tag("num_recycled", num_recycled)
3203
13
                .tag("num_aborted", num_aborted);
3204
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
3205
3206
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3207
3208
13
    std::vector<std::string_view> restore_job_keys;
3209
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
3210
41
        ++num_scanned;
3211
41
        RestoreJobCloudPB restore_job_pb;
3212
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
3213
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
3214
0
            return -1;
3215
0
        }
3216
41
        int64_t expiration =
3217
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
3218
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
3219
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
3220
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
3221
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
3222
0
                   << " state=" << restore_job_pb.state();
3223
41
        int64_t current_time = ::time(nullptr);
3224
41
        if (current_time < expiration) { // not expired
3225
0
            return 0;
3226
0
        }
3227
41
        ++num_expired;
3228
3229
41
        int64_t tablet_id = restore_job_pb.tablet_id();
3230
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
3231
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
3232
3233
41
        std::unique_ptr<Transaction> txn;
3234
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3235
41
        if (err != TxnErrorCode::TXN_OK) {
3236
0
            LOG_WARNING("failed to recycle restore job")
3237
0
                    .tag("err", err)
3238
0
                    .tag("tablet id", tablet_id)
3239
0
                    .tag("instance_id", instance_id_)
3240
0
                    .tag("reason", "failed to create txn");
3241
0
            return -1;
3242
0
        }
3243
3244
41
        std::string val;
3245
41
        err = txn->get(k, &val);
3246
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
3247
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
3248
0
            return 0;
3249
0
        }
3250
41
        if (err != TxnErrorCode::TXN_OK) {
3251
0
            LOG_WARNING("failed to get kv");
3252
0
            return -1;
3253
0
        }
3254
41
        restore_job_pb.Clear();
3255
41
        if (!restore_job_pb.ParseFromString(val)) {
3256
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
3257
0
            return -1;
3258
0
        }
3259
3260
        // PREPARED or COMMITTED, change state to DROPPED and return
3261
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
3262
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
3263
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
3264
0
            restore_job_pb.set_need_recycle_data(true);
3265
0
            txn->put(k, restore_job_pb.SerializeAsString());
3266
0
            err = txn->commit();
3267
0
            if (err != TxnErrorCode::TXN_OK) {
3268
0
                LOG_WARNING("failed to commit txn: {}", err);
3269
0
                return -1;
3270
0
            }
3271
0
            num_aborted++;
3272
0
            return 0;
3273
0
        }
3274
3275
        // Change state to RECYCLING
3276
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
3277
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
3278
21
            txn->put(k, restore_job_pb.SerializeAsString());
3279
21
            err = txn->commit();
3280
21
            if (err != TxnErrorCode::TXN_OK) {
3281
0
                LOG_WARNING("failed to commit txn: {}", err);
3282
0
                return -1;
3283
0
            }
3284
21
            return 0;
3285
21
        }
3286
3287
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
3288
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
3289
3290
        // Recycle all data associated with the restore job.
3291
        // This includes rowsets, segments, and related resources.
3292
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
3293
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
3294
0
            LOG_WARNING("failed to recycle tablet")
3295
0
                    .tag("tablet_id", tablet_id)
3296
0
                    .tag("instance_id", instance_id_);
3297
0
            return -1;
3298
0
        }
3299
3300
        // delete all restore job rowset kv
3301
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
3302
3303
20
        err = txn->commit();
3304
20
        if (err != TxnErrorCode::TXN_OK) {
3305
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
3306
0
                    .tag("err", err)
3307
0
                    .tag("tablet id", tablet_id)
3308
0
                    .tag("instance_id", instance_id_)
3309
0
                    .tag("reason", "failed to commit txn");
3310
0
            return -1;
3311
0
        }
3312
3313
20
        metrics_context.total_recycled_num = ++num_recycled;
3314
20
        metrics_context.report();
3315
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
3316
20
        restore_job_keys.push_back(k);
3317
3318
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
3319
20
                  << " tablet_id=" << tablet_id;
3320
20
        return 0;
3321
20
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3209
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
3210
41
        ++num_scanned;
3211
41
        RestoreJobCloudPB restore_job_pb;
3212
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
3213
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
3214
0
            return -1;
3215
0
        }
3216
41
        int64_t expiration =
3217
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
3218
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
3219
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
3220
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
3221
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
3222
0
                   << " state=" << restore_job_pb.state();
3223
41
        int64_t current_time = ::time(nullptr);
3224
41
        if (current_time < expiration) { // not expired
3225
0
            return 0;
3226
0
        }
3227
41
        ++num_expired;
3228
3229
41
        int64_t tablet_id = restore_job_pb.tablet_id();
3230
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
3231
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
3232
3233
41
        std::unique_ptr<Transaction> txn;
3234
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3235
41
        if (err != TxnErrorCode::TXN_OK) {
3236
0
            LOG_WARNING("failed to recycle restore job")
3237
0
                    .tag("err", err)
3238
0
                    .tag("tablet id", tablet_id)
3239
0
                    .tag("instance_id", instance_id_)
3240
0
                    .tag("reason", "failed to create txn");
3241
0
            return -1;
3242
0
        }
3243
3244
41
        std::string val;
3245
41
        err = txn->get(k, &val);
3246
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
3247
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
3248
0
            return 0;
3249
0
        }
3250
41
        if (err != TxnErrorCode::TXN_OK) {
3251
0
            LOG_WARNING("failed to get kv");
3252
0
            return -1;
3253
0
        }
3254
41
        restore_job_pb.Clear();
3255
41
        if (!restore_job_pb.ParseFromString(val)) {
3256
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
3257
0
            return -1;
3258
0
        }
3259
3260
        // PREPARED or COMMITTED, change state to DROPPED and return
3261
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
3262
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
3263
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
3264
0
            restore_job_pb.set_need_recycle_data(true);
3265
0
            txn->put(k, restore_job_pb.SerializeAsString());
3266
0
            err = txn->commit();
3267
0
            if (err != TxnErrorCode::TXN_OK) {
3268
0
                LOG_WARNING("failed to commit txn: {}", err);
3269
0
                return -1;
3270
0
            }
3271
0
            num_aborted++;
3272
0
            return 0;
3273
0
        }
3274
3275
        // Change state to RECYCLING
3276
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
3277
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
3278
21
            txn->put(k, restore_job_pb.SerializeAsString());
3279
21
            err = txn->commit();
3280
21
            if (err != TxnErrorCode::TXN_OK) {
3281
0
                LOG_WARNING("failed to commit txn: {}", err);
3282
0
                return -1;
3283
0
            }
3284
21
            return 0;
3285
21
        }
3286
3287
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
3288
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
3289
3290
        // Recycle all data associated with the restore job.
3291
        // This includes rowsets, segments, and related resources.
3292
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
3293
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
3294
0
            LOG_WARNING("failed to recycle tablet")
3295
0
                    .tag("tablet_id", tablet_id)
3296
0
                    .tag("instance_id", instance_id_);
3297
0
            return -1;
3298
0
        }
3299
3300
        // delete all restore job rowset kv
3301
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
3302
3303
20
        err = txn->commit();
3304
20
        if (err != TxnErrorCode::TXN_OK) {
3305
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
3306
0
                    .tag("err", err)
3307
0
                    .tag("tablet id", tablet_id)
3308
0
                    .tag("instance_id", instance_id_)
3309
0
                    .tag("reason", "failed to commit txn");
3310
0
            return -1;
3311
0
        }
3312
3313
20
        metrics_context.total_recycled_num = ++num_recycled;
3314
20
        metrics_context.report();
3315
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
3316
20
        restore_job_keys.push_back(k);
3317
3318
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
3319
20
                  << " tablet_id=" << tablet_id;
3320
20
        return 0;
3321
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
3322
3323
13
    auto loop_done = [&restore_job_keys, this]() -> int {
3324
3
        if (restore_job_keys.empty()) return 0;
3325
1
        DORIS_CLOUD_DEFER {
3326
1
            restore_job_keys.clear();
3327
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3325
1
        DORIS_CLOUD_DEFER {
3326
1
            restore_job_keys.clear();
3327
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
3328
3329
1
        std::unique_ptr<Transaction> txn;
3330
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3331
1
        if (err != TxnErrorCode::TXN_OK) {
3332
0
            LOG_WARNING("failed to recycle restore job")
3333
0
                    .tag("err", err)
3334
0
                    .tag("instance_id", instance_id_)
3335
0
                    .tag("reason", "failed to create txn");
3336
0
            return -1;
3337
0
        }
3338
20
        for (auto& k : restore_job_keys) {
3339
20
            txn->remove(k);
3340
20
        }
3341
1
        err = txn->commit();
3342
1
        if (err != TxnErrorCode::TXN_OK) {
3343
0
            LOG_WARNING("failed to recycle restore job")
3344
0
                    .tag("err", err)
3345
0
                    .tag("instance_id", instance_id_)
3346
0
                    .tag("reason", "failed to commit txn");
3347
0
            return -1;
3348
0
        }
3349
1
        return 0;
3350
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
3323
3
    auto loop_done = [&restore_job_keys, this]() -> int {
3324
3
        if (restore_job_keys.empty()) return 0;
3325
1
        DORIS_CLOUD_DEFER {
3326
1
            restore_job_keys.clear();
3327
1
        };
3328
3329
1
        std::unique_ptr<Transaction> txn;
3330
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3331
1
        if (err != TxnErrorCode::TXN_OK) {
3332
0
            LOG_WARNING("failed to recycle restore job")
3333
0
                    .tag("err", err)
3334
0
                    .tag("instance_id", instance_id_)
3335
0
                    .tag("reason", "failed to create txn");
3336
0
            return -1;
3337
0
        }
3338
20
        for (auto& k : restore_job_keys) {
3339
20
            txn->remove(k);
3340
20
        }
3341
1
        err = txn->commit();
3342
1
        if (err != TxnErrorCode::TXN_OK) {
3343
0
            LOG_WARNING("failed to recycle restore job")
3344
0
                    .tag("err", err)
3345
0
                    .tag("instance_id", instance_id_)
3346
0
                    .tag("reason", "failed to commit txn");
3347
0
            return -1;
3348
0
        }
3349
1
        return 0;
3350
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
3351
3352
13
    if (config::enable_recycler_stats_metrics) {
3353
0
        scan_and_statistics_restore_jobs();
3354
0
    }
3355
3356
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
3357
13
                            std::move(loop_done));
3358
13
}
3359
3360
5
int InstanceRecycler::recycle_versioned_rowsets() {
3361
5
    const std::string task_name = "recycle_rowsets";
3362
5
    int64_t num_scanned = 0;
3363
5
    int64_t num_expired = 0;
3364
5
    int64_t num_prepare = 0;
3365
5
    int64_t num_compacted = 0;
3366
5
    int64_t num_empty_rowset = 0;
3367
5
    size_t total_rowset_key_size = 0;
3368
5
    size_t total_rowset_value_size = 0;
3369
5
    size_t expired_rowset_size = 0;
3370
5
    std::atomic_long num_recycled = 0;
3371
5
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3372
3373
5
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
3374
5
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
3375
5
    std::string recyc_rs_key0;
3376
5
    std::string recyc_rs_key1;
3377
5
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
3378
5
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
3379
3380
5
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
3381
3382
5
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3383
5
    register_recycle_task(task_name, start_time);
3384
3385
5
    DORIS_CLOUD_DEFER {
3386
5
        unregister_recycle_task(task_name);
3387
5
        int64_t cost =
3388
5
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3389
5
        metrics_context.finish_report();
3390
5
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3391
5
                .tag("instance_id", instance_id_)
3392
5
                .tag("num_scanned", num_scanned)
3393
5
                .tag("num_expired", num_expired)
3394
5
                .tag("num_recycled", num_recycled)
3395
5
                .tag("num_recycled.prepare", num_prepare)
3396
5
                .tag("num_recycled.compacted", num_compacted)
3397
5
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3398
5
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3399
5
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3400
5
                .tag("expired_rowset_meta_size", expired_rowset_size);
3401
5
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
3385
5
    DORIS_CLOUD_DEFER {
3386
5
        unregister_recycle_task(task_name);
3387
5
        int64_t cost =
3388
5
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3389
5
        metrics_context.finish_report();
3390
5
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
3391
5
                .tag("instance_id", instance_id_)
3392
5
                .tag("num_scanned", num_scanned)
3393
5
                .tag("num_expired", num_expired)
3394
5
                .tag("num_recycled", num_recycled)
3395
5
                .tag("num_recycled.prepare", num_prepare)
3396
5
                .tag("num_recycled.compacted", num_compacted)
3397
5
                .tag("num_recycled.empty_rowset", num_empty_rowset)
3398
5
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3399
5
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3400
5
                .tag("expired_rowset_meta_size", expired_rowset_size);
3401
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
3402
3403
5
    std::vector<std::string> orphan_rowset_keys;
3404
3405
    // Store keys of rowset recycled by background workers
3406
5
    std::mutex async_recycled_rowset_keys_mutex;
3407
5
    std::vector<std::string> async_recycled_rowset_keys;
3408
5
    auto worker_pool = std::make_unique<SimpleThreadPool>(
3409
5
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
3410
5
    worker_pool->start();
3411
5
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
3412
5
                                            int64_t tablet_id, const std::string& rowset_id) {
3413
        // Try to delete rowset data in background thread
3414
0
        int ret = worker_pool->submit_with_timeout(
3415
0
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
3416
0
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3417
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3418
0
                        return;
3419
0
                    }
3420
                    // The async recycled rowsets are staled format or has not been used,
3421
                    // so we don't need to check the rowset ref count key.
3422
0
                    std::vector<std::string> keys;
3423
0
                    {
3424
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
3425
0
                        async_recycled_rowset_keys.push_back(std::move(key));
3426
0
                        if (async_recycled_rowset_keys.size() > 100) {
3427
0
                            keys.swap(async_recycled_rowset_keys);
3428
0
                        }
3429
0
                    }
3430
0
                    if (keys.empty()) return;
3431
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
3432
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
3433
0
                                     << instance_id_;
3434
0
                    } else {
3435
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
3436
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
3437
0
                                           num_recycled, start_time);
3438
0
                    }
3439
0
                },
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
3440
0
                0);
3441
0
        if (ret == 0) return 0;
3442
        // Submit task failed, delete rowset data in current thread
3443
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
3444
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
3445
0
            return -1;
3446
0
        }
3447
0
        orphan_rowset_keys.push_back(std::move(key));
3448
0
        return 0;
3449
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
3450
3451
5
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3452
3453
13
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
3454
13
        ++num_scanned;
3455
13
        total_rowset_key_size += k.size();
3456
13
        total_rowset_value_size += v.size();
3457
13
        RecycleRowsetPB rowset;
3458
13
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3459
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3460
0
            return -1;
3461
0
        }
3462
3463
13
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3464
3465
13
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3466
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
3467
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3468
13
        int64_t current_time = ::time(nullptr);
3469
13
        if (current_time < final_expiration) { // not expired
3470
0
            return 0;
3471
0
        }
3472
13
        ++num_expired;
3473
13
        expired_rowset_size += v.size();
3474
13
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3475
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3476
                // in old version, keep this key-value pair and it needs to be checked manually
3477
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3478
0
                return -1;
3479
0
            }
3480
0
            if (rowset.resource_id().empty()) [[unlikely]] {
3481
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3482
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3483
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3484
0
                orphan_rowset_keys.emplace_back(k);
3485
0
                return -1;
3486
0
            }
3487
            // decode rowset_id
3488
0
            auto k1 = k;
3489
0
            k1.remove_prefix(1);
3490
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3491
0
            decode_key(&k1, &out);
3492
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3493
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3494
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3495
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
3496
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3497
0
                                             rowset.tablet_id(), rowset_id) != 0) {
3498
0
                return -1;
3499
0
            }
3500
0
            return 0;
3501
0
        }
3502
        // TODO(plat1ko): check rowset not referenced
3503
13
        auto rowset_meta = rowset.mutable_rowset_meta();
3504
13
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3505
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3506
0
                LOG_INFO("recycle rowset that has empty resource id");
3507
0
            } else {
3508
                // other situations, keep this key-value pair and it needs to be checked manually
3509
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3510
0
                return -1;
3511
0
            }
3512
0
        }
3513
13
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3514
13
                  << " tablet_id=" << rowset_meta->tablet_id()
3515
13
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3516
13
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3517
13
                  << "] txn_id=" << rowset_meta->txn_id()
3518
13
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3519
13
                  << " rowset_meta_size=" << v.size()
3520
13
                  << " creation_time=" << rowset_meta->creation_time();
3521
13
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3522
            // unable to calculate file path, can only be deleted by rowset id prefix
3523
0
            num_prepare += 1;
3524
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3525
0
                                             rowset_meta->tablet_id(),
3526
0
                                             rowset_meta->rowset_id_v2()) != 0) {
3527
0
                return -1;
3528
0
            }
3529
13
        } else {
3530
13
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
3531
13
            worker_pool->submit(
3532
13
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
3533
13
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
3534
0
                            return;
3535
0
                        }
3536
13
                        num_compacted += is_compacted;
3537
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
3538
13
                        if (rowset_meta.num_segments() == 0) {
3539
0
                            ++num_empty_rowset;
3540
0
                        }
3541
13
                    });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
3532
13
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
3533
13
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
3534
0
                            return;
3535
0
                        }
3536
13
                        num_compacted += is_compacted;
3537
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
3538
13
                        if (rowset_meta.num_segments() == 0) {
3539
0
                            ++num_empty_rowset;
3540
0
                        }
3541
13
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
3542
13
        }
3543
13
        return 0;
3544
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3453
13
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
3454
13
        ++num_scanned;
3455
13
        total_rowset_key_size += k.size();
3456
13
        total_rowset_value_size += v.size();
3457
13
        RecycleRowsetPB rowset;
3458
13
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3459
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
3460
0
            return -1;
3461
0
        }
3462
3463
13
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3464
3465
13
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3466
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
3467
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
3468
13
        int64_t current_time = ::time(nullptr);
3469
13
        if (current_time < final_expiration) { // not expired
3470
0
            return 0;
3471
0
        }
3472
13
        ++num_expired;
3473
13
        expired_rowset_size += v.size();
3474
13
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
3475
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
3476
                // in old version, keep this key-value pair and it needs to be checked manually
3477
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3478
0
                return -1;
3479
0
            }
3480
0
            if (rowset.resource_id().empty()) [[unlikely]] {
3481
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
3482
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
3483
0
                          << hex(k) << " value=" << proto_to_json(rowset);
3484
0
                orphan_rowset_keys.emplace_back(k);
3485
0
                return -1;
3486
0
            }
3487
            // decode rowset_id
3488
0
            auto k1 = k;
3489
0
            k1.remove_prefix(1);
3490
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3491
0
            decode_key(&k1, &out);
3492
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
3493
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
3494
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3495
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
3496
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
3497
0
                                             rowset.tablet_id(), rowset_id) != 0) {
3498
0
                return -1;
3499
0
            }
3500
0
            return 0;
3501
0
        }
3502
        // TODO(plat1ko): check rowset not referenced
3503
13
        auto rowset_meta = rowset.mutable_rowset_meta();
3504
13
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
3505
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
3506
0
                LOG_INFO("recycle rowset that has empty resource id");
3507
0
            } else {
3508
                // other situations, keep this key-value pair and it needs to be checked manually
3509
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
3510
0
                return -1;
3511
0
            }
3512
0
        }
3513
13
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3514
13
                  << " tablet_id=" << rowset_meta->tablet_id()
3515
13
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
3516
13
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
3517
13
                  << "] txn_id=" << rowset_meta->txn_id()
3518
13
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
3519
13
                  << " rowset_meta_size=" << v.size()
3520
13
                  << " creation_time=" << rowset_meta->creation_time();
3521
13
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
3522
            // unable to calculate file path, can only be deleted by rowset id prefix
3523
0
            num_prepare += 1;
3524
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
3525
0
                                             rowset_meta->tablet_id(),
3526
0
                                             rowset_meta->rowset_id_v2()) != 0) {
3527
0
                return -1;
3528
0
            }
3529
13
        } else {
3530
13
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
3531
13
            worker_pool->submit(
3532
13
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
3533
13
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
3534
13
                            return;
3535
13
                        }
3536
13
                        num_compacted += is_compacted;
3537
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
3538
13
                        if (rowset_meta.num_segments() == 0) {
3539
13
                            ++num_empty_rowset;
3540
13
                        }
3541
13
                    });
3542
13
        }
3543
13
        return 0;
3544
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
3545
3546
5
    if (config::enable_recycler_stats_metrics) {
3547
0
        scan_and_statistics_rowsets();
3548
0
    }
3549
3550
5
    auto loop_done = [&]() -> int {
3551
4
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
3552
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3553
0
        }
3554
4
        orphan_rowset_keys.clear();
3555
4
        return 0;
3556
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
3550
4
    auto loop_done = [&]() -> int {
3551
4
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
3552
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3553
0
        }
3554
4
        orphan_rowset_keys.clear();
3555
4
        return 0;
3556
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
3557
3558
    // recycle_func and loop_done for scan and recycle
3559
5
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
3560
5
                               std::move(loop_done));
3561
3562
5
    worker_pool->stop();
3563
3564
5
    if (!async_recycled_rowset_keys.empty()) {
3565
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
3566
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
3567
0
            return -1;
3568
0
        } else {
3569
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
3570
0
        }
3571
0
    }
3572
5
    return ret;
3573
5
}
3574
3575
int InstanceRecycler::recycle_rowset_meta_and_data(std::string_view recycle_rowset_key,
3576
43
                                                   const RowsetMetaCloudPB& rowset_meta) {
3577
43
    constexpr int MAX_RETRY = 10;
3578
43
    int64_t tablet_id = rowset_meta.tablet_id();
3579
43
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
3580
43
    std::string_view reference_instance_id = instance_id_;
3581
43
    if (rowset_meta.has_reference_instance_id()) {
3582
5
        reference_instance_id = rowset_meta.reference_instance_id();
3583
5
    }
3584
3585
43
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
3586
43
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
3587
43
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(recycle_rowset_key));
3588
43
    AnnotateTag instance_id_tag("instance_id", instance_id_);
3589
43
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
3590
47
    for (int i = 0; i < MAX_RETRY; ++i) {
3591
47
        std::unique_ptr<Transaction> txn;
3592
47
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3593
47
        if (err != TxnErrorCode::TXN_OK) {
3594
0
            LOG_WARNING("failed to create txn").tag("err", err);
3595
0
            return -1;
3596
0
        }
3597
3598
47
        std::string rowset_ref_count_key =
3599
47
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
3600
47
        int64_t ref_count = 0;
3601
47
        {
3602
47
            std::string value;
3603
47
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
3604
47
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3605
                // This is the old version rowset, we could recycle it directly.
3606
6
                ref_count = 1;
3607
41
            } else if (err != TxnErrorCode::TXN_OK) {
3608
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
3609
0
                return -1;
3610
41
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
3611
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
3612
0
                return -1;
3613
0
            }
3614
47
        }
3615
3616
47
        if (ref_count == 1) {
3617
            // It would not be added since it is recycling.
3618
34
            if (delete_rowset_data(rowset_meta) != 0) {
3619
0
                LOG_WARNING("failed to delete rowset data");
3620
0
                return -1;
3621
0
            }
3622
3623
            // Reset the transaction to avoid timeout.
3624
34
            err = txn_kv_->create_txn(&txn);
3625
34
            if (err != TxnErrorCode::TXN_OK) {
3626
0
                LOG_WARNING("failed to create txn").tag("err", err);
3627
0
                return -1;
3628
0
            }
3629
34
            txn->remove(rowset_ref_count_key);
3630
34
            LOG_INFO("delete rowset data ref count key")
3631
34
                    .tag("txn_id", rowset_meta.txn_id())
3632
34
                    .tag("ref_count_key", hex(rowset_ref_count_key));
3633
34
        } else {
3634
            // Decrease the rowset ref count.
3635
            //
3636
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
3637
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
3638
13
            txn->atomic_add(rowset_ref_count_key, -1);
3639
13
            LOG_INFO("decrease rowset data ref count")
3640
13
                    .tag("txn_id", rowset_meta.txn_id())
3641
13
                    .tag("ref_count", ref_count - 1)
3642
13
                    .tag("ref_count_key", hex(rowset_ref_count_key));
3643
13
        }
3644
3645
47
        txn->remove(recycle_rowset_key);
3646
47
        err = txn->commit();
3647
47
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
3648
            // The rowset ref count key has been changed, we need to retry.
3649
4
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
3650
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
3651
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
3652
4
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
3653
4
            continue;
3654
43
        } else if (err != TxnErrorCode::TXN_OK) {
3655
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
3656
0
            return -1;
3657
0
        }
3658
43
        LOG_INFO("recycle rowset meta and data success");
3659
43
        return 0;
3660
47
    }
3661
0
    LOG_WARNING("failed to recycle rowset meta and data after retry")
3662
0
            .tag("tablet_id", tablet_id)
3663
0
            .tag("rowset_id", rowset_id)
3664
0
            .tag("retry", MAX_RETRY);
3665
0
    return -1;
3666
43
}
3667
3668
18
int InstanceRecycler::recycle_tmp_rowsets() {
3669
18
    const std::string task_name = "recycle_tmp_rowsets";
3670
18
    int64_t num_scanned = 0;
3671
18
    int64_t num_expired = 0;
3672
18
    std::atomic_long num_recycled = 0;
3673
18
    size_t expired_rowset_size = 0;
3674
18
    size_t total_rowset_key_size = 0;
3675
18
    size_t total_rowset_value_size = 0;
3676
18
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3677
3678
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
3679
18
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
3680
18
    std::string tmp_rs_key0;
3681
18
    std::string tmp_rs_key1;
3682
18
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
3683
18
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
3684
3685
18
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
3686
3687
18
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3688
18
    register_recycle_task(task_name, start_time);
3689
3690
18
    DORIS_CLOUD_DEFER {
3691
18
        unregister_recycle_task(task_name);
3692
18
        int64_t cost =
3693
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3694
18
        metrics_context.finish_report();
3695
18
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
3696
18
                .tag("instance_id", instance_id_)
3697
18
                .tag("num_scanned", num_scanned)
3698
18
                .tag("num_expired", num_expired)
3699
18
                .tag("num_recycled", num_recycled)
3700
18
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3701
18
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3702
18
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
3703
18
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
3690
14
    DORIS_CLOUD_DEFER {
3691
14
        unregister_recycle_task(task_name);
3692
14
        int64_t cost =
3693
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3694
14
        metrics_context.finish_report();
3695
14
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
3696
14
                .tag("instance_id", instance_id_)
3697
14
                .tag("num_scanned", num_scanned)
3698
14
                .tag("num_expired", num_expired)
3699
14
                .tag("num_recycled", num_recycled)
3700
14
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3701
14
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3702
14
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
3703
14
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
3690
4
    DORIS_CLOUD_DEFER {
3691
4
        unregister_recycle_task(task_name);
3692
4
        int64_t cost =
3693
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3694
4
        metrics_context.finish_report();
3695
4
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
3696
4
                .tag("instance_id", instance_id_)
3697
4
                .tag("num_scanned", num_scanned)
3698
4
                .tag("num_expired", num_expired)
3699
4
                .tag("num_recycled", num_recycled)
3700
4
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
3701
4
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
3702
4
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
3703
4
    };
3704
3705
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
3706
3707
18
    std::vector<std::string> tmp_rowset_keys;
3708
18
    std::vector<std::string> tmp_rowset_ref_count_keys;
3709
3710
    // rowset_id -> rowset_meta
3711
    // store tmp_rowset id and meta for statistics rs size when delete
3712
18
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
3713
18
    auto worker_pool = std::make_unique<SimpleThreadPool>(
3714
18
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
3715
18
    worker_pool->start();
3716
3717
18
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
3718
3719
18
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
3720
18
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
3721
18
                             &earlest_ts, &tmp_rowset_ref_count_keys,
3722
57.0k
                             this](std::string_view k, std::string_view v) -> int {
3723
57.0k
        ++num_scanned;
3724
57.0k
        total_rowset_key_size += k.size();
3725
57.0k
        total_rowset_value_size += v.size();
3726
57.0k
        doris::RowsetMetaCloudPB rowset;
3727
57.0k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3728
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
3729
0
            return -1;
3730
0
        }
3731
57.0k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3732
57.0k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3733
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3734
0
                   << " txn_expiration=" << rowset.txn_expiration()
3735
0
                   << " rowset_creation_time=" << rowset.creation_time();
3736
57.0k
        int64_t current_time = ::time(nullptr);
3737
57.0k
        if (current_time < expiration) { // not expired
3738
0
            return 0;
3739
0
        }
3740
3741
57.0k
        DCHECK_GT(rowset.txn_id(), 0)
3742
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
3743
57.0k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
3744
2.00k
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
3745
2.00k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
3746
2.00k
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
3747
2.00k
                      << rowset.start_version() << '-' << rowset.end_version()
3748
2.00k
                      << "] txn_id=" << rowset.txn_id()
3749
2.00k
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
3750
2.00k
                      << " txn_expiration=" << rowset.txn_expiration();
3751
2.00k
            return 0;
3752
2.00k
        }
3753
3754
55.0k
        ++num_expired;
3755
55.0k
        expired_rowset_size += v.size();
3756
55.0k
        if (!rowset.has_resource_id()) {
3757
4.00k
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
3758
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
3759
0
                return -1;
3760
0
            }
3761
            // might be a delete pred rowset
3762
4.00k
            tmp_rowset_keys.emplace_back(k);
3763
4.00k
            return 0;
3764
4.00k
        }
3765
        // TODO(plat1ko): check rowset not referenced
3766
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3767
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
3768
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
3769
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
3770
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
3771
51.0k
                  << " num_expired=" << num_expired;
3772
3773
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
3774
        // Remove the rowset ref count key directly since it has not been used.
3775
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
3776
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
3777
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
3778
51.0k
                  << "key=" << hex(rowset_ref_count_key);
3779
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
3780
3781
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
3782
51.0k
        return 0;
3783
55.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3722
51.0k
                             this](std::string_view k, std::string_view v) -> int {
3723
51.0k
        ++num_scanned;
3724
51.0k
        total_rowset_key_size += k.size();
3725
51.0k
        total_rowset_value_size += v.size();
3726
51.0k
        doris::RowsetMetaCloudPB rowset;
3727
51.0k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3728
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
3729
0
            return -1;
3730
0
        }
3731
51.0k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3732
51.0k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3733
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3734
0
                   << " txn_expiration=" << rowset.txn_expiration()
3735
0
                   << " rowset_creation_time=" << rowset.creation_time();
3736
51.0k
        int64_t current_time = ::time(nullptr);
3737
51.0k
        if (current_time < expiration) { // not expired
3738
0
            return 0;
3739
0
        }
3740
3741
51.0k
        DCHECK_GT(rowset.txn_id(), 0)
3742
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
3743
51.0k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
3744
0
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
3745
0
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
3746
0
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
3747
0
                      << rowset.start_version() << '-' << rowset.end_version()
3748
0
                      << "] txn_id=" << rowset.txn_id()
3749
0
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
3750
0
                      << " txn_expiration=" << rowset.txn_expiration();
3751
0
            return 0;
3752
0
        }
3753
3754
51.0k
        ++num_expired;
3755
51.0k
        expired_rowset_size += v.size();
3756
51.0k
        if (!rowset.has_resource_id()) {
3757
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
3758
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
3759
0
                return -1;
3760
0
            }
3761
            // might be a delete pred rowset
3762
0
            tmp_rowset_keys.emplace_back(k);
3763
0
            return 0;
3764
0
        }
3765
        // TODO(plat1ko): check rowset not referenced
3766
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3767
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
3768
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
3769
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
3770
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
3771
51.0k
                  << " num_expired=" << num_expired;
3772
3773
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
3774
        // Remove the rowset ref count key directly since it has not been used.
3775
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
3776
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
3777
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
3778
51.0k
                  << "key=" << hex(rowset_ref_count_key);
3779
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
3780
3781
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
3782
51.0k
        return 0;
3783
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3722
6.00k
                             this](std::string_view k, std::string_view v) -> int {
3723
6.00k
        ++num_scanned;
3724
6.00k
        total_rowset_key_size += k.size();
3725
6.00k
        total_rowset_value_size += v.size();
3726
6.00k
        doris::RowsetMetaCloudPB rowset;
3727
6.00k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
3728
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
3729
0
            return -1;
3730
0
        }
3731
6.00k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
3732
6.00k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
3733
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
3734
0
                   << " txn_expiration=" << rowset.txn_expiration()
3735
0
                   << " rowset_creation_time=" << rowset.creation_time();
3736
6.00k
        int64_t current_time = ::time(nullptr);
3737
6.00k
        if (current_time < expiration) { // not expired
3738
0
            return 0;
3739
0
        }
3740
3741
6.00k
        DCHECK_GT(rowset.txn_id(), 0)
3742
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
3743
6.00k
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
3744
2.00k
            LOG(INFO) << "txn is not finished, skip recycle tmp rowset, instance_id="
3745
2.00k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id()
3746
2.00k
                      << " rowset_id=" << rowset.rowset_id_v2() << " version=["
3747
2.00k
                      << rowset.start_version() << '-' << rowset.end_version()
3748
2.00k
                      << "] txn_id=" << rowset.txn_id()
3749
2.00k
                      << " creation_time=" << rowset.creation_time() << " expiration=" << expiration
3750
2.00k
                      << " txn_expiration=" << rowset.txn_expiration();
3751
2.00k
            return 0;
3752
2.00k
        }
3753
3754
4.00k
        ++num_expired;
3755
4.00k
        expired_rowset_size += v.size();
3756
4.00k
        if (!rowset.has_resource_id()) {
3757
4.00k
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
3758
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
3759
0
                return -1;
3760
0
            }
3761
            // might be a delete pred rowset
3762
4.00k
            tmp_rowset_keys.emplace_back(k);
3763
4.00k
            return 0;
3764
4.00k
        }
3765
        // TODO(plat1ko): check rowset not referenced
3766
0
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
3767
0
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
3768
0
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
3769
0
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
3770
0
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
3771
0
                  << " num_expired=" << num_expired;
3772
3773
0
        tmp_rowset_keys.emplace_back(k.data(), k.size());
3774
        // Remove the rowset ref count key directly since it has not been used.
3775
0
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
3776
0
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
3777
0
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
3778
0
                  << "key=" << hex(rowset_ref_count_key);
3779
0
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
3780
3781
0
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
3782
0
        return 0;
3783
4.00k
    };
3784
3785
    // TODO bacth delete
3786
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
3787
51.0k
        std::string dbm_start_key =
3788
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3789
51.0k
        std::string dbm_end_key = dbm_start_key;
3790
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
3791
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
3792
51.0k
        if (ret != 0) {
3793
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
3794
0
                         << instance_id_ << ", tablet_id=" << tablet_id
3795
0
                         << ", rowset_id=" << rowset_id;
3796
0
        }
3797
51.0k
        return ret;
3798
51.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
3786
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
3787
51.0k
        std::string dbm_start_key =
3788
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3789
51.0k
        std::string dbm_end_key = dbm_start_key;
3790
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
3791
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
3792
51.0k
        if (ret != 0) {
3793
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
3794
0
                         << instance_id_ << ", tablet_id=" << tablet_id
3795
0
                         << ", rowset_id=" << rowset_id;
3796
0
        }
3797
51.0k
        return ret;
3798
51.0k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
3799
3800
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
3801
51.0k
        auto delete_bitmap_start =
3802
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
3803
51.0k
        auto delete_bitmap_end =
3804
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
3805
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
3806
51.0k
        if (ret != 0) {
3807
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
3808
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
3809
0
        }
3810
51.0k
        return ret;
3811
51.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
3800
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
3801
51.0k
        auto delete_bitmap_start =
3802
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
3803
51.0k
        auto delete_bitmap_end =
3804
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
3805
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
3806
51.0k
        if (ret != 0) {
3807
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
3808
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
3809
0
        }
3810
51.0k
        return ret;
3811
51.0k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
3812
3813
18
    auto loop_done = [&]() -> int {
3814
10
        DORIS_CLOUD_DEFER {
3815
10
            tmp_rowset_keys.clear();
3816
10
            tmp_rowsets.clear();
3817
10
            tmp_rowset_ref_count_keys.clear();
3818
10
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3814
7
        DORIS_CLOUD_DEFER {
3815
7
            tmp_rowset_keys.clear();
3816
7
            tmp_rowsets.clear();
3817
7
            tmp_rowset_ref_count_keys.clear();
3818
7
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
3814
3
        DORIS_CLOUD_DEFER {
3815
3
            tmp_rowset_keys.clear();
3816
3
            tmp_rowsets.clear();
3817
3
            tmp_rowset_ref_count_keys.clear();
3818
3
        };
3819
10
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
3820
10
                             tmp_rowsets_to_delete = tmp_rowsets,
3821
10
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
3822
10
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
3823
10
                                   metrics_context) != 0) {
3824
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
3825
0
                return;
3826
0
            }
3827
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
3828
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3829
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
3830
0
                                 << rs.ShortDebugString();
3831
0
                    return;
3832
0
                }
3833
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3834
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
3835
0
                                 << rs.ShortDebugString();
3836
0
                    return;
3837
0
                }
3838
51.0k
            }
3839
10
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
3840
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
3841
0
                return;
3842
0
            }
3843
10
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
3844
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
3845
0
                return;
3846
0
            }
3847
10
            num_recycled += tmp_rowset_keys.size();
3848
10
            return;
3849
10
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
3821
7
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
3822
7
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
3823
7
                                   metrics_context) != 0) {
3824
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
3825
0
                return;
3826
0
            }
3827
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
3828
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3829
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
3830
0
                                 << rs.ShortDebugString();
3831
0
                    return;
3832
0
                }
3833
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3834
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
3835
0
                                 << rs.ShortDebugString();
3836
0
                    return;
3837
0
                }
3838
51.0k
            }
3839
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
3840
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
3841
0
                return;
3842
0
            }
3843
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
3844
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
3845
0
                return;
3846
0
            }
3847
7
            num_recycled += tmp_rowset_keys.size();
3848
7
            return;
3849
7
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
3821
3
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
3822
3
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
3823
3
                                   metrics_context) != 0) {
3824
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
3825
0
                return;
3826
0
            }
3827
3
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
3828
0
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3829
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
3830
0
                                 << rs.ShortDebugString();
3831
0
                    return;
3832
0
                }
3833
0
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3834
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
3835
0
                                 << rs.ShortDebugString();
3836
0
                    return;
3837
0
                }
3838
0
            }
3839
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
3840
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
3841
0
                return;
3842
0
            }
3843
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
3844
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
3845
0
                return;
3846
0
            }
3847
3
            num_recycled += tmp_rowset_keys.size();
3848
3
            return;
3849
3
        });
3850
10
        return 0;
3851
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
3813
7
    auto loop_done = [&]() -> int {
3814
7
        DORIS_CLOUD_DEFER {
3815
7
            tmp_rowset_keys.clear();
3816
7
            tmp_rowsets.clear();
3817
7
            tmp_rowset_ref_count_keys.clear();
3818
7
        };
3819
7
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
3820
7
                             tmp_rowsets_to_delete = tmp_rowsets,
3821
7
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
3822
7
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
3823
7
                                   metrics_context) != 0) {
3824
7
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
3825
7
                return;
3826
7
            }
3827
7
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
3828
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3829
7
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
3830
7
                                 << rs.ShortDebugString();
3831
7
                    return;
3832
7
                }
3833
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3834
7
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
3835
7
                                 << rs.ShortDebugString();
3836
7
                    return;
3837
7
                }
3838
7
            }
3839
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
3840
7
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
3841
7
                return;
3842
7
            }
3843
7
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
3844
7
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
3845
7
                return;
3846
7
            }
3847
7
            num_recycled += tmp_rowset_keys.size();
3848
7
            return;
3849
7
        });
3850
7
        return 0;
3851
7
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
3813
3
    auto loop_done = [&]() -> int {
3814
3
        DORIS_CLOUD_DEFER {
3815
3
            tmp_rowset_keys.clear();
3816
3
            tmp_rowsets.clear();
3817
3
            tmp_rowset_ref_count_keys.clear();
3818
3
        };
3819
3
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
3820
3
                             tmp_rowsets_to_delete = tmp_rowsets,
3821
3
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
3822
3
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
3823
3
                                   metrics_context) != 0) {
3824
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
3825
3
                return;
3826
3
            }
3827
3
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
3828
3
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3829
3
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
3830
3
                                 << rs.ShortDebugString();
3831
3
                    return;
3832
3
                }
3833
3
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
3834
3
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
3835
3
                                 << rs.ShortDebugString();
3836
3
                    return;
3837
3
                }
3838
3
            }
3839
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
3840
3
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
3841
3
                return;
3842
3
            }
3843
3
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
3844
3
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
3845
3
                return;
3846
3
            }
3847
3
            num_recycled += tmp_rowset_keys.size();
3848
3
            return;
3849
3
        });
3850
3
        return 0;
3851
3
    };
3852
3853
18
    if (config::enable_recycler_stats_metrics) {
3854
0
        scan_and_statistics_tmp_rowsets();
3855
0
    }
3856
    // recycle_func and loop_done for scan and recycle
3857
18
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
3858
18
                               std::move(loop_done));
3859
3860
18
    worker_pool->stop();
3861
18
    return ret;
3862
18
}
3863
3864
int InstanceRecycler::scan_and_recycle(
3865
        std::string begin, std::string_view end,
3866
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
3867
235
        std::function<int()> loop_done) {
3868
235
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
3869
235
    int ret = 0;
3870
235
    int64_t cnt = 0;
3871
235
    int get_range_retried = 0;
3872
235
    std::string err;
3873
235
    DORIS_CLOUD_DEFER_COPY(begin, end) {
3874
235
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
3875
235
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
3876
235
                  << " ret=" << ret << " err=" << err;
3877
235
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
3873
216
    DORIS_CLOUD_DEFER_COPY(begin, end) {
3874
216
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
3875
216
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
3876
216
                  << " ret=" << ret << " err=" << err;
3877
216
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
3873
19
    DORIS_CLOUD_DEFER_COPY(begin, end) {
3874
19
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
3875
19
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
3876
19
                  << " ret=" << ret << " err=" << err;
3877
19
    };
3878
3879
235
    std::unique_ptr<RangeGetIterator> it;
3880
263
    do {
3881
263
        if (get_range_retried > 1000) {
3882
0
            err = "txn_get exceeds max retry, may not scan all keys";
3883
0
            ret = -1;
3884
0
            return -1;
3885
0
        }
3886
263
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
3887
263
        if (get_ret != 0) { // txn kv may complain "Request for future version"
3888
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
3889
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
3890
0
                         << " get_range_retried=" << get_range_retried;
3891
0
            ++get_range_retried;
3892
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
3893
0
            continue; // try again
3894
0
        }
3895
263
        if (!it->has_next()) {
3896
116
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
3897
116
            break; // scan finished
3898
116
        }
3899
99.6k
        while (it->has_next()) {
3900
99.4k
            ++cnt;
3901
            // recycle corresponding resources
3902
99.4k
            auto [k, v] = it->next();
3903
99.4k
            if (!it->has_next()) {
3904
147
                begin = k;
3905
147
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
3906
147
            }
3907
            // if we want to continue scanning, the recycle_func should not return non-zero
3908
99.4k
            if (recycle_func(k, v) != 0) {
3909
4.00k
                err = "recycle_func error";
3910
4.00k
                ret = -1;
3911
4.00k
            }
3912
99.4k
        }
3913
147
        begin.push_back('\x00'); // Update to next smallest key for iteration
3914
        // if we want to continue scanning, the recycle_func should not return non-zero
3915
147
        if (loop_done && loop_done() != 0) {
3916
3
            err = "loop_done error";
3917
3
            ret = -1;
3918
3
        }
3919
147
    } while (it->more() && !stopped());
3920
235
    return ret;
3921
235
}
3922
3923
20
int InstanceRecycler::abort_timeout_txn() {
3924
20
    const std::string task_name = "abort_timeout_txn";
3925
20
    int64_t num_scanned = 0;
3926
20
    int64_t num_timeout = 0;
3927
20
    int64_t num_abort = 0;
3928
20
    int64_t num_advance = 0;
3929
20
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
3930
3931
20
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
3932
20
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
3933
20
    std::string begin_txn_running_key;
3934
20
    std::string end_txn_running_key;
3935
20
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
3936
20
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
3937
3938
20
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
3939
3940
20
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
3941
20
    register_recycle_task(task_name, start_time);
3942
3943
20
    DORIS_CLOUD_DEFER {
3944
20
        unregister_recycle_task(task_name);
3945
20
        int64_t cost =
3946
20
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3947
20
        metrics_context.finish_report();
3948
20
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
3949
20
                .tag("instance_id", instance_id_)
3950
20
                .tag("num_scanned", num_scanned)
3951
20
                .tag("num_timeout", num_timeout)
3952
20
                .tag("num_abort", num_abort)
3953
20
                .tag("num_advance", num_advance);
3954
20
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
3943
16
    DORIS_CLOUD_DEFER {
3944
16
        unregister_recycle_task(task_name);
3945
16
        int64_t cost =
3946
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3947
16
        metrics_context.finish_report();
3948
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
3949
16
                .tag("instance_id", instance_id_)
3950
16
                .tag("num_scanned", num_scanned)
3951
16
                .tag("num_timeout", num_timeout)
3952
16
                .tag("num_abort", num_abort)
3953
16
                .tag("num_advance", num_advance);
3954
16
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
3943
4
    DORIS_CLOUD_DEFER {
3944
4
        unregister_recycle_task(task_name);
3945
4
        int64_t cost =
3946
4
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3947
4
        metrics_context.finish_report();
3948
4
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
3949
4
                .tag("instance_id", instance_id_)
3950
4
                .tag("num_scanned", num_scanned)
3951
4
                .tag("num_timeout", num_timeout)
3952
4
                .tag("num_abort", num_abort)
3953
4
                .tag("num_advance", num_advance);
3954
4
    };
3955
3956
20
    int64_t current_time =
3957
20
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
3958
3959
20
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
3960
20
                                  &current_time, &metrics_context,
3961
20
                                  this](std::string_view k, std::string_view v) -> int {
3962
10
        ++num_scanned;
3963
3964
10
        std::unique_ptr<Transaction> txn;
3965
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3966
10
        if (err != TxnErrorCode::TXN_OK) {
3967
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
3968
0
            return -1;
3969
0
        }
3970
10
        std::string_view k1 = k;
3971
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
3972
10
        k1.remove_prefix(1); // Remove key space
3973
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3974
10
        if (decode_key(&k1, &out) != 0) {
3975
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
3976
0
            return -1;
3977
0
        }
3978
10
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3979
10
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3980
10
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
3981
        // Update txn_info
3982
10
        std::string txn_inf_key, txn_inf_val;
3983
10
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
3984
10
        err = txn->get(txn_inf_key, &txn_inf_val);
3985
10
        if (err != TxnErrorCode::TXN_OK) {
3986
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
3987
0
            return -1;
3988
0
        }
3989
10
        TxnInfoPB txn_info;
3990
10
        if (!txn_info.ParseFromString(txn_inf_val)) {
3991
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
3992
0
            return -1;
3993
0
        }
3994
3995
10
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
3996
4
            txn.reset();
3997
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
3998
4
            std::shared_ptr<TxnLazyCommitTask> task =
3999
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
4000
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
4001
4
            if (ret.first != MetaServiceCode::OK) {
4002
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
4003
0
                             << "msg=" << ret.second;
4004
0
                return -1;
4005
0
            }
4006
4
            ++num_advance;
4007
4
            return 0;
4008
6
        } else {
4009
6
            TxnRunningPB txn_running_pb;
4010
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
4011
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4012
0
                return -1;
4013
0
            }
4014
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
4015
4
                return 0;
4016
4
            }
4017
2
            ++num_timeout;
4018
4019
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
4020
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
4021
2
            txn_info.set_finish_time(current_time);
4022
2
            txn_info.set_reason("timeout");
4023
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
4024
2
            txn_inf_val.clear();
4025
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
4026
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
4027
0
                return -1;
4028
0
            }
4029
2
            txn->put(txn_inf_key, txn_inf_val);
4030
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
4031
            // Put recycle txn key
4032
2
            std::string recyc_txn_key, recyc_txn_val;
4033
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
4034
2
            RecycleTxnPB recycle_txn_pb;
4035
2
            recycle_txn_pb.set_creation_time(current_time);
4036
2
            recycle_txn_pb.set_label(txn_info.label());
4037
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
4038
0
                LOG_WARNING("failed to serialize txn recycle info")
4039
0
                        .tag("key", hex(k))
4040
0
                        .tag("db_id", db_id)
4041
0
                        .tag("txn_id", txn_id);
4042
0
                return -1;
4043
0
            }
4044
2
            txn->put(recyc_txn_key, recyc_txn_val);
4045
            // Remove txn running key
4046
2
            txn->remove(k);
4047
2
            err = txn->commit();
4048
2
            if (err != TxnErrorCode::TXN_OK) {
4049
0
                LOG_WARNING("failed to commit txn err={}", err)
4050
0
                        .tag("key", hex(k))
4051
0
                        .tag("db_id", db_id)
4052
0
                        .tag("txn_id", txn_id);
4053
0
                return -1;
4054
0
            }
4055
2
            metrics_context.total_recycled_num = ++num_abort;
4056
2
            metrics_context.report();
4057
2
        }
4058
4059
2
        return 0;
4060
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3961
6
                                  this](std::string_view k, std::string_view v) -> int {
3962
6
        ++num_scanned;
3963
3964
6
        std::unique_ptr<Transaction> txn;
3965
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3966
6
        if (err != TxnErrorCode::TXN_OK) {
3967
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
3968
0
            return -1;
3969
0
        }
3970
6
        std::string_view k1 = k;
3971
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
3972
6
        k1.remove_prefix(1); // Remove key space
3973
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3974
6
        if (decode_key(&k1, &out) != 0) {
3975
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
3976
0
            return -1;
3977
0
        }
3978
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3979
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3980
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
3981
        // Update txn_info
3982
6
        std::string txn_inf_key, txn_inf_val;
3983
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
3984
6
        err = txn->get(txn_inf_key, &txn_inf_val);
3985
6
        if (err != TxnErrorCode::TXN_OK) {
3986
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
3987
0
            return -1;
3988
0
        }
3989
6
        TxnInfoPB txn_info;
3990
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
3991
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
3992
0
            return -1;
3993
0
        }
3994
3995
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
3996
0
            txn.reset();
3997
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
3998
0
            std::shared_ptr<TxnLazyCommitTask> task =
3999
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
4000
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
4001
0
            if (ret.first != MetaServiceCode::OK) {
4002
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
4003
0
                             << "msg=" << ret.second;
4004
0
                return -1;
4005
0
            }
4006
0
            ++num_advance;
4007
0
            return 0;
4008
6
        } else {
4009
6
            TxnRunningPB txn_running_pb;
4010
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
4011
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4012
0
                return -1;
4013
0
            }
4014
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
4015
4
                return 0;
4016
4
            }
4017
2
            ++num_timeout;
4018
4019
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
4020
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
4021
2
            txn_info.set_finish_time(current_time);
4022
2
            txn_info.set_reason("timeout");
4023
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
4024
2
            txn_inf_val.clear();
4025
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
4026
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
4027
0
                return -1;
4028
0
            }
4029
2
            txn->put(txn_inf_key, txn_inf_val);
4030
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
4031
            // Put recycle txn key
4032
2
            std::string recyc_txn_key, recyc_txn_val;
4033
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
4034
2
            RecycleTxnPB recycle_txn_pb;
4035
2
            recycle_txn_pb.set_creation_time(current_time);
4036
2
            recycle_txn_pb.set_label(txn_info.label());
4037
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
4038
0
                LOG_WARNING("failed to serialize txn recycle info")
4039
0
                        .tag("key", hex(k))
4040
0
                        .tag("db_id", db_id)
4041
0
                        .tag("txn_id", txn_id);
4042
0
                return -1;
4043
0
            }
4044
2
            txn->put(recyc_txn_key, recyc_txn_val);
4045
            // Remove txn running key
4046
2
            txn->remove(k);
4047
2
            err = txn->commit();
4048
2
            if (err != TxnErrorCode::TXN_OK) {
4049
0
                LOG_WARNING("failed to commit txn err={}", err)
4050
0
                        .tag("key", hex(k))
4051
0
                        .tag("db_id", db_id)
4052
0
                        .tag("txn_id", txn_id);
4053
0
                return -1;
4054
0
            }
4055
2
            metrics_context.total_recycled_num = ++num_abort;
4056
2
            metrics_context.report();
4057
2
        }
4058
4059
2
        return 0;
4060
6
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
3961
4
                                  this](std::string_view k, std::string_view v) -> int {
3962
4
        ++num_scanned;
3963
3964
4
        std::unique_ptr<Transaction> txn;
3965
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
3966
4
        if (err != TxnErrorCode::TXN_OK) {
3967
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
3968
0
            return -1;
3969
0
        }
3970
4
        std::string_view k1 = k;
3971
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
3972
4
        k1.remove_prefix(1); // Remove key space
3973
4
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
3974
4
        if (decode_key(&k1, &out) != 0) {
3975
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
3976
0
            return -1;
3977
0
        }
3978
4
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
3979
4
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
3980
4
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
3981
        // Update txn_info
3982
4
        std::string txn_inf_key, txn_inf_val;
3983
4
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
3984
4
        err = txn->get(txn_inf_key, &txn_inf_val);
3985
4
        if (err != TxnErrorCode::TXN_OK) {
3986
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
3987
0
            return -1;
3988
0
        }
3989
4
        TxnInfoPB txn_info;
3990
4
        if (!txn_info.ParseFromString(txn_inf_val)) {
3991
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
3992
0
            return -1;
3993
0
        }
3994
3995
4
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
3996
4
            txn.reset();
3997
4
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
3998
4
            std::shared_ptr<TxnLazyCommitTask> task =
3999
4
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
4000
4
            std::pair<MetaServiceCode, std::string> ret = task->wait();
4001
4
            if (ret.first != MetaServiceCode::OK) {
4002
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
4003
0
                             << "msg=" << ret.second;
4004
0
                return -1;
4005
0
            }
4006
4
            ++num_advance;
4007
4
            return 0;
4008
4
        } else {
4009
0
            TxnRunningPB txn_running_pb;
4010
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
4011
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4012
0
                return -1;
4013
0
            }
4014
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
4015
0
                return 0;
4016
0
            }
4017
0
            ++num_timeout;
4018
4019
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
4020
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
4021
0
            txn_info.set_finish_time(current_time);
4022
0
            txn_info.set_reason("timeout");
4023
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
4024
0
            txn_inf_val.clear();
4025
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
4026
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
4027
0
                return -1;
4028
0
            }
4029
0
            txn->put(txn_inf_key, txn_inf_val);
4030
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
4031
            // Put recycle txn key
4032
0
            std::string recyc_txn_key, recyc_txn_val;
4033
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
4034
0
            RecycleTxnPB recycle_txn_pb;
4035
0
            recycle_txn_pb.set_creation_time(current_time);
4036
0
            recycle_txn_pb.set_label(txn_info.label());
4037
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
4038
0
                LOG_WARNING("failed to serialize txn recycle info")
4039
0
                        .tag("key", hex(k))
4040
0
                        .tag("db_id", db_id)
4041
0
                        .tag("txn_id", txn_id);
4042
0
                return -1;
4043
0
            }
4044
0
            txn->put(recyc_txn_key, recyc_txn_val);
4045
            // Remove txn running key
4046
0
            txn->remove(k);
4047
0
            err = txn->commit();
4048
0
            if (err != TxnErrorCode::TXN_OK) {
4049
0
                LOG_WARNING("failed to commit txn err={}", err)
4050
0
                        .tag("key", hex(k))
4051
0
                        .tag("db_id", db_id)
4052
0
                        .tag("txn_id", txn_id);
4053
0
                return -1;
4054
0
            }
4055
0
            metrics_context.total_recycled_num = ++num_abort;
4056
0
            metrics_context.report();
4057
0
        }
4058
4059
0
        return 0;
4060
4
    };
4061
4062
20
    if (config::enable_recycler_stats_metrics) {
4063
0
        scan_and_statistics_abort_timeout_txn();
4064
0
    }
4065
    // recycle_func and loop_done for scan and recycle
4066
20
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
4067
20
                            std::move(handle_txn_running_kv));
4068
20
}
4069
4070
21
int InstanceRecycler::recycle_expired_txn_label() {
4071
21
    const std::string task_name = "recycle_expired_txn_label";
4072
21
    int64_t num_scanned = 0;
4073
21
    int64_t num_expired = 0;
4074
21
    int64_t num_recycled = 0;
4075
21
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4076
21
    int ret = 0;
4077
4078
21
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
4079
21
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
4080
21
    std::string begin_recycle_txn_key;
4081
21
    std::string end_recycle_txn_key;
4082
21
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
4083
21
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
4084
21
    std::vector<std::string> recycle_txn_info_keys;
4085
4086
21
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
4087
4088
21
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4089
21
    register_recycle_task(task_name, start_time);
4090
21
    DORIS_CLOUD_DEFER {
4091
21
        unregister_recycle_task(task_name);
4092
21
        int64_t cost =
4093
21
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4094
21
        metrics_context.finish_report();
4095
21
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
4096
21
                .tag("instance_id", instance_id_)
4097
21
                .tag("num_scanned", num_scanned)
4098
21
                .tag("num_expired", num_expired)
4099
21
                .tag("num_recycled", num_recycled);
4100
21
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
4090
18
    DORIS_CLOUD_DEFER {
4091
18
        unregister_recycle_task(task_name);
4092
18
        int64_t cost =
4093
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4094
18
        metrics_context.finish_report();
4095
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
4096
18
                .tag("instance_id", instance_id_)
4097
18
                .tag("num_scanned", num_scanned)
4098
18
                .tag("num_expired", num_expired)
4099
18
                .tag("num_recycled", num_recycled);
4100
18
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
4090
3
    DORIS_CLOUD_DEFER {
4091
3
        unregister_recycle_task(task_name);
4092
3
        int64_t cost =
4093
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4094
3
        metrics_context.finish_report();
4095
3
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
4096
3
                .tag("instance_id", instance_id_)
4097
3
                .tag("num_scanned", num_scanned)
4098
3
                .tag("num_expired", num_expired)
4099
3
                .tag("num_recycled", num_recycled);
4100
3
    };
4101
4102
21
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4103
4104
21
    SyncExecutor<int> concurrent_delete_executor(
4105
21
            _thread_pool_group.s3_producer_pool,
4106
21
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
4107
23.0k
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
4107
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
4107
3
            [](const int& ret) { return ret != 0; });
4108
4109
21
    int64_t current_time_ms =
4110
21
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4111
4112
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
4113
30.0k
        ++num_scanned;
4114
30.0k
        RecycleTxnPB recycle_txn_pb;
4115
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
4116
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4117
0
            return -1;
4118
0
        }
4119
30.0k
        if ((config::force_immediate_recycle) ||
4120
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
4121
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
4122
30.0k
             current_time_ms)) {
4123
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
4124
23.0k
            num_expired++;
4125
23.0k
            recycle_txn_info_keys.emplace_back(k);
4126
23.0k
        }
4127
30.0k
        return 0;
4128
30.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4112
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
4113
30.0k
        ++num_scanned;
4114
30.0k
        RecycleTxnPB recycle_txn_pb;
4115
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
4116
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4117
0
            return -1;
4118
0
        }
4119
30.0k
        if ((config::force_immediate_recycle) ||
4120
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
4121
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
4122
30.0k
             current_time_ms)) {
4123
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
4124
23.0k
            num_expired++;
4125
23.0k
            recycle_txn_info_keys.emplace_back(k);
4126
23.0k
        }
4127
30.0k
        return 0;
4128
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4112
3
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
4113
3
        ++num_scanned;
4114
3
        RecycleTxnPB recycle_txn_pb;
4115
3
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
4116
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
4117
0
            return -1;
4118
0
        }
4119
3
        if ((config::force_immediate_recycle) ||
4120
3
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
4121
3
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
4122
3
             current_time_ms)) {
4123
3
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
4124
3
            num_expired++;
4125
3
            recycle_txn_info_keys.emplace_back(k);
4126
3
        }
4127
3
        return 0;
4128
3
    };
4129
4130
    // int 0 for success, 1 for conflict, -1 for error
4131
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
4132
23.0k
        std::string_view k1 = k;
4133
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
4134
23.0k
        k1.remove_prefix(1); // Remove key space
4135
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4136
23.0k
        int ret = decode_key(&k1, &out);
4137
23.0k
        if (ret != 0) {
4138
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
4139
0
            return -1;
4140
0
        }
4141
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4142
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4143
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4144
23.0k
        std::unique_ptr<Transaction> txn;
4145
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4146
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4147
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4148
0
            return -1;
4149
0
        }
4150
        // Remove txn index kv
4151
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
4152
23.0k
        txn->remove(index_key);
4153
        // Remove txn info kv
4154
23.0k
        std::string info_key, info_val;
4155
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
4156
23.0k
        err = txn->get(info_key, &info_val);
4157
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4158
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
4159
0
            return -1;
4160
0
        }
4161
23.0k
        TxnInfoPB txn_info;
4162
23.0k
        if (!txn_info.ParseFromString(info_val)) {
4163
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
4164
0
            return -1;
4165
0
        }
4166
23.0k
        txn->remove(info_key);
4167
        // Remove sub txn index kvs
4168
23.0k
        std::vector<std::string> sub_txn_index_keys;
4169
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
4170
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
4171
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
4172
22.9k
        }
4173
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
4174
22.9k
            txn->remove(sub_txn_index_key);
4175
22.9k
        }
4176
        // Update txn label
4177
23.0k
        std::string label_key, label_val;
4178
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
4179
23.0k
        err = txn->get(label_key, &label_val);
4180
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4181
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
4182
0
                         << " err=" << err;
4183
0
            return -1;
4184
0
        }
4185
23.0k
        TxnLabelPB txn_label;
4186
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
4187
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
4188
0
            return -1;
4189
0
        }
4190
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
4191
23.0k
        if (it != txn_label.txn_ids().end()) {
4192
23.0k
            txn_label.mutable_txn_ids()->erase(it);
4193
23.0k
        }
4194
23.0k
        if (txn_label.txn_ids().empty()) {
4195
23.0k
            txn->remove(label_key);
4196
23.0k
            TEST_SYNC_POINT_CALLBACK(
4197
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
4198
23.0k
        } else {
4199
74
            if (!txn_label.SerializeToString(&label_val)) {
4200
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
4201
0
                return -1;
4202
0
            }
4203
74
            TEST_SYNC_POINT_CALLBACK(
4204
74
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
4205
74
            txn->atomic_set_ver_value(label_key, label_val);
4206
74
            TEST_SYNC_POINT_CALLBACK(
4207
74
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
4208
74
        }
4209
        // Remove recycle txn kv
4210
23.0k
        txn->remove(k);
4211
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
4212
23.0k
        err = txn->commit();
4213
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4214
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
4215
62
                TEST_SYNC_POINT_CALLBACK(
4216
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
4217
                // log the txn_id and label
4218
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
4219
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
4220
62
                             << " txn_label=" << txn_info.label();
4221
62
                return 1;
4222
62
            }
4223
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
4224
0
            return -1;
4225
62
        }
4226
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
4227
23.0k
        metrics_context.report();
4228
4229
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
4230
23.0k
        return 0;
4231
23.0k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4131
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
4132
23.0k
        std::string_view k1 = k;
4133
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
4134
23.0k
        k1.remove_prefix(1); // Remove key space
4135
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4136
23.0k
        int ret = decode_key(&k1, &out);
4137
23.0k
        if (ret != 0) {
4138
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
4139
0
            return -1;
4140
0
        }
4141
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4142
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4143
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4144
23.0k
        std::unique_ptr<Transaction> txn;
4145
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4146
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4147
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4148
0
            return -1;
4149
0
        }
4150
        // Remove txn index kv
4151
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
4152
23.0k
        txn->remove(index_key);
4153
        // Remove txn info kv
4154
23.0k
        std::string info_key, info_val;
4155
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
4156
23.0k
        err = txn->get(info_key, &info_val);
4157
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4158
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
4159
0
            return -1;
4160
0
        }
4161
23.0k
        TxnInfoPB txn_info;
4162
23.0k
        if (!txn_info.ParseFromString(info_val)) {
4163
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
4164
0
            return -1;
4165
0
        }
4166
23.0k
        txn->remove(info_key);
4167
        // Remove sub txn index kvs
4168
23.0k
        std::vector<std::string> sub_txn_index_keys;
4169
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
4170
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
4171
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
4172
22.9k
        }
4173
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
4174
22.9k
            txn->remove(sub_txn_index_key);
4175
22.9k
        }
4176
        // Update txn label
4177
23.0k
        std::string label_key, label_val;
4178
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
4179
23.0k
        err = txn->get(label_key, &label_val);
4180
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4181
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
4182
0
                         << " err=" << err;
4183
0
            return -1;
4184
0
        }
4185
23.0k
        TxnLabelPB txn_label;
4186
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
4187
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
4188
0
            return -1;
4189
0
        }
4190
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
4191
23.0k
        if (it != txn_label.txn_ids().end()) {
4192
23.0k
            txn_label.mutable_txn_ids()->erase(it);
4193
23.0k
        }
4194
23.0k
        if (txn_label.txn_ids().empty()) {
4195
23.0k
            txn->remove(label_key);
4196
23.0k
            TEST_SYNC_POINT_CALLBACK(
4197
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
4198
23.0k
        } else {
4199
74
            if (!txn_label.SerializeToString(&label_val)) {
4200
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
4201
0
                return -1;
4202
0
            }
4203
74
            TEST_SYNC_POINT_CALLBACK(
4204
74
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
4205
74
            txn->atomic_set_ver_value(label_key, label_val);
4206
74
            TEST_SYNC_POINT_CALLBACK(
4207
74
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
4208
74
        }
4209
        // Remove recycle txn kv
4210
23.0k
        txn->remove(k);
4211
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
4212
23.0k
        err = txn->commit();
4213
23.0k
        if (err != TxnErrorCode::TXN_OK) {
4214
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
4215
62
                TEST_SYNC_POINT_CALLBACK(
4216
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
4217
                // log the txn_id and label
4218
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
4219
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
4220
62
                             << " txn_label=" << txn_info.label();
4221
62
                return 1;
4222
62
            }
4223
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
4224
0
            return -1;
4225
62
        }
4226
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
4227
23.0k
        metrics_context.report();
4228
4229
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
4230
23.0k
        return 0;
4231
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4131
3
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
4132
3
        std::string_view k1 = k;
4133
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
4134
3
        k1.remove_prefix(1); // Remove key space
4135
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4136
3
        int ret = decode_key(&k1, &out);
4137
3
        if (ret != 0) {
4138
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
4139
0
            return -1;
4140
0
        }
4141
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
4142
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
4143
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
4144
3
        std::unique_ptr<Transaction> txn;
4145
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4146
3
        if (err != TxnErrorCode::TXN_OK) {
4147
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
4148
0
            return -1;
4149
0
        }
4150
        // Remove txn index kv
4151
3
        auto index_key = txn_index_key({instance_id_, txn_id});
4152
3
        txn->remove(index_key);
4153
        // Remove txn info kv
4154
3
        std::string info_key, info_val;
4155
3
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
4156
3
        err = txn->get(info_key, &info_val);
4157
3
        if (err != TxnErrorCode::TXN_OK) {
4158
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
4159
0
            return -1;
4160
0
        }
4161
3
        TxnInfoPB txn_info;
4162
3
        if (!txn_info.ParseFromString(info_val)) {
4163
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
4164
0
            return -1;
4165
0
        }
4166
3
        txn->remove(info_key);
4167
        // Remove sub txn index kvs
4168
3
        std::vector<std::string> sub_txn_index_keys;
4169
3
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
4170
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
4171
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
4172
0
        }
4173
3
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
4174
0
            txn->remove(sub_txn_index_key);
4175
0
        }
4176
        // Update txn label
4177
3
        std::string label_key, label_val;
4178
3
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
4179
3
        err = txn->get(label_key, &label_val);
4180
3
        if (err != TxnErrorCode::TXN_OK) {
4181
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
4182
0
                         << " err=" << err;
4183
0
            return -1;
4184
0
        }
4185
3
        TxnLabelPB txn_label;
4186
3
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
4187
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
4188
0
            return -1;
4189
0
        }
4190
3
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
4191
3
        if (it != txn_label.txn_ids().end()) {
4192
3
            txn_label.mutable_txn_ids()->erase(it);
4193
3
        }
4194
3
        if (txn_label.txn_ids().empty()) {
4195
3
            txn->remove(label_key);
4196
3
            TEST_SYNC_POINT_CALLBACK(
4197
3
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
4198
3
        } else {
4199
0
            if (!txn_label.SerializeToString(&label_val)) {
4200
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
4201
0
                return -1;
4202
0
            }
4203
0
            TEST_SYNC_POINT_CALLBACK(
4204
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
4205
0
            txn->atomic_set_ver_value(label_key, label_val);
4206
0
            TEST_SYNC_POINT_CALLBACK(
4207
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
4208
0
        }
4209
        // Remove recycle txn kv
4210
3
        txn->remove(k);
4211
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
4212
3
        err = txn->commit();
4213
3
        if (err != TxnErrorCode::TXN_OK) {
4214
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
4215
0
                TEST_SYNC_POINT_CALLBACK(
4216
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
4217
                // log the txn_id and label
4218
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
4219
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
4220
0
                             << " txn_label=" << txn_info.label();
4221
0
                return 1;
4222
0
            }
4223
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
4224
0
            return -1;
4225
0
        }
4226
3
        metrics_context.total_recycled_num = ++num_recycled;
4227
3
        metrics_context.report();
4228
4229
3
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
4230
3
        return 0;
4231
3
    };
4232
4233
21
    auto loop_done = [&]() -> int {
4234
12
        DORIS_CLOUD_DEFER {
4235
12
            recycle_txn_info_keys.clear();
4236
12
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4234
9
        DORIS_CLOUD_DEFER {
4235
9
            recycle_txn_info_keys.clear();
4236
9
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4234
3
        DORIS_CLOUD_DEFER {
4235
3
            recycle_txn_info_keys.clear();
4236
3
        };
4237
12
        TEST_SYNC_POINT_CALLBACK(
4238
12
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
4239
12
                &recycle_txn_info_keys);
4240
23.0k
        for (const auto& k : recycle_txn_info_keys) {
4241
23.0k
            concurrent_delete_executor.add([&]() {
4242
23.0k
                int ret = delete_recycle_txn_kv(k);
4243
23.0k
                if (ret == 1) {
4244
18
                    constexpr int MAX_RETRY = 10;
4245
54
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4246
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4247
54
                        ret = delete_recycle_txn_kv(k);
4248
                        // clang-format off
4249
54
                        TEST_SYNC_POINT_CALLBACK(
4250
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4251
                        // clang-format on
4252
54
                        if (ret != 1) {
4253
18
                            break;
4254
18
                        }
4255
                        // random sleep 0-100 ms to retry
4256
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4257
36
                    }
4258
18
                }
4259
23.0k
                if (ret != 0) {
4260
9
                    LOG_WARNING("failed to delete recycle txn kv")
4261
9
                            .tag("instance id", instance_id_)
4262
9
                            .tag("key", hex(k));
4263
9
                    return -1;
4264
9
                }
4265
23.0k
                return 0;
4266
23.0k
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
4241
23.0k
            concurrent_delete_executor.add([&]() {
4242
23.0k
                int ret = delete_recycle_txn_kv(k);
4243
23.0k
                if (ret == 1) {
4244
18
                    constexpr int MAX_RETRY = 10;
4245
54
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4246
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4247
54
                        ret = delete_recycle_txn_kv(k);
4248
                        // clang-format off
4249
54
                        TEST_SYNC_POINT_CALLBACK(
4250
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4251
                        // clang-format on
4252
54
                        if (ret != 1) {
4253
18
                            break;
4254
18
                        }
4255
                        // random sleep 0-100 ms to retry
4256
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4257
36
                    }
4258
18
                }
4259
23.0k
                if (ret != 0) {
4260
9
                    LOG_WARNING("failed to delete recycle txn kv")
4261
9
                            .tag("instance id", instance_id_)
4262
9
                            .tag("key", hex(k));
4263
9
                    return -1;
4264
9
                }
4265
23.0k
                return 0;
4266
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
4241
3
            concurrent_delete_executor.add([&]() {
4242
3
                int ret = delete_recycle_txn_kv(k);
4243
3
                if (ret == 1) {
4244
0
                    constexpr int MAX_RETRY = 10;
4245
0
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4246
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4247
0
                        ret = delete_recycle_txn_kv(k);
4248
                        // clang-format off
4249
0
                        TEST_SYNC_POINT_CALLBACK(
4250
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4251
                        // clang-format on
4252
0
                        if (ret != 1) {
4253
0
                            break;
4254
0
                        }
4255
                        // random sleep 0-100 ms to retry
4256
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4257
0
                    }
4258
0
                }
4259
3
                if (ret != 0) {
4260
0
                    LOG_WARNING("failed to delete recycle txn kv")
4261
0
                            .tag("instance id", instance_id_)
4262
0
                            .tag("key", hex(k));
4263
0
                    return -1;
4264
0
                }
4265
3
                return 0;
4266
3
            });
4267
23.0k
        }
4268
12
        bool finished = true;
4269
12
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4270
23.0k
        for (int r : rets) {
4271
23.0k
            if (r != 0) {
4272
9
                ret = -1;
4273
9
            }
4274
23.0k
        }
4275
4276
12
        ret = finished ? ret : -1;
4277
4278
12
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
4279
4280
12
        if (ret != 0) {
4281
3
            LOG_WARNING("recycle txn kv ret!=0")
4282
3
                    .tag("finished", finished)
4283
3
                    .tag("ret", ret)
4284
3
                    .tag("instance_id", instance_id_);
4285
3
            return ret;
4286
3
        }
4287
9
        return ret;
4288
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
4233
9
    auto loop_done = [&]() -> int {
4234
9
        DORIS_CLOUD_DEFER {
4235
9
            recycle_txn_info_keys.clear();
4236
9
        };
4237
9
        TEST_SYNC_POINT_CALLBACK(
4238
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
4239
9
                &recycle_txn_info_keys);
4240
23.0k
        for (const auto& k : recycle_txn_info_keys) {
4241
23.0k
            concurrent_delete_executor.add([&]() {
4242
23.0k
                int ret = delete_recycle_txn_kv(k);
4243
23.0k
                if (ret == 1) {
4244
23.0k
                    constexpr int MAX_RETRY = 10;
4245
23.0k
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4246
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4247
23.0k
                        ret = delete_recycle_txn_kv(k);
4248
                        // clang-format off
4249
23.0k
                        TEST_SYNC_POINT_CALLBACK(
4250
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4251
                        // clang-format on
4252
23.0k
                        if (ret != 1) {
4253
23.0k
                            break;
4254
23.0k
                        }
4255
                        // random sleep 0-100 ms to retry
4256
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4257
23.0k
                    }
4258
23.0k
                }
4259
23.0k
                if (ret != 0) {
4260
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
4261
23.0k
                            .tag("instance id", instance_id_)
4262
23.0k
                            .tag("key", hex(k));
4263
23.0k
                    return -1;
4264
23.0k
                }
4265
23.0k
                return 0;
4266
23.0k
            });
4267
23.0k
        }
4268
9
        bool finished = true;
4269
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4270
23.0k
        for (int r : rets) {
4271
23.0k
            if (r != 0) {
4272
9
                ret = -1;
4273
9
            }
4274
23.0k
        }
4275
4276
9
        ret = finished ? ret : -1;
4277
4278
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
4279
4280
9
        if (ret != 0) {
4281
3
            LOG_WARNING("recycle txn kv ret!=0")
4282
3
                    .tag("finished", finished)
4283
3
                    .tag("ret", ret)
4284
3
                    .tag("instance_id", instance_id_);
4285
3
            return ret;
4286
3
        }
4287
6
        return ret;
4288
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
4233
3
    auto loop_done = [&]() -> int {
4234
3
        DORIS_CLOUD_DEFER {
4235
3
            recycle_txn_info_keys.clear();
4236
3
        };
4237
3
        TEST_SYNC_POINT_CALLBACK(
4238
3
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
4239
3
                &recycle_txn_info_keys);
4240
3
        for (const auto& k : recycle_txn_info_keys) {
4241
3
            concurrent_delete_executor.add([&]() {
4242
3
                int ret = delete_recycle_txn_kv(k);
4243
3
                if (ret == 1) {
4244
3
                    constexpr int MAX_RETRY = 10;
4245
3
                    for (size_t i = 1; i <= MAX_RETRY; ++i) {
4246
3
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
4247
3
                        ret = delete_recycle_txn_kv(k);
4248
                        // clang-format off
4249
3
                        TEST_SYNC_POINT_CALLBACK(
4250
3
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
4251
                        // clang-format on
4252
3
                        if (ret != 1) {
4253
3
                            break;
4254
3
                        }
4255
                        // random sleep 0-100 ms to retry
4256
3
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
4257
3
                    }
4258
3
                }
4259
3
                if (ret != 0) {
4260
3
                    LOG_WARNING("failed to delete recycle txn kv")
4261
3
                            .tag("instance id", instance_id_)
4262
3
                            .tag("key", hex(k));
4263
3
                    return -1;
4264
3
                }
4265
3
                return 0;
4266
3
            });
4267
3
        }
4268
3
        bool finished = true;
4269
3
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4270
3
        for (int r : rets) {
4271
3
            if (r != 0) {
4272
0
                ret = -1;
4273
0
            }
4274
3
        }
4275
4276
3
        ret = finished ? ret : -1;
4277
4278
3
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
4279
4280
3
        if (ret != 0) {
4281
0
            LOG_WARNING("recycle txn kv ret!=0")
4282
0
                    .tag("finished", finished)
4283
0
                    .tag("ret", ret)
4284
0
                    .tag("instance_id", instance_id_);
4285
0
            return ret;
4286
0
        }
4287
3
        return ret;
4288
3
    };
4289
4290
21
    if (config::enable_recycler_stats_metrics) {
4291
0
        scan_and_statistics_expired_txn_label();
4292
0
    }
4293
    // recycle_func and loop_done for scan and recycle
4294
21
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
4295
21
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
4296
21
}
4297
4298
struct CopyJobIdTuple {
4299
    std::string instance_id;
4300
    std::string stage_id;
4301
    long table_id;
4302
    std::string copy_id;
4303
    std::string stage_path;
4304
};
4305
struct BatchObjStoreAccessor {
4306
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
4307
                          TxnKv* txn_kv)
4308
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
4309
3
    ~BatchObjStoreAccessor() {
4310
3
        if (!paths_.empty()) {
4311
3
            consume();
4312
3
        }
4313
3
    }
4314
4315
    /**
4316
    * To implicitly do batch work and submit the batch delete task to s3
4317
    * The s3 delete operations are done in batches, and then the CopyJobPB keys are deleted one by one
4318
    *
4319
    * @param copy_job The protobuf struct containing the copy job files.
4320
    * @param key The copy job's key on fdb. The key is originally owned by the fdb range iterator; to make sure
4321
    *            it lasts until we finish the delete task, it is passed here by value as a string
4322
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
4323
    */
4324
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
4325
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
4326
5
        auto& file_keys = copy_file_keys_[key];
4327
5
        file_keys.log_trace =
4328
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
4329
5
                            instance_id, stage_id, table_id, copy_id, path);
4330
5
        std::string_view log_trace = file_keys.log_trace;
4331
2.03k
        for (const auto& file : copy_job.object_files()) {
4332
2.03k
            auto relative_path = file.relative_path();
4333
2.03k
            paths_.push_back(relative_path);
4334
2.03k
            file_keys.keys.push_back(copy_file_key(
4335
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
4336
2.03k
            LOG_INFO(log_trace)
4337
2.03k
                    .tag("relative_path", relative_path)
4338
2.03k
                    .tag("batch_count", batch_count_);
4339
2.03k
        }
4340
5
        LOG_INFO(log_trace)
4341
5
                .tag("objects_num", copy_job.object_files().size())
4342
5
                .tag("batch_count", batch_count_);
4343
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object (**ATTN**: it is NOT
4344
        // recommended to use delete objects when the number of objects is less than 10)
4345
5
        if (paths_.size() < 1000) {
4346
3
            return;
4347
3
        }
4348
2
        consume();
4349
2
    }
4350
4351
private:
4352
5
    void consume() {
4353
5
        DORIS_CLOUD_DEFER {
4354
5
            paths_.clear();
4355
5
            copy_file_keys_.clear();
4356
5
            batch_count_++;
4357
4358
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
4359
5
                        batch_count_);
4360
5
        };
4361
4362
5
        StopWatch sw;
4363
        // TODO(yuejing): in the accessor's delete_objects implementation, consider issuing individual delete-object operations instead of a POST when the number of _paths does not exceed 10
4364
5
        if (0 != accessor_->delete_files(paths_)) {
4365
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
4366
2
                        paths_.size(), batch_count_, sw.elapsed_us());
4367
2
            return;
4368
2
        }
4369
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
4370
3
                    paths_.size(), batch_count_, sw.elapsed_us());
4371
        // delete fdb's keys
4372
3
        for (auto& file_keys : copy_file_keys_) {
4373
3
            auto& [log_trace, keys] = file_keys.second;
4374
3
            std::unique_ptr<Transaction> txn;
4375
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
4376
0
                LOG(WARNING) << "failed to create txn";
4377
0
                continue;
4378
0
            }
4379
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
4380
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
4381
            // limited, should not cause the txn commit failed.
4382
1.02k
            for (const auto& key : keys) {
4383
1.02k
                txn->remove(key);
4384
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
4385
1.02k
            }
4386
3
            txn->remove(file_keys.first);
4387
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
4388
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
4389
0
                continue;
4390
0
            }
4391
3
        }
4392
3
    }
4393
    std::shared_ptr<StorageVaultAccessor> accessor_;
4394
    // the path of the s3 files to be deleted
4395
    std::vector<std::string> paths_;
4396
    struct CopyFiles {
4397
        std::string log_trace;
4398
        std::vector<std::string> keys;
4399
    };
4400
    // pair<std::string, std::vector<std::string>>
4401
    // first: instance_id_ stage_id table_id query_id
4402
    // second: keys to be deleted
4403
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
4404
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
4405
    // used to distinguish different batch tasks; the task log consists of the thread ID and batch number,
4406
    // which together uniquely identify different tasks for log tracing
4407
    uint64_t& batch_count_;
4408
    TxnKv* txn_kv_;
4409
};
4410
4411
13
int InstanceRecycler::recycle_copy_jobs() {
4412
13
    int64_t num_scanned = 0;
4413
13
    int64_t num_finished = 0;
4414
13
    int64_t num_expired = 0;
4415
13
    int64_t num_recycled = 0;
4416
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
4417
13
    uint64_t batch_count = 0;
4418
13
    const std::string task_name = "recycle_copy_jobs";
4419
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4420
4421
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
4422
4423
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4424
13
    register_recycle_task(task_name, start_time);
4425
4426
13
    DORIS_CLOUD_DEFER {
4427
13
        unregister_recycle_task(task_name);
4428
13
        int64_t cost =
4429
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4430
13
        metrics_context.finish_report();
4431
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
4432
13
                .tag("instance_id", instance_id_)
4433
13
                .tag("num_scanned", num_scanned)
4434
13
                .tag("num_finished", num_finished)
4435
13
                .tag("num_expired", num_expired)
4436
13
                .tag("num_recycled", num_recycled);
4437
13
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
4426
13
    DORIS_CLOUD_DEFER {
4427
13
        unregister_recycle_task(task_name);
4428
13
        int64_t cost =
4429
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4430
13
        metrics_context.finish_report();
4431
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
4432
13
                .tag("instance_id", instance_id_)
4433
13
                .tag("num_scanned", num_scanned)
4434
13
                .tag("num_finished", num_finished)
4435
13
                .tag("num_expired", num_expired)
4436
13
                .tag("num_recycled", num_recycled);
4437
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
4438
4439
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
4440
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
4441
13
    std::string key0;
4442
13
    std::string key1;
4443
13
    copy_job_key(key_info0, &key0);
4444
13
    copy_job_key(key_info1, &key1);
4445
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
4446
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
4447
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
4448
16
                         this](std::string_view k, std::string_view v) -> int {
4449
16
        ++num_scanned;
4450
16
        CopyJobPB copy_job;
4451
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
4452
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
4453
0
            return -1;
4454
0
        }
4455
4456
        // decode copy job key
4457
16
        auto k1 = k;
4458
16
        k1.remove_prefix(1);
4459
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4460
16
        decode_key(&k1, &out);
4461
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
4462
        // -> CopyJobPB
4463
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
4464
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
4465
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
4466
4467
16
        bool check_storage = true;
4468
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
4469
12
            ++num_finished;
4470
4471
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
4472
7
                auto it = stage_accessor_map.find(stage_id);
4473
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
4474
7
                std::string_view path;
4475
7
                if (it != stage_accessor_map.end()) {
4476
2
                    accessor = it->second;
4477
5
                } else {
4478
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
4479
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
4480
5
                                                      &inner_accessor);
4481
5
                    if (ret < 0) { // error
4482
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
4483
0
                        return -1;
4484
5
                    } else if (ret == 0) {
4485
3
                        path = inner_accessor->uri();
4486
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
4487
3
                                inner_accessor, batch_count, txn_kv_.get());
4488
3
                        stage_accessor_map.emplace(stage_id, accessor);
4489
3
                    } else { // stage not found, skip check storage
4490
2
                        check_storage = false;
4491
2
                    }
4492
5
                }
4493
7
                if (check_storage) {
4494
                    // TODO delete objects with key and etag is not supported
4495
5
                    accessor->add(std::move(copy_job), std::string(k),
4496
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
4497
5
                    return 0;
4498
5
                }
4499
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
4500
5
                int64_t current_time =
4501
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4502
5
                if (copy_job.finish_time_ms() > 0) {
4503
2
                    if (!config::force_immediate_recycle &&
4504
2
                        current_time < copy_job.finish_time_ms() +
4505
2
                                               config::copy_job_max_retention_second * 1000) {
4506
1
                        return 0;
4507
1
                    }
4508
3
                } else {
4509
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
4510
3
                    if (!config::force_immediate_recycle &&
4511
3
                        current_time < copy_job.start_time_ms() +
4512
3
                                               config::copy_job_max_retention_second * 1000) {
4513
1
                        return 0;
4514
1
                    }
4515
3
                }
4516
5
            }
4517
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
4518
4
            int64_t current_time =
4519
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4520
            // if copy job is timeout: delete all copy file kvs and copy job kv
4521
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
4522
2
                return 0;
4523
2
            }
4524
2
            ++num_expired;
4525
2
        }
4526
4527
        // delete all copy files
4528
7
        std::vector<std::string> copy_file_keys;
4529
70
        for (auto& file : copy_job.object_files()) {
4530
70
            copy_file_keys.push_back(copy_file_key(
4531
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
4532
70
        }
4533
7
        std::unique_ptr<Transaction> txn;
4534
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4535
0
            LOG(WARNING) << "failed to create txn";
4536
0
            return -1;
4537
0
        }
4538
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
4539
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
4540
        // limited, should not cause the txn commit failed.
4541
70
        for (const auto& key : copy_file_keys) {
4542
70
            txn->remove(key);
4543
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
4544
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
4545
70
                      << ", query_id=" << copy_id;
4546
70
        }
4547
7
        txn->remove(k);
4548
7
        TxnErrorCode err = txn->commit();
4549
7
        if (err != TxnErrorCode::TXN_OK) {
4550
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
4551
0
            return -1;
4552
0
        }
4553
4554
7
        metrics_context.total_recycled_num = ++num_recycled;
4555
7
        metrics_context.report();
4556
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4557
7
        return 0;
4558
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4448
16
                         this](std::string_view k, std::string_view v) -> int {
4449
16
        ++num_scanned;
4450
16
        CopyJobPB copy_job;
4451
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
4452
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
4453
0
            return -1;
4454
0
        }
4455
4456
        // decode copy job key
4457
16
        auto k1 = k;
4458
16
        k1.remove_prefix(1);
4459
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4460
16
        decode_key(&k1, &out);
4461
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
4462
        // -> CopyJobPB
4463
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
4464
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
4465
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
4466
4467
16
        bool check_storage = true;
4468
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
4469
12
            ++num_finished;
4470
4471
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
4472
7
                auto it = stage_accessor_map.find(stage_id);
4473
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
4474
7
                std::string_view path;
4475
7
                if (it != stage_accessor_map.end()) {
4476
2
                    accessor = it->second;
4477
5
                } else {
4478
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
4479
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
4480
5
                                                      &inner_accessor);
4481
5
                    if (ret < 0) { // error
4482
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
4483
0
                        return -1;
4484
5
                    } else if (ret == 0) {
4485
3
                        path = inner_accessor->uri();
4486
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
4487
3
                                inner_accessor, batch_count, txn_kv_.get());
4488
3
                        stage_accessor_map.emplace(stage_id, accessor);
4489
3
                    } else { // stage not found, skip check storage
4490
2
                        check_storage = false;
4491
2
                    }
4492
5
                }
4493
7
                if (check_storage) {
4494
                    // TODO delete objects with key and etag is not supported
4495
5
                    accessor->add(std::move(copy_job), std::string(k),
4496
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
4497
5
                    return 0;
4498
5
                }
4499
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
4500
5
                int64_t current_time =
4501
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4502
5
                if (copy_job.finish_time_ms() > 0) {
4503
2
                    if (!config::force_immediate_recycle &&
4504
2
                        current_time < copy_job.finish_time_ms() +
4505
2
                                               config::copy_job_max_retention_second * 1000) {
4506
1
                        return 0;
4507
1
                    }
4508
3
                } else {
4509
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
4510
3
                    if (!config::force_immediate_recycle &&
4511
3
                        current_time < copy_job.start_time_ms() +
4512
3
                                               config::copy_job_max_retention_second * 1000) {
4513
1
                        return 0;
4514
1
                    }
4515
3
                }
4516
5
            }
4517
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
4518
4
            int64_t current_time =
4519
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
4520
            // if copy job is timeout: delete all copy file kvs and copy job kv
4521
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
4522
2
                return 0;
4523
2
            }
4524
2
            ++num_expired;
4525
2
        }
4526
4527
        // delete all copy files
4528
7
        std::vector<std::string> copy_file_keys;
4529
70
        for (auto& file : copy_job.object_files()) {
4530
70
            copy_file_keys.push_back(copy_file_key(
4531
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
4532
70
        }
4533
7
        std::unique_ptr<Transaction> txn;
4534
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4535
0
            LOG(WARNING) << "failed to create txn";
4536
0
            return -1;
4537
0
        }
4538
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
4539
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
4540
        // limited, should not cause the txn commit failed.
4541
70
        for (const auto& key : copy_file_keys) {
4542
70
            txn->remove(key);
4543
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
4544
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
4545
70
                      << ", query_id=" << copy_id;
4546
70
        }
4547
7
        txn->remove(k);
4548
7
        TxnErrorCode err = txn->commit();
4549
7
        if (err != TxnErrorCode::TXN_OK) {
4550
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
4551
0
            return -1;
4552
0
        }
4553
4554
7
        metrics_context.total_recycled_num = ++num_recycled;
4555
7
        metrics_context.report();
4556
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4557
7
        return 0;
4558
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
4559
4560
13
    if (config::enable_recycler_stats_metrics) {
4561
0
        scan_and_statistics_copy_jobs();
4562
0
    }
4563
    // recycle_func and loop_done for scan and recycle
4564
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
4565
13
}
4566
4567
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
4568
                                             const StagePB::StageType& stage_type,
4569
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
4570
5
#ifdef UNIT_TEST
4571
    // In unit test, external use the same accessor as the internal stage
4572
5
    auto it = accessor_map_.find(stage_id);
4573
5
    if (it != accessor_map_.end()) {
4574
3
        *accessor = it->second;
4575
3
    } else {
4576
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
4577
2
        return 1;
4578
2
    }
4579
#else
4580
    // init s3 accessor and add to accessor map
4581
    auto stage_it =
4582
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
4583
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
4584
4585
    if (stage_it == instance_info_.stages().end()) {
4586
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
4587
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
4588
        return 1;
4589
    }
4590
4591
    const auto& object_store_info = stage_it->obj_info();
4592
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
4593
4594
    S3Conf s3_conf;
4595
    if (stage_type == StagePB::EXTERNAL) {
4596
        if (stage_access_type == StagePB::AKSK) {
4597
            auto conf = S3Conf::from_obj_store_info(object_store_info);
4598
            if (!conf) {
4599
                return -1;
4600
            }
4601
4602
            s3_conf = std::move(*conf);
4603
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
4604
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
4605
            if (!conf) {
4606
                return -1;
4607
            }
4608
4609
            s3_conf = std::move(*conf);
4610
            if (instance_info_.ram_user().has_encryption_info()) {
4611
                AkSkPair plain_ak_sk_pair;
4612
                int ret = decrypt_ak_sk_helper(
4613
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
4614
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
4615
                if (ret != 0) {
4616
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
4617
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
4618
                    return -1;
4619
                }
4620
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
4621
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
4622
            } else {
4623
                s3_conf.ak = instance_info_.ram_user().ak();
4624
                s3_conf.sk = instance_info_.ram_user().sk();
4625
            }
4626
        } else {
4627
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
4628
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
4629
            return -1;
4630
        }
4631
    } else if (stage_type == StagePB::INTERNAL) {
4632
        int idx = stoi(object_store_info.id());
4633
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4634
            LOG(WARNING) << "invalid idx: " << idx;
4635
            return -1;
4636
        }
4637
4638
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
4639
        auto conf = S3Conf::from_obj_store_info(old_obj);
4640
        if (!conf) {
4641
            return -1;
4642
        }
4643
4644
        s3_conf = std::move(*conf);
4645
        s3_conf.prefix = object_store_info.prefix();
4646
    } else {
4647
        LOG(WARNING) << "unknown stage type " << stage_type;
4648
        return -1;
4649
    }
4650
4651
    std::shared_ptr<S3Accessor> s3_accessor;
4652
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
4653
    if (ret != 0) {
4654
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
4655
        return -1;
4656
    }
4657
4658
    *accessor = std::move(s3_accessor);
4659
#endif
4660
3
    return 0;
4661
5
}
4662
4663
11
int InstanceRecycler::recycle_stage() {
4664
11
    int64_t num_scanned = 0;
4665
11
    int64_t num_recycled = 0;
4666
11
    const std::string task_name = "recycle_stage";
4667
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4668
4669
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
4670
4671
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4672
11
    register_recycle_task(task_name, start_time);
4673
4674
11
    DORIS_CLOUD_DEFER {
4675
11
        unregister_recycle_task(task_name);
4676
11
        int64_t cost =
4677
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4678
11
        metrics_context.finish_report();
4679
11
        LOG_WARNING("recycle stage, cost={}s", cost)
4680
11
                .tag("instance_id", instance_id_)
4681
11
                .tag("num_scanned", num_scanned)
4682
11
                .tag("num_recycled", num_recycled);
4683
11
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
4674
11
    DORIS_CLOUD_DEFER {
4675
11
        unregister_recycle_task(task_name);
4676
11
        int64_t cost =
4677
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4678
11
        metrics_context.finish_report();
4679
11
        LOG_WARNING("recycle stage, cost={}s", cost)
4680
11
                .tag("instance_id", instance_id_)
4681
11
                .tag("num_scanned", num_scanned)
4682
11
                .tag("num_recycled", num_recycled);
4683
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
4684
4685
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
4686
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
4687
11
    std::string key0 = recycle_stage_key(key_info0);
4688
11
    std::string key1 = recycle_stage_key(key_info1);
4689
4690
11
    std::vector<std::string_view> stage_keys;
4691
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
4692
11
                         this](std::string_view k, std::string_view v) -> int {
4693
1
        ++num_scanned;
4694
1
        RecycleStagePB recycle_stage;
4695
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
4696
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
4697
0
            return -1;
4698
0
        }
4699
4700
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
4701
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4702
0
            LOG(WARNING) << "invalid idx: " << idx;
4703
0
            return -1;
4704
0
        }
4705
4706
1
        std::shared_ptr<StorageVaultAccessor> accessor;
4707
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
4708
1
                [&] {
4709
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
4710
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4711
1
                    if (!s3_conf) {
4712
1
                        return -1;
4713
1
                    }
4714
4715
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
4716
1
                    std::shared_ptr<S3Accessor> s3_accessor;
4717
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
4718
1
                    if (ret != 0) {
4719
1
                        return -1;
4720
1
                    }
4721
4722
1
                    accessor = std::move(s3_accessor);
4723
1
                    return 0;
4724
1
                }(),
4725
1
                "recycle_stage:get_accessor", &accessor);
4726
4727
1
        if (ret != 0) {
4728
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
4729
0
            return ret;
4730
0
        }
4731
4732
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
4733
1
                .tag("instance_id", instance_id_)
4734
1
                .tag("stage_id", recycle_stage.stage().stage_id())
4735
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
4736
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
4737
1
                .tag("obj_info_id", idx)
4738
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
4739
1
        ret = accessor->delete_all();
4740
1
        if (ret != 0) {
4741
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
4742
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
4743
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
4744
0
                         << ", ret=" << ret;
4745
0
            return -1;
4746
0
        }
4747
1
        metrics_context.total_recycled_num = ++num_recycled;
4748
1
        metrics_context.report();
4749
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
4750
1
        stage_keys.push_back(k);
4751
1
        return 0;
4752
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4692
1
                         this](std::string_view k, std::string_view v) -> int {
4693
1
        ++num_scanned;
4694
1
        RecycleStagePB recycle_stage;
4695
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
4696
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
4697
0
            return -1;
4698
0
        }
4699
4700
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
4701
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4702
0
            LOG(WARNING) << "invalid idx: " << idx;
4703
0
            return -1;
4704
0
        }
4705
4706
1
        std::shared_ptr<StorageVaultAccessor> accessor;
4707
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
4708
1
                [&] {
4709
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
4710
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4711
1
                    if (!s3_conf) {
4712
1
                        return -1;
4713
1
                    }
4714
4715
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
4716
1
                    std::shared_ptr<S3Accessor> s3_accessor;
4717
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
4718
1
                    if (ret != 0) {
4719
1
                        return -1;
4720
1
                    }
4721
4722
1
                    accessor = std::move(s3_accessor);
4723
1
                    return 0;
4724
1
                }(),
4725
1
                "recycle_stage:get_accessor", &accessor);
4726
4727
1
        if (ret != 0) {
4728
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
4729
0
            return ret;
4730
0
        }
4731
4732
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
4733
1
                .tag("instance_id", instance_id_)
4734
1
                .tag("stage_id", recycle_stage.stage().stage_id())
4735
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
4736
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
4737
1
                .tag("obj_info_id", idx)
4738
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
4739
1
        ret = accessor->delete_all();
4740
1
        if (ret != 0) {
4741
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
4742
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
4743
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
4744
0
                         << ", ret=" << ret;
4745
0
            return -1;
4746
0
        }
4747
1
        metrics_context.total_recycled_num = ++num_recycled;
4748
1
        metrics_context.report();
4749
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
4750
1
        stage_keys.push_back(k);
4751
1
        return 0;
4752
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
4753
4754
11
    auto loop_done = [&stage_keys, this]() -> int {
4755
1
        if (stage_keys.empty()) return 0;
4756
1
        DORIS_CLOUD_DEFER {
4757
1
            stage_keys.clear();
4758
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4756
1
        DORIS_CLOUD_DEFER {
4757
1
            stage_keys.clear();
4758
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
4759
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
4760
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
4761
0
            return -1;
4762
0
        }
4763
1
        return 0;
4764
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
4754
1
    auto loop_done = [&stage_keys, this]() -> int {
4755
1
        if (stage_keys.empty()) return 0;
4756
1
        DORIS_CLOUD_DEFER {
4757
1
            stage_keys.clear();
4758
1
        };
4759
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
4760
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
4761
0
            return -1;
4762
0
        }
4763
1
        return 0;
4764
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
4765
11
    if (config::enable_recycler_stats_metrics) {
4766
0
        scan_and_statistics_stage();
4767
0
    }
4768
    // recycle_func and loop_done for scan and recycle
4769
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
4770
11
}
4771
4772
10
int InstanceRecycler::recycle_expired_stage_objects() {
4773
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
4774
4775
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4776
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
4777
4778
10
    DORIS_CLOUD_DEFER {
4779
10
        int64_t cost =
4780
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4781
10
        metrics_context.finish_report();
4782
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
4783
10
                .tag("instance_id", instance_id_);
4784
10
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
4778
10
    DORIS_CLOUD_DEFER {
4779
10
        int64_t cost =
4780
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4781
10
        metrics_context.finish_report();
4782
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
4783
10
                .tag("instance_id", instance_id_);
4784
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
4785
4786
10
    int ret = 0;
4787
4788
10
    if (config::enable_recycler_stats_metrics) {
4789
0
        scan_and_statistics_expired_stage_objects();
4790
0
    }
4791
4792
10
    for (const auto& stage : instance_info_.stages()) {
4793
0
        std::stringstream ss;
4794
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
4795
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
4796
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
4797
0
           << ", prefix=" << stage.obj_info().prefix();
4798
4799
0
        if (stopped()) {
4800
0
            break;
4801
0
        }
4802
0
        if (stage.type() == StagePB::EXTERNAL) {
4803
0
            continue;
4804
0
        }
4805
0
        int idx = stoi(stage.obj_info().id());
4806
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
4807
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
4808
0
            continue;
4809
0
        }
4810
4811
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
4812
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
4813
0
        if (!s3_conf) {
4814
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
4815
0
            continue;
4816
0
        }
4817
4818
0
        s3_conf->prefix = stage.obj_info().prefix();
4819
0
        std::shared_ptr<S3Accessor> accessor;
4820
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
4821
0
        if (ret1 != 0) {
4822
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
4823
0
            ret = -1;
4824
0
            continue;
4825
0
        }
4826
4827
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
4828
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
4829
0
            ret = -1;
4830
0
            continue;
4831
0
        }
4832
4833
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
4834
0
        int64_t expiration_time =
4835
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
4836
0
                config::internal_stage_objects_expire_time_second;
4837
0
        if (config::force_immediate_recycle) {
4838
0
            expiration_time = INT64_MAX;
4839
0
        }
4840
0
        ret1 = accessor->delete_all(expiration_time);
4841
0
        if (ret1 != 0) {
4842
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
4843
0
                         << ss.str();
4844
0
            ret = -1;
4845
0
            continue;
4846
0
        }
4847
0
        metrics_context.total_recycled_num++;
4848
0
        metrics_context.report();
4849
0
    }
4850
10
    return ret;
4851
10
}
4852
4853
146
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
4854
146
    std::lock_guard lock(recycle_tasks_mutex);
4855
146
    running_recycle_tasks[task_name] = start_time;
4856
146
}
4857
4858
146
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
4859
146
    std::lock_guard lock(recycle_tasks_mutex);
4860
146
    DCHECK(running_recycle_tasks[task_name] > 0);
4861
146
    running_recycle_tasks.erase(task_name);
4862
146
}
4863
4864
21
bool InstanceRecycler::check_recycle_tasks() {
4865
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
4866
21
    {
4867
21
        std::lock_guard lock(recycle_tasks_mutex);
4868
21
        tmp_running_recycle_tasks = running_recycle_tasks;
4869
21
    }
4870
4871
21
    bool found = false;
4872
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4873
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
4874
20
        int64_t cost = now - start_time;
4875
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
4876
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
4877
20
                    .tag("instance_id", instance_id_)
4878
20
                    .tag("task", task_name);
4879
20
            found = true;
4880
20
        }
4881
20
    }
4882
4883
21
    return found;
4884
21
}
4885
4886
// Scan and statistics indexes that need to be recycled
4887
0
int InstanceRecycler::scan_and_statistics_indexes() {
4888
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
4889
4890
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
4891
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
4892
0
    std::string index_key0;
4893
0
    std::string index_key1;
4894
0
    recycle_index_key(index_key_info0, &index_key0);
4895
0
    recycle_index_key(index_key_info1, &index_key1);
4896
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4897
4898
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
4899
0
        RecycleIndexPB index_pb;
4900
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
4901
0
            return 0;
4902
0
        }
4903
0
        int64_t current_time = ::time(nullptr);
4904
0
        if (current_time <
4905
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
4906
0
            return 0;
4907
0
        }
4908
        // decode index_id
4909
0
        auto k1 = k;
4910
0
        k1.remove_prefix(1);
4911
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4912
0
        decode_key(&k1, &out);
4913
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
4914
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
4915
0
        std::unique_ptr<Transaction> txn;
4916
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4917
0
        if (err != TxnErrorCode::TXN_OK) {
4918
0
            return 0;
4919
0
        }
4920
0
        std::string val;
4921
0
        err = txn->get(k, &val);
4922
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
4923
0
            return 0;
4924
0
        }
4925
0
        if (err != TxnErrorCode::TXN_OK) {
4926
0
            return 0;
4927
0
        }
4928
0
        index_pb.Clear();
4929
0
        if (!index_pb.ParseFromString(val)) {
4930
0
            return 0;
4931
0
        }
4932
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
4933
0
            return 0;
4934
0
        }
4935
0
        metrics_context.total_need_recycle_num++;
4936
0
        return 0;
4937
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
4938
4939
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
4940
0
    metrics_context.report(true);
4941
0
    segment_metrics_context_.report(true);
4942
0
    tablet_metrics_context_.report(true);
4943
0
    return ret;
4944
0
}
4945
4946
// Scan and statistics partitions that need to be recycled
4947
0
int InstanceRecycler::scan_and_statistics_partitions() {
4948
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
4949
4950
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
4951
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
4952
0
    std::string part_key0;
4953
0
    std::string part_key1;
4954
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4955
4956
0
    recycle_partition_key(part_key_info0, &part_key0);
4957
0
    recycle_partition_key(part_key_info1, &part_key1);
4958
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
4959
0
        RecyclePartitionPB part_pb;
4960
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
4961
0
            return 0;
4962
0
        }
4963
0
        int64_t current_time = ::time(nullptr);
4964
0
        if (current_time <
4965
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
4966
0
            return 0;
4967
0
        }
4968
        // decode partition_id
4969
0
        auto k1 = k;
4970
0
        k1.remove_prefix(1);
4971
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4972
0
        decode_key(&k1, &out);
4973
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
4974
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
4975
        // Change state to RECYCLING
4976
0
        std::unique_ptr<Transaction> txn;
4977
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4978
0
        if (err != TxnErrorCode::TXN_OK) {
4979
0
            return 0;
4980
0
        }
4981
0
        std::string val;
4982
0
        err = txn->get(k, &val);
4983
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
4984
0
            return 0;
4985
0
        }
4986
0
        if (err != TxnErrorCode::TXN_OK) {
4987
0
            return 0;
4988
0
        }
4989
0
        part_pb.Clear();
4990
0
        if (!part_pb.ParseFromString(val)) {
4991
0
            return 0;
4992
0
        }
4993
        // Partitions with PREPARED state MUST have no data
4994
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
4995
0
        int ret = 0;
4996
0
        for (int64_t index_id : part_pb.index_id()) {
4997
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
4998
0
                                            partition_id, is_empty_tablet) != 0) {
4999
0
                ret = 0;
5000
0
            }
5001
0
        }
5002
0
        metrics_context.total_need_recycle_num++;
5003
0
        return ret;
5004
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5005
5006
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
5007
0
    metrics_context.report(true);
5008
0
    segment_metrics_context_.report(true);
5009
0
    tablet_metrics_context_.report(true);
5010
0
    return ret;
5011
0
}
5012
5013
// Scan and statistics rowsets that need to be recycled
5014
0
int InstanceRecycler::scan_and_statistics_rowsets() {
5015
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
5016
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
5017
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
5018
0
    std::string recyc_rs_key0;
5019
0
    std::string recyc_rs_key1;
5020
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
5021
0
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
5022
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5023
5024
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5025
0
        RecycleRowsetPB rowset;
5026
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5027
0
            return 0;
5028
0
        }
5029
0
        int64_t current_time = ::time(nullptr);
5030
0
        if (current_time <
5031
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
5032
0
            return 0;
5033
0
        }
5034
0
        if (!rowset.has_type()) {
5035
0
            if (!rowset.has_resource_id()) [[unlikely]] {
5036
0
                return 0;
5037
0
            }
5038
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5039
0
                return 0;
5040
0
            }
5041
0
            metrics_context.total_need_recycle_num++;
5042
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
5043
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
5044
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
5045
0
            return 0;
5046
0
        }
5047
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
5048
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
5049
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
5050
0
                return 0;
5051
0
            }
5052
0
        }
5053
0
        metrics_context.total_need_recycle_num++;
5054
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
5055
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
5056
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
5057
0
        return 0;
5058
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5059
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
5060
0
    metrics_context.report(true);
5061
0
    segment_metrics_context_.report(true);
5062
0
    return ret;
5063
0
}
5064
5065
// Scan and statistics tmp_rowsets that need to be recycled
5066
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
5067
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
5068
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
5069
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
5070
0
    std::string tmp_rs_key0;
5071
0
    std::string tmp_rs_key1;
5072
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
5073
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
5074
5075
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5076
5077
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
5078
0
        doris::RowsetMetaCloudPB rowset;
5079
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5080
0
            return 0;
5081
0
        }
5082
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5083
0
        int64_t current_time = ::time(nullptr);
5084
0
        if (current_time < expiration) {
5085
0
            return 0;
5086
0
        }
5087
5088
0
        DCHECK_GT(rowset.txn_id(), 0)
5089
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
5090
0
        if (!is_txn_finished(txn_kv_, instance_id_, rowset.txn_id())) {
5091
0
            return 0;
5092
0
        }
5093
5094
0
        if (!rowset.has_resource_id()) {
5095
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5096
0
                return 0;
5097
0
            }
5098
0
            return 0;
5099
0
        }
5100
5101
0
        metrics_context.total_need_recycle_num++;
5102
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
5103
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
5104
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
5105
0
        return 0;
5106
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5107
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
5108
0
    metrics_context.report(true);
5109
0
    segment_metrics_context_.report(true);
5110
0
    return ret;
5111
0
}
5112
5113
// Scan and statistics abort_timeout_txn that need to be recycled
5114
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
5115
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
5116
5117
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
5118
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5119
0
    std::string begin_txn_running_key;
5120
0
    std::string end_txn_running_key;
5121
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
5122
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
5123
5124
0
    int64_t current_time =
5125
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5126
5127
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
5128
0
                                               std::string_view k, std::string_view v) -> int {
5129
0
        std::unique_ptr<Transaction> txn;
5130
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5131
0
        if (err != TxnErrorCode::TXN_OK) {
5132
0
            return 0;
5133
0
        }
5134
0
        std::string_view k1 = k;
5135
0
        k1.remove_prefix(1);
5136
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5137
0
        if (decode_key(&k1, &out) != 0) {
5138
0
            return 0;
5139
0
        }
5140
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5141
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5142
        // Update txn_info
5143
0
        std::string txn_inf_key, txn_inf_val;
5144
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5145
0
        err = txn->get(txn_inf_key, &txn_inf_val);
5146
0
        if (err != TxnErrorCode::TXN_OK) {
5147
0
            return 0;
5148
0
        }
5149
0
        TxnInfoPB txn_info;
5150
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
5151
0
            return 0;
5152
0
        }
5153
5154
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
5155
0
            TxnRunningPB txn_running_pb;
5156
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5157
0
                return 0;
5158
0
            }
5159
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5160
0
                return 0;
5161
0
            }
5162
0
            metrics_context.total_need_recycle_num++;
5163
0
        }
5164
0
        return 0;
5165
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5166
5167
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
5168
0
    metrics_context.report(true);
5169
0
    return ret;
5170
0
}
5171
5172
// Scan and statistics expired_txn_label that need to be recycled
5173
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
5174
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
5175
5176
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
5177
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5178
0
    std::string begin_recycle_txn_key;
5179
0
    std::string end_recycle_txn_key;
5180
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
5181
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
5182
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5183
0
    int64_t current_time_ms =
5184
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5185
5186
    // for calculate the total num or bytes of recyled objects
5187
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
5188
0
        RecycleTxnPB recycle_txn_pb;
5189
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5190
0
            return 0;
5191
0
        }
5192
0
        if ((config::force_immediate_recycle) ||
5193
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5194
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5195
0
             current_time_ms)) {
5196
0
            metrics_context.total_need_recycle_num++;
5197
0
        }
5198
0
        return 0;
5199
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5200
5201
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
5202
0
    metrics_context.report(true);
5203
0
    return ret;
5204
0
}
5205
5206
// Scan and statistics copy_jobs that need to be recycled
5207
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
5208
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
5209
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
5210
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
5211
0
    std::string key0;
5212
0
    std::string key1;
5213
0
    copy_job_key(key_info0, &key0);
5214
0
    copy_job_key(key_info1, &key1);
5215
5216
    // for calculate the total num or bytes of recyled objects
5217
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
5218
0
        CopyJobPB copy_job;
5219
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
5220
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
5221
0
            return 0;
5222
0
        }
5223
5224
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
5225
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
5226
0
                int64_t current_time =
5227
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5228
0
                if (copy_job.finish_time_ms() > 0) {
5229
0
                    if (!config::force_immediate_recycle &&
5230
0
                        current_time < copy_job.finish_time_ms() +
5231
0
                                               config::copy_job_max_retention_second * 1000) {
5232
0
                        return 0;
5233
0
                    }
5234
0
                } else {
5235
0
                    if (!config::force_immediate_recycle &&
5236
0
                        current_time < copy_job.start_time_ms() +
5237
0
                                               config::copy_job_max_retention_second * 1000) {
5238
0
                        return 0;
5239
0
                    }
5240
0
                }
5241
0
            }
5242
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
5243
0
            int64_t current_time =
5244
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5245
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
5246
0
                return 0;
5247
0
            }
5248
0
        }
5249
0
        metrics_context.total_need_recycle_num++;
5250
0
        return 0;
5251
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5252
5253
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
5254
0
    metrics_context.report(true);
5255
0
    return ret;
5256
0
}
5257
5258
// Scan and statistics stage that need to be recycled
5259
0
int InstanceRecycler::scan_and_statistics_stage() {
5260
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
5261
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
5262
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
5263
0
    std::string key0 = recycle_stage_key(key_info0);
5264
0
    std::string key1 = recycle_stage_key(key_info1);
5265
5266
    // for calculate the total num or bytes of recyled objects
5267
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
5268
0
                                                        std::string_view v) -> int {
5269
0
        RecycleStagePB recycle_stage;
5270
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
5271
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
5272
0
            return 0;
5273
0
        }
5274
5275
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
5276
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
5277
0
            LOG(WARNING) << "invalid idx: " << idx;
5278
0
            return 0;
5279
0
        }
5280
5281
0
        std::shared_ptr<StorageVaultAccessor> accessor;
5282
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
5283
0
                [&] {
5284
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
5285
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5286
0
                    if (!s3_conf) {
5287
0
                        return 0;
5288
0
                    }
5289
5290
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
5291
0
                    std::shared_ptr<S3Accessor> s3_accessor;
5292
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
5293
0
                    if (ret != 0) {
5294
0
                        return 0;
5295
0
                    }
5296
5297
0
                    accessor = std::move(s3_accessor);
5298
0
                    return 0;
5299
0
                }(),
5300
0
                "recycle_stage:get_accessor", &accessor);
5301
5302
0
        if (ret != 0) {
5303
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
5304
0
            return 0;
5305
0
        }
5306
5307
0
        metrics_context.total_need_recycle_num++;
5308
0
        return 0;
5309
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5310
5311
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
5312
0
    metrics_context.report(true);
5313
0
    return ret;
5314
0
}
5315
5316
// Scan and statistics expired_stage_objects that need to be recycled
5317
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
5318
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
5319
5320
    // for calculate the total num or bytes of recyled objects
5321
0
    auto scan_and_statistics = [&metrics_context, this]() {
5322
0
        for (const auto& stage : instance_info_.stages()) {
5323
0
            if (stopped()) {
5324
0
                break;
5325
0
            }
5326
0
            if (stage.type() == StagePB::EXTERNAL) {
5327
0
                continue;
5328
0
            }
5329
0
            int idx = stoi(stage.obj_info().id());
5330
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
5331
0
                continue;
5332
0
            }
5333
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
5334
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
5335
0
            if (!s3_conf) {
5336
0
                continue;
5337
0
            }
5338
0
            s3_conf->prefix = stage.obj_info().prefix();
5339
0
            std::shared_ptr<S3Accessor> accessor;
5340
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
5341
0
            if (ret1 != 0) {
5342
0
                continue;
5343
0
            }
5344
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
5345
0
                continue;
5346
0
            }
5347
0
            metrics_context.total_need_recycle_num++;
5348
0
        }
5349
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
5350
5351
0
    scan_and_statistics();
5352
0
    metrics_context.report(true);
5353
0
    return 0;
5354
0
}
5355
5356
// Scan and statistics versions that need to be recycled
5357
0
int InstanceRecycler::scan_and_statistics_versions() {
5358
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
5359
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
5360
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
5361
5362
0
    int64_t last_scanned_table_id = 0;
5363
0
    bool is_recycled = false; // Is last scanned kv recycled
5364
    // for calculate the total num or bytes of recyled objects
5365
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
5366
0
                                       std::string_view k, std::string_view) {
5367
0
        auto k1 = k;
5368
0
        k1.remove_prefix(1);
5369
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
5370
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5371
0
        decode_key(&k1, &out);
5372
0
        DCHECK_EQ(out.size(), 6) << k;
5373
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
5374
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
5375
0
            metrics_context.total_need_recycle_num +=
5376
0
                    is_recycled; // Version kv of this table has been recycled
5377
0
            return 0;
5378
0
        }
5379
0
        last_scanned_table_id = table_id;
5380
0
        is_recycled = false;
5381
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
5382
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
5383
0
        std::unique_ptr<Transaction> txn;
5384
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5385
0
        if (err != TxnErrorCode::TXN_OK) {
5386
0
            return 0;
5387
0
        }
5388
0
        std::unique_ptr<RangeGetIterator> iter;
5389
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
5390
0
        if (err != TxnErrorCode::TXN_OK) {
5391
0
            return 0;
5392
0
        }
5393
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
5394
0
            return 0;
5395
0
        }
5396
0
        metrics_context.total_need_recycle_num++;
5397
0
        is_recycled = true;
5398
0
        return 0;
5399
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5400
5401
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
5402
0
    metrics_context.report(true);
5403
0
    return ret;
5404
0
}
5405
5406
// Scan and statistics restore jobs that need to be recycled
5407
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
5408
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
5409
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
5410
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
5411
0
    std::string restore_job_key0;
5412
0
    std::string restore_job_key1;
5413
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
5414
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
5415
5416
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5417
5418
    // for calculate the total num or bytes of recyled objects
5419
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
5420
0
        RestoreJobCloudPB restore_job_pb;
5421
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5422
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5423
0
            return 0;
5424
0
        }
5425
0
        int64_t expiration =
5426
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5427
0
        int64_t current_time = ::time(nullptr);
5428
0
        if (current_time < expiration) { // not expired
5429
0
            return 0;
5430
0
        }
5431
0
        metrics_context.total_need_recycle_num++;
5432
0
        if(restore_job_pb.need_recycle_data()) {
5433
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
5434
0
        }
5435
0
        return 0;
5436
0
    };
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
5437
5438
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
5439
0
    metrics_context.report(true);
5440
0
    return ret;
5441
0
}
5442
5443
} // namespace doris::cloud