Coverage Report

Created: 2026-05-12 00:20

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <functional>
36
#include <initializer_list>
37
#include <memory>
38
#include <numeric>
39
#include <optional>
40
#include <random>
41
#include <string>
42
#include <string_view>
43
#include <thread>
44
#include <unordered_map>
45
#include <utility>
46
#include <variant>
47
48
#include "common/defer.h"
49
#include "common/stopwatch.h"
50
#include "meta-service/meta_service.h"
51
#include "meta-service/meta_service_helper.h"
52
#include "meta-service/meta_service_schema.h"
53
#include "meta-store/blob_message.h"
54
#include "meta-store/meta_reader.h"
55
#include "meta-store/txn_kv.h"
56
#include "meta-store/txn_kv_error.h"
57
#include "meta-store/versioned_value.h"
58
#include "recycler/checker.h"
59
#ifdef ENABLE_HDFS_STORAGE_VAULT
60
#include "recycler/hdfs_accessor.h"
61
#endif
62
#include "recycler/s3_accessor.h"
63
#include "recycler/storage_vault_accessor.h"
64
#ifdef UNIT_TEST
65
#include "../test/mock_accessor.h"
66
#endif
67
#include "common/bvars.h"
68
#include "common/config.h"
69
#include "common/encryption_util.h"
70
#include "common/logging.h"
71
#include "common/simple_thread_pool.h"
72
#include "common/util.h"
73
#include "cpp/sync_point.h"
74
#include "meta-store/codec.h"
75
#include "meta-store/document_message.h"
76
#include "meta-store/keys.h"
77
#include "recycler/recycler_service.h"
78
#include "recycler/sync_executor.h"
79
#include "recycler/util.h"
80
#include "snapshot/snapshot_manager_factory.h"
81
82
namespace doris::cloud {
83
84
using namespace std::chrono;
85
86
namespace {
87
88
0
int64_t packed_file_retry_sleep_ms() {
89
0
    const int64_t min_ms = std::max<int64_t>(0, config::packed_file_txn_retry_sleep_min_ms);
90
0
    const int64_t max_ms = std::max<int64_t>(min_ms, config::packed_file_txn_retry_sleep_max_ms);
91
0
    thread_local std::mt19937_64 gen(std::random_device {}());
92
0
    std::uniform_int_distribution<int64_t> dist(min_ms, max_ms);
93
0
    return dist(gen);
94
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
95
96
0
void sleep_for_packed_file_retry() {
97
0
    std::this_thread::sleep_for(std::chrono::milliseconds(packed_file_retry_sleep_ms()));
98
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
99
100
} // namespace
101
102
// return 0 for success get a key, 1 for key not found, negative for error
103
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
104
0
    std::unique_ptr<Transaction> txn;
105
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
106
0
    if (err != TxnErrorCode::TXN_OK) {
107
0
        return -1;
108
0
    }
109
0
    switch (txn->get(key, &val, true)) {
110
0
    case TxnErrorCode::TXN_OK:
111
0
        return 0;
112
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
113
0
        return 1;
114
0
    default:
115
0
        return -1;
116
0
    };
117
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
118
119
// 0 for success, negative for error
120
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
121
337
                   std::unique_ptr<RangeGetIterator>& it) {
122
337
    std::unique_ptr<Transaction> txn;
123
337
    TxnErrorCode err = txn_kv->create_txn(&txn);
124
337
    if (err != TxnErrorCode::TXN_OK) {
125
0
        return -1;
126
0
    }
127
337
    switch (txn->get(begin, end, &it, true)) {
128
337
    case TxnErrorCode::TXN_OK:
129
337
        return 0;
130
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
131
0
        return 1;
132
0
    default:
133
0
        return -1;
134
337
    };
135
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
121
31
                   std::unique_ptr<RangeGetIterator>& it) {
122
31
    std::unique_ptr<Transaction> txn;
123
31
    TxnErrorCode err = txn_kv->create_txn(&txn);
124
31
    if (err != TxnErrorCode::TXN_OK) {
125
0
        return -1;
126
0
    }
127
31
    switch (txn->get(begin, end, &it, true)) {
128
31
    case TxnErrorCode::TXN_OK:
129
31
        return 0;
130
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
131
0
        return 1;
132
0
    default:
133
0
        return -1;
134
31
    };
135
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
121
306
                   std::unique_ptr<RangeGetIterator>& it) {
122
306
    std::unique_ptr<Transaction> txn;
123
306
    TxnErrorCode err = txn_kv->create_txn(&txn);
124
306
    if (err != TxnErrorCode::TXN_OK) {
125
0
        return -1;
126
0
    }
127
306
    switch (txn->get(begin, end, &it, true)) {
128
306
    case TxnErrorCode::TXN_OK:
129
306
        return 0;
130
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
131
0
        return 1;
132
0
    default:
133
0
        return -1;
134
306
    };
135
0
}
136
137
// return 0 for success otherwise error
138
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
139
6
    std::unique_ptr<Transaction> txn;
140
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
141
6
    if (err != TxnErrorCode::TXN_OK) {
142
0
        return -1;
143
0
    }
144
10
    for (auto k : keys) {
145
10
        txn->remove(k);
146
10
    }
147
6
    switch (txn->commit()) {
148
6
    case TxnErrorCode::TXN_OK:
149
6
        return 0;
150
0
    case TxnErrorCode::TXN_CONFLICT:
151
0
        return -1;
152
0
    default:
153
0
        return -1;
154
6
    }
155
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
138
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
139
1
    std::unique_ptr<Transaction> txn;
140
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
141
1
    if (err != TxnErrorCode::TXN_OK) {
142
0
        return -1;
143
0
    }
144
1
    for (auto k : keys) {
145
1
        txn->remove(k);
146
1
    }
147
1
    switch (txn->commit()) {
148
1
    case TxnErrorCode::TXN_OK:
149
1
        return 0;
150
0
    case TxnErrorCode::TXN_CONFLICT:
151
0
        return -1;
152
0
    default:
153
0
        return -1;
154
1
    }
155
1
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
138
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
139
5
    std::unique_ptr<Transaction> txn;
140
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
141
5
    if (err != TxnErrorCode::TXN_OK) {
142
0
        return -1;
143
0
    }
144
9
    for (auto k : keys) {
145
9
        txn->remove(k);
146
9
    }
147
5
    switch (txn->commit()) {
148
5
    case TxnErrorCode::TXN_OK:
149
5
        return 0;
150
0
    case TxnErrorCode::TXN_CONFLICT:
151
0
        return -1;
152
0
    default:
153
0
        return -1;
154
5
    }
155
5
}
156
157
// return 0 for success otherwise error
158
139
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
159
139
    std::unique_ptr<Transaction> txn;
160
139
    TxnErrorCode err = txn_kv->create_txn(&txn);
161
139
    if (err != TxnErrorCode::TXN_OK) {
162
0
        return -1;
163
0
    }
164
106k
    for (auto& k : keys) {
165
106k
        txn->remove(k);
166
106k
    }
167
139
    switch (txn->commit()) {
168
139
    case TxnErrorCode::TXN_OK:
169
139
        return 0;
170
0
    case TxnErrorCode::TXN_CONFLICT:
171
0
        return -1;
172
0
    default:
173
0
        return -1;
174
139
    }
175
139
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
158
33
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
159
33
    std::unique_ptr<Transaction> txn;
160
33
    TxnErrorCode err = txn_kv->create_txn(&txn);
161
33
    if (err != TxnErrorCode::TXN_OK) {
162
0
        return -1;
163
0
    }
164
33
    for (auto& k : keys) {
165
16
        txn->remove(k);
166
16
    }
167
33
    switch (txn->commit()) {
168
33
    case TxnErrorCode::TXN_OK:
169
33
        return 0;
170
0
    case TxnErrorCode::TXN_CONFLICT:
171
0
        return -1;
172
0
    default:
173
0
        return -1;
174
33
    }
175
33
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
158
106
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
159
106
    std::unique_ptr<Transaction> txn;
160
106
    TxnErrorCode err = txn_kv->create_txn(&txn);
161
106
    if (err != TxnErrorCode::TXN_OK) {
162
0
        return -1;
163
0
    }
164
106k
    for (auto& k : keys) {
165
106k
        txn->remove(k);
166
106k
    }
167
106
    switch (txn->commit()) {
168
106
    case TxnErrorCode::TXN_OK:
169
106
        return 0;
170
0
    case TxnErrorCode::TXN_CONFLICT:
171
0
        return -1;
172
0
    default:
173
0
        return -1;
174
106
    }
175
106
}
176
177
// return 0 for success otherwise error
178
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
179
106k
                                       std::string_view end) {
180
106k
    std::unique_ptr<Transaction> txn;
181
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
182
106k
    if (err != TxnErrorCode::TXN_OK) {
183
0
        return -1;
184
0
    }
185
106k
    txn->remove(begin, end);
186
106k
    switch (txn->commit()) {
187
106k
    case TxnErrorCode::TXN_OK:
188
106k
        return 0;
189
0
    case TxnErrorCode::TXN_CONFLICT:
190
0
        return -1;
191
0
    default:
192
0
        return -1;
193
106k
    }
194
106k
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
179
16
                                       std::string_view end) {
180
16
    std::unique_ptr<Transaction> txn;
181
16
    TxnErrorCode err = txn_kv->create_txn(&txn);
182
16
    if (err != TxnErrorCode::TXN_OK) {
183
0
        return -1;
184
0
    }
185
16
    txn->remove(begin, end);
186
16
    switch (txn->commit()) {
187
16
    case TxnErrorCode::TXN_OK:
188
16
        return 0;
189
0
    case TxnErrorCode::TXN_CONFLICT:
190
0
        return -1;
191
0
    default:
192
0
        return -1;
193
16
    }
194
16
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
179
106k
                                       std::string_view end) {
180
106k
    std::unique_ptr<Transaction> txn;
181
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
182
106k
    if (err != TxnErrorCode::TXN_OK) {
183
0
        return -1;
184
0
    }
185
106k
    txn->remove(begin, end);
186
106k
    switch (txn->commit()) {
187
106k
    case TxnErrorCode::TXN_OK:
188
106k
        return 0;
189
0
    case TxnErrorCode::TXN_CONFLICT:
190
0
        return -1;
191
0
    default:
192
0
        return -1;
193
106k
    }
194
106k
}
195
196
void scan_restore_job_rowset(
197
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
198
        std::string& msg,
199
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
200
201
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
202
                                      int64_t num_scanned, int64_t num_recycled,
203
47
                                      int64_t start_time) {
204
47
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
205
0
        int64_t cost =
206
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
207
0
        if (cost > config::recycle_task_threshold_seconds) {
208
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
209
0
                    .tag("instance_id", instance_id)
210
0
                    .tag("task", task_name)
211
0
                    .tag("num_scanned", num_scanned)
212
0
                    .tag("num_recycled", num_recycled);
213
0
        }
214
0
    }
215
47
    return;
216
47
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
203
2
                                      int64_t start_time) {
204
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
205
0
        int64_t cost =
206
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
207
0
        if (cost > config::recycle_task_threshold_seconds) {
208
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
209
0
                    .tag("instance_id", instance_id)
210
0
                    .tag("task", task_name)
211
0
                    .tag("num_scanned", num_scanned)
212
0
                    .tag("num_recycled", num_recycled);
213
0
        }
214
0
    }
215
2
    return;
216
2
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
203
45
                                      int64_t start_time) {
204
45
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
205
0
        int64_t cost =
206
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
207
0
        if (cost > config::recycle_task_threshold_seconds) {
208
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
209
0
                    .tag("instance_id", instance_id)
210
0
                    .tag("task", task_name)
211
0
                    .tag("num_scanned", num_scanned)
212
0
                    .tag("num_recycled", num_recycled);
213
0
        }
214
0
    }
215
45
    return;
216
45
}
217
218
4
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
219
4
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);
220
221
4
    auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
222
4
                                                               "s3_producer_pool");
223
4
    s3_producer_pool->start();
224
4
    auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
225
4
                                                                  "recycle_tablet_pool");
226
4
    recycle_tablet_pool->start();
227
4
    auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>(
228
4
            config::recycle_pool_parallelism, "group_recycle_function_pool");
229
4
    group_recycle_function_pool->start();
230
4
    _thread_pool_group =
231
4
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
232
4
                                    std::move(group_recycle_function_pool));
233
234
4
    auto resource_mgr = std::make_shared<ResourceManager>(txn_kv_);
235
4
    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_, std::move(resource_mgr));
236
4
    snapshot_manager_ = create_snapshot_manager(txn_kv_);
237
4
}
238
239
4
Recycler::~Recycler() {
240
4
    if (!stopped()) {
241
0
        stop();
242
0
    }
243
4
}
244
245
4
void Recycler::instance_scanner_callback() {
246
    // sleep 60 seconds before scheduling for the launch procedure to complete:
247
    // some bad hdfs connection may cause some log to stdout stderr
248
    // which may pollute .out file and affect the script to check success
249
4
    std::this_thread::sleep_for(
250
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
251
8
    while (!stopped()) {
252
4
        std::vector<InstanceInfoPB> instances;
253
4
        get_all_instances(txn_kv_.get(), instances);
254
        // TODO(plat1ko): delete job recycle kv of non-existent instances
255
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
256
4
            std::stringstream ss;
257
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
258
4
            return ss.str();
259
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
255
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
256
4
            std::stringstream ss;
257
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
258
4
            return ss.str();
259
4
        }();
260
4
        if (!instances.empty()) {
261
            // enqueue instances
262
3
            std::lock_guard lock(mtx_);
263
30
            for (auto& instance : instances) {
264
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
265
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
266
                // skip instance already in pending queue
267
30
                if (success) {
268
30
                    pending_instance_queue_.push_back(std::move(instance));
269
30
                }
270
30
            }
271
3
            pending_instance_cond_.notify_all();
272
3
        }
273
4
        {
274
4
            std::unique_lock lock(mtx_);
275
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
276
7
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
276
7
                               [&]() { return stopped(); });
277
4
        }
278
4
    }
279
4
}
280
281
8
void Recycler::recycle_callback() {
282
38
    while (!stopped()) {
283
38
        InstanceInfoPB instance;
284
38
        {
285
38
            std::unique_lock lock(mtx_);
286
38
            pending_instance_cond_.wait(
287
51
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
287
51
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
288
38
            if (stopped()) {
289
8
                return;
290
8
            }
291
30
            instance = std::move(pending_instance_queue_.front());
292
30
            pending_instance_queue_.pop_front();
293
30
            pending_instance_set_.erase(instance.instance_id());
294
30
        }
295
0
        auto& instance_id = instance.instance_id();
296
30
        {
297
30
            std::lock_guard lock(mtx_);
298
            // skip instance in recycling
299
30
            if (recycling_instance_map_.count(instance_id)) continue;
300
30
        }
301
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
302
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
303
304
30
        if (int r = instance_recycler->init(); r != 0) {
305
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
306
0
                         << " ret=" << r;
307
0
            continue;
308
0
        }
309
30
        std::string recycle_job_key;
310
30
        job_recycle_key({instance_id}, &recycle_job_key);
311
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
312
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
313
30
        if (ret != 0) { // Prepare failed
314
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
315
20
                         << " ret=" << ret;
316
20
            continue;
317
20
        } else {
318
10
            std::lock_guard lock(mtx_);
319
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
320
10
        }
321
10
        if (stopped()) return;
322
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
323
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
324
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
325
10
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
326
10
        ret = instance_recycler->do_recycle();
327
        // If instance recycler has been aborted, don't finish this job
328
329
10
        if (!instance_recycler->stopped()) {
330
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
331
10
                                        ret == 0, ctime_ms);
332
10
        }
333
10
        if (instance_recycler->stopped() || ret != 0) {
334
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
335
0
        }
336
10
        {
337
10
            std::lock_guard lock(mtx_);
338
10
            recycling_instance_map_.erase(instance_id);
339
10
        }
340
341
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
342
10
        auto elpased_ms = now - ctime_ms;
343
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
344
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
345
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
346
10
                                             now + config::recycle_interval_seconds * 1000);
347
10
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
348
10
        LOG(INFO) << "recycle instance done, "
349
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
350
10
                  << " now: " << now;
351
352
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
353
354
10
        LOG_WARNING("finish recycle instance")
355
10
                .tag("instance_id", instance_id)
356
10
                .tag("cost_ms", elpased_ms);
357
10
    }
358
8
}
359
360
4
void Recycler::lease_recycle_jobs() {
361
54
    while (!stopped()) {
362
50
        std::vector<std::string> instances;
363
50
        instances.reserve(recycling_instance_map_.size());
364
50
        {
365
50
            std::lock_guard lock(mtx_);
366
50
            for (auto& [id, _] : recycling_instance_map_) {
367
30
                instances.push_back(id);
368
30
            }
369
50
        }
370
50
        for (auto& i : instances) {
371
30
            std::string recycle_job_key;
372
30
            job_recycle_key({i}, &recycle_job_key);
373
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
374
30
            if (ret == 1) {
375
0
                std::lock_guard lock(mtx_);
376
0
                if (auto it = recycling_instance_map_.find(i);
377
0
                    it != recycling_instance_map_.end()) {
378
0
                    it->second->stop();
379
0
                }
380
0
            }
381
30
        }
382
50
        {
383
50
            std::unique_lock lock(mtx_);
384
50
            notifier_.wait_for(lock,
385
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
386
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
386
100
                               [&]() { return stopped(); });
387
50
        }
388
50
    }
389
4
}
390
391
4
void Recycler::check_recycle_tasks() {
392
7
    while (!stopped()) {
393
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
394
3
        {
395
3
            std::lock_guard lock(mtx_);
396
3
            recycling_instance_map = recycling_instance_map_;
397
3
        }
398
3
        for (auto& entry : recycling_instance_map) {
399
0
            entry.second->check_recycle_tasks();
400
0
        }
401
402
3
        std::unique_lock lock(mtx_);
403
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
404
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
404
6
                           [&]() { return stopped(); });
405
3
    }
406
4
}
407
408
4
int Recycler::start(brpc::Server* server) {
409
4
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
410
4
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
411
4
    S3Environment::getInstance();
412
413
4
    if (config::enable_checker) {
414
0
        checker_ = std::make_unique<Checker>(txn_kv_);
415
0
        int ret = checker_->start();
416
0
        std::string msg;
417
0
        if (ret != 0) {
418
0
            msg = "failed to start checker";
419
0
            LOG(ERROR) << msg;
420
0
            std::cerr << msg << std::endl;
421
0
            return ret;
422
0
        }
423
0
        msg = "checker started";
424
0
        LOG(INFO) << msg;
425
0
        std::cout << msg << std::endl;
426
0
    }
427
428
4
    if (server) {
429
        // Add service
430
1
        auto recycler_service =
431
1
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
432
1
        server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE);
433
1
    }
434
435
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
Line
Count
Source
435
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
436
12
    for (int i = 0; i < config::recycle_concurrency; ++i) {
437
8
        workers_.emplace_back([this] { recycle_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
Line
Count
Source
437
8
        workers_.emplace_back([this] { recycle_callback(); });
438
8
    }
439
440
4
    workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this);
441
4
    workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this);
442
443
4
    if (config::enable_snapshot_data_migrator) {
444
0
        snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_);
445
0
        int ret = snapshot_data_migrator_->start();
446
0
        if (ret != 0) {
447
0
            LOG(ERROR) << "failed to start snapshot data migrator";
448
0
            return ret;
449
0
        }
450
0
        LOG(INFO) << "snapshot data migrator started";
451
0
    }
452
453
4
    if (config::enable_snapshot_chain_compactor) {
454
0
        snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_);
455
0
        int ret = snapshot_chain_compactor_->start();
456
0
        if (ret != 0) {
457
0
            LOG(ERROR) << "failed to start snapshot chain compactor";
458
0
            return ret;
459
0
        }
460
0
        LOG(INFO) << "snapshot chain compactor started";
461
0
    }
462
463
4
    return 0;
464
4
}
465
466
4
void Recycler::stop() {
467
4
    stopped_ = true;
468
4
    notifier_.notify_all();
469
4
    pending_instance_cond_.notify_all();
470
4
    {
471
4
        std::lock_guard lock(mtx_);
472
4
        for (auto& [_, recycler] : recycling_instance_map_) {
473
0
            recycler->stop();
474
0
        }
475
4
    }
476
20
    for (auto& w : workers_) {
477
20
        if (w.joinable()) w.join();
478
20
    }
479
4
    if (checker_) {
480
0
        checker_->stop();
481
0
    }
482
4
    if (snapshot_data_migrator_) {
483
0
        snapshot_data_migrator_->stop();
484
0
    }
485
4
    if (snapshot_chain_compactor_) {
486
0
        snapshot_chain_compactor_->stop();
487
0
    }
488
4
}
489
490
// Per-instance cache that maps <index_id, schema_version> to the inverted index information
// of that schema, loading the schema from the kv store on a cache miss.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Looks up the inverted index info of <index_id, schema_version> and stores it in `res`.
    // `res` is left untouched when the schema is known to have no inverted index.
    //
    // Return 0 if success, 1 if schema kv not found, negative for error (a failed kv read
    // returns the numeric value of the TxnErrorCode).
    //
    // The lookup and the later insertion are not one atomic step: the lock is released while
    // the schema is read from kv, so two threads may both miss the cache for the same key and
    // both read and insert it. This keeps the critical section small at the price of an
    // occasional duplicated schema read; the duplicated insertion does not affect correctness.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // V1 is used when the schema does not record a storage format.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            for (auto& i : schema.index()) {
                // Only inverted indexes are collected; other index types are ignored.
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Records `index_info` for <index_id, schema_version>.
    // An empty `index_info.second` means this schema has no inverted index; such keys are kept
    // in a separate set. An already cached entry is never overwritten.
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    // Guards both containers below.
    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        size_t operator()(const Key& key) const {
            // Mix both components (boost::hash_combine style). Assigning the second hash over
            // the first would drop `index_id` from the result and make every key with the same
            // schema version land in the same bucket.
            size_t seed = std::hash<int64_t> {}(key.first);
            seed ^= std::hash<int32_t> {}(key.second) + static_cast<size_t>(0x9e3779b9) +
                    (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
582
583
// Builds the recycler of a single instance.
//
// txn_kv:             kv store handle, shared with the inverted index cache, the resource
//                     manager and the snapshot manager created here.
// instance:           instance info; a copy is kept in `instance_info_`.
// thread_pool_group:  thread pools handed over to this recycler.
// txn_lazy_committer: lazy committer whose resource manager is refreshed with `instance`.
//
// NOTE(review): several initializers read the members `txn_kv_` / `instance_id_` rather than
// the parameters, so they rely on those members being declared first in the class - confirm
// against the header before reordering members.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)),
          delete_bitmap_lock_white_list_(std::make_shared<DeleteBitmapLockWhiteList>()),
          resource_mgr_(std::make_shared<ResourceManager>(txn_kv_)) {
    delete_bitmap_lock_white_list_->init();
    resource_mgr_->init();

    snapshot_manager_ = create_snapshot_manager(txn_kv_);

    // Since the recycler's resource manager could not be notified when instance info changes,
    // we need to refresh the instance info here to ensure the resource manager has the latest info.
    txn_lazy_committer_->resource_manager()->refresh_instance(instance_id_, instance);
}
603
604
132
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably needed because InvertedIndexIdCache (owned through
// `inverted_index_id_cache_`) is only a complete type in this file - confirm against the header.
InstanceRecycler::~InstanceRecycler() = default;
605
606
116
int InstanceRecycler::init_obj_store_accessors() {
607
116
    for (const auto& obj_info : instance_info_.obj_info()) {
608
76
#ifdef UNIT_TEST
609
76
        auto accessor = std::make_shared<MockAccessor>();
610
#else
611
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
612
        if (!s3_conf) {
613
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
614
            return -1;
615
        }
616
617
        std::shared_ptr<S3Accessor> accessor;
618
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
619
        if (ret != 0) {
620
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
621
                         << " resource_id=" << obj_info.id();
622
            return ret;
623
        }
624
#endif
625
76
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
626
76
    }
627
628
116
    return 0;
629
116
}
630
631
116
int InstanceRecycler::init_storage_vault_accessors() {
632
116
    if (instance_info_.resource_ids().empty()) {
633
109
        return 0;
634
109
    }
635
636
7
    FullRangeGetOptions opts(txn_kv_);
637
7
    opts.prefetch = true;
638
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
639
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
640
641
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
642
18
        auto [k, v] = *kv;
643
18
        StorageVaultPB vault;
644
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
645
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
646
0
            return -1;
647
0
        }
648
18
        std::string recycler_storage_vault_white_list = accumulate(
649
18
                config::recycler_storage_vault_white_list.begin(),
650
18
                config::recycler_storage_vault_white_list.end(), std::string(),
651
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
651
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
652
18
        LOG_INFO("config::recycler_storage_vault_white_list")
653
18
                .tag("", recycler_storage_vault_white_list);
654
18
        if (!config::recycler_storage_vault_white_list.empty()) {
655
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
656
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
657
8
                it == config::recycler_storage_vault_white_list.end()) {
658
2
                LOG_WARNING(
659
2
                        "failed to init accessor for vault because this vault is not in "
660
2
                        "config::recycler_storage_vault_white_list. ")
661
2
                        .tag(" vault name:", vault.name())
662
2
                        .tag(" config::recycler_storage_vault_white_list:",
663
2
                             recycler_storage_vault_white_list);
664
2
                continue;
665
2
            }
666
8
        }
667
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
668
16
                                 &accessor_map_, &vault);
669
16
        if (vault.has_hdfs_info()) {
670
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
671
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
672
9
            int ret = accessor->init();
673
9
            if (ret != 0) {
674
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
675
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
676
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
677
4
                continue;
678
4
            }
679
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
680
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
681
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
682
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
683
#else
684
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
685
                       << "but HDFS storage vaults were detected";
686
#endif
687
7
        } else if (vault.has_obj_info()) {
688
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
689
7
            if (!s3_conf) {
690
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
691
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
692
1
                continue;
693
1
            }
694
695
6
            std::shared_ptr<S3Accessor> accessor;
696
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
697
6
            if (ret != 0) {
698
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
699
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
700
0
                             << " ret=" << ret
701
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
702
0
                continue;
703
0
            }
704
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
705
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
706
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
707
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
708
6
        }
709
16
    }
710
711
7
    if (!it->is_valid()) {
712
0
        LOG_WARNING("failed to get storage vault kv");
713
0
        return -1;
714
0
    }
715
716
7
    if (accessor_map_.empty()) {
717
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
718
1
        return -2;
719
1
    }
720
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
721
6
             instance_id_);
722
723
6
    return 0;
724
7
}
725
726
116
int InstanceRecycler::init() {
727
116
    int ret = init_obj_store_accessors();
728
116
    if (ret != 0) {
729
0
        return ret;
730
0
    }
731
732
116
    return init_storage_vault_accessors();
733
116
}
734
735
// Bundles several callables returning `int` into a single `std::function<int()>`.
// When the returned task is invoked, every callable runs, in argument order, no matter what
// the earlier ones returned. The task yields 0 if all of them returned 0, otherwise the return
// value of the last callable that reported a non-zero code.
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() -> int {
        int last_failure = 0;
        // Runs one callable and remembers its code if it is a failure.
        [[maybe_unused]] const auto run_one = [&last_failure](const auto& task) {
            if (const int rc = task(); rc != 0) {
                last_failure = rc;
            }
        };
        // A comma fold evaluates its operands from left to right.
        (run_one(funcs), ...);
        return last_failure;
    };
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
Line
Count
Source
736
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
737
10
    return [funcs...]() {
738
10
        return [](std::initializer_list<int> ret_vals) {
739
10
            int i = 0;
740
10
            for (int ret : ret_vals) {
741
10
                if (ret != 0) {
742
10
                    i = ret;
743
10
                }
744
10
            }
745
10
            return i;
746
10
        }({funcs()...});
747
10
    };
748
10
}
749
750
10
int InstanceRecycler::do_recycle() {
751
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
752
10
    tablet_metrics_context_.reset();
753
10
    segment_metrics_context_.reset();
754
10
    DORIS_CLOUD_DEFER {
755
10
        tablet_metrics_context_.finish_report();
756
10
        segment_metrics_context_.finish_report();
757
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
754
10
    DORIS_CLOUD_DEFER {
755
10
        tablet_metrics_context_.finish_report();
756
10
        segment_metrics_context_.finish_report();
757
10
    };
758
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
759
0
        int res = recycle_cluster_snapshots();
760
0
        if (res != 0) {
761
0
            return -1;
762
0
        }
763
0
        return recycle_deleted_instance();
764
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
765
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
766
10
                                        fmt::format("instance id {}", instance_id_),
767
120
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
767
120
                                        [](int r) { return r != 0; });
768
10
        sync_executor
769
10
                .add(task_wrapper(
770
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
770
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
771
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
771
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
772
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
773
                                   // becase they may both recycle the same set of tablets
774
                        // recycle dropped table or idexes(mv, rollup)
775
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
775
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
776
                        // recycle dropped partitions
777
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
777
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
778
10
                .add(task_wrapper(
779
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
779
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
780
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
780
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
781
10
                .add(task_wrapper(
782
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
782
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
783
10
                .add(task_wrapper(
784
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
784
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
785
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
785
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
786
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
786
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
787
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
787
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
788
10
                .add(task_wrapper(
789
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
789
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
790
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
790
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
791
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
Line
Count
Source
791
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
792
10
        bool finished = true;
793
10
        std::vector<int> rets = sync_executor.when_all(&finished);
794
120
        for (int ret : rets) {
795
120
            if (ret != 0) {
796
0
                return ret;
797
0
            }
798
120
        }
799
10
        return finished ? 0 : -1;
800
10
    } else {
801
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
802
0
                     << " instance_id=" << instance_id_;
803
0
        return -1;
804
0
    }
805
10
}
806
807
/**
808
* 1. delete all remote data
809
* 2. delete all kv
810
* 3. remove instance kv
811
*/
812
5
int InstanceRecycler::recycle_deleted_instance() {
813
5
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
814
815
5
    int ret = 0;
816
5
    auto start_time = steady_clock::now();
817
818
5
    DORIS_CLOUD_DEFER {
819
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
820
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
821
5
                     << " recycle deleted instance, cost=" << cost
822
5
                     << "s, instance_id=" << instance_id_;
823
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
818
5
    DORIS_CLOUD_DEFER {
819
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
820
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
821
5
                     << " recycle deleted instance, cost=" << cost
822
5
                     << "s, instance_id=" << instance_id_;
823
5
    };
824
825
    // Step 1: Recycle tmp rowsets (contains ref count but txn is not committed)
826
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
827
5
        int res = recycle_tmp_rowsets();
828
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
829
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
830
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
831
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
832
            // and cannot be recycled.
833
5
            res = recycle_tmp_rowsets();
834
5
        }
835
5
        return res;
836
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
Line
Count
Source
826
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
827
5
        int res = recycle_tmp_rowsets();
828
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
829
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
830
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
831
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
832
            // and cannot be recycled.
833
5
            res = recycle_tmp_rowsets();
834
5
        }
835
5
        return res;
836
5
    };
837
5
    if (recycle_tmp_rowsets_with_mark_delete_enabled() != 0) {
838
0
        LOG_WARNING("failed to recycle tmp rowsets").tag("instance_id", instance_id_);
839
0
        ret = -1;
840
0
        return -1;
841
0
    }
842
843
    // Step 2: Recycle versioned rowsets in recycle space (already marked for deletion)
844
5
    if (recycle_versioned_rowsets() != 0) {
845
0
        LOG_WARNING("failed to recycle versioned rowsets").tag("instance_id", instance_id_);
846
0
        ret = -1;
847
0
        return -1;
848
0
    }
849
850
    // Step 3: Recycle operation logs (can recycle logs not referenced by snapshots)
851
5
    if (recycle_operation_logs() != 0) {
852
0
        LOG_WARNING("failed to recycle operation logs").tag("instance_id", instance_id_);
853
0
        ret = -1;
854
0
        return -1;
855
0
    }
856
857
    // Step 4: Check if there are still cluster snapshots
858
5
    bool has_snapshots = false;
859
5
    if (has_cluster_snapshots(&has_snapshots) != 0) {
860
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
861
0
        ret = -1;
862
0
        return -1;
863
5
    } else if (has_snapshots) {
864
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
865
1
        return 0;
866
1
    }
867
868
4
    bool snapshot_enabled = instance_info().has_snapshot_switch_status() &&
869
4
                            instance_info().snapshot_switch_status() !=
870
1
                                    SnapshotSwitchStatus::SNAPSHOT_SWITCH_DISABLED;
871
4
    if (snapshot_enabled) {
872
1
        bool has_unrecycled_rowsets = false;
873
1
        if (recycle_ref_rowsets(&has_unrecycled_rowsets) != 0) {
874
0
            LOG_WARNING("failed to recycle ref rowsets").tag("instance_id", instance_id_);
875
0
            ret = -1;
876
0
            return -1;
877
1
        } else if (has_unrecycled_rowsets) {
878
0
            LOG_INFO("instance has referenced rowsets, skip recycling")
879
0
                    .tag("instance_id", instance_id_);
880
0
            return ret;
881
0
        }
882
3
    } else { // delete all remote data if snapshot is disabled
883
3
        for (auto& [_, accessor] : accessor_map_) {
884
3
            if (stopped()) {
885
0
                return ret;
886
0
            }
887
888
3
            LOG(INFO) << "begin to delete all objects in " << accessor->uri();
889
3
            int del_ret = accessor->delete_all();
890
3
            if (del_ret == 0) {
891
3
                LOG(INFO) << "successfully delete all objects in " << accessor->uri();
892
3
            } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
893
                // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
894
                // so the recycling has been successful.
895
0
                ret = -1;
896
0
            }
897
3
        }
898
899
3
        if (ret != 0) {
900
0
            LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
901
0
            return ret;
902
0
        }
903
3
    }
904
905
    // Check successor instance, if exists, skip deleting kv because successor instance may still need the data in kv
906
4
    if (instance_info_.has_successor_instance_id() &&
907
4
        !instance_info_.successor_instance_id().empty()) {
908
0
        std::string key = instance_key(instance_info_.successor_instance_id());
909
0
        std::unique_ptr<Transaction> txn;
910
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
911
0
        if (err != TxnErrorCode::TXN_OK) {
912
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_
913
0
                         << " successor_instance_id=" << instance_info_.successor_instance_id()
914
0
                         << " err=" << err;
915
0
            ret = -1;
916
0
            return -1;
917
0
        }
918
919
0
        std::string value;
920
0
        err = txn->get(key, &value);
921
0
        if (err == TxnErrorCode::TXN_OK) {
922
0
            LOG(INFO) << "instance successor instance is still exist, skip deleting kv,"
923
0
                      << " instance_id=" << instance_id_
924
0
                      << " successor_instance_id=" << instance_info_.successor_instance_id();
925
0
            return 0;
926
0
        } else if (err != TxnErrorCode::TXN_KEY_NOT_FOUND) {
927
0
            LOG(WARNING) << "failed to get successor instance, instance_id=" << instance_id_
928
0
                         << " successor_instance_id=" << instance_info_.successor_instance_id()
929
0
                         << " err=" << err;
930
0
            ret = -1;
931
0
            return -1;
932
0
        }
933
0
    }
934
935
    // delete all kv
936
4
    std::unique_ptr<Transaction> txn;
937
4
    TxnErrorCode err = txn_kv_->create_txn(&txn);
938
4
    if (err != TxnErrorCode::TXN_OK) {
939
0
        LOG(WARNING) << "failed to create txn";
940
0
        ret = -1;
941
0
        return -1;
942
0
    }
943
4
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
944
    // delete kv before deleting objects to prevent the checker from misjudging data loss
945
4
    std::string start_txn_key = txn_key_prefix(instance_id_);
946
4
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
947
4
    txn->remove(start_txn_key, end_txn_key);
948
4
    std::string start_version_key = version_key_prefix(instance_id_);
949
4
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
950
4
    txn->remove(start_version_key, end_version_key);
951
4
    std::string start_meta_key = meta_key_prefix(instance_id_);
952
4
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
953
4
    txn->remove(start_meta_key, end_meta_key);
954
4
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
955
4
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
956
4
    txn->remove(start_recycle_key, end_recycle_key);
957
4
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
958
4
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
959
4
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
960
4
    std::string start_copy_key = copy_key_prefix(instance_id_);
961
4
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
962
4
    txn->remove(start_copy_key, end_copy_key);
963
    // should not remove job key range, because we need to reserve job recycle kv
964
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
965
4
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
966
4
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
967
4
    txn->remove(start_job_tablet_key, end_job_tablet_key);
968
4
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
969
4
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
970
4
    std::string start_vault_key = storage_vault_key(key_info0);
971
4
    std::string end_vault_key = storage_vault_key(key_info1);
972
4
    txn->remove(start_vault_key, end_vault_key);
973
4
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
974
4
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
975
4
    txn->remove(versioned_version_key_start, versioned_version_key_end);
976
4
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
977
4
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
978
4
    txn->remove(versioned_index_key_start, versioned_index_key_end);
979
4
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
980
4
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
981
4
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
982
4
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
983
4
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
984
4
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
985
4
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
986
4
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
987
4
    txn->remove(versioned_data_key_start, versioned_data_key_end);
988
4
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
989
4
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
990
4
    txn->remove(versioned_log_key_start, versioned_log_key_end);
991
4
    err = txn->commit();
992
4
    if (err != TxnErrorCode::TXN_OK) {
993
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
994
0
        ret = -1;
995
0
    }
996
997
4
    if (ret == 0) {
998
        // remove instance kv
999
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
1000
4
        err = txn_kv_->create_txn(&txn);
1001
4
        if (err != TxnErrorCode::TXN_OK) {
1002
0
            LOG(WARNING) << "failed to create txn";
1003
0
            ret = -1;
1004
0
            return ret;
1005
0
        }
1006
4
        std::string key;
1007
4
        instance_key({instance_id_}, &key);
1008
4
        txn->atomic_add(system_meta_service_instance_update_key(), 1);
1009
4
        txn->remove(key);
1010
4
        err = txn->commit();
1011
4
        if (err != TxnErrorCode::TXN_OK) {
1012
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
1013
0
                         << " err=" << err;
1014
0
            ret = -1;
1015
0
        }
1016
4
    }
1017
4
    return ret;
1018
4
}
1019
1020
int InstanceRecycler::check_rowset_exists(int64_t tablet_id, const std::string& rowset_id,
1021
9
                                          bool* exists, PackedFileRecycleStats* stats) {
1022
9
    if (exists == nullptr) {
1023
0
        return -1;
1024
0
    }
1025
9
    *exists = false;
1026
1027
9
    std::string begin = meta_rowset_key({instance_id_, tablet_id, 0});
1028
9
    std::string end = meta_rowset_key({instance_id_, tablet_id + 1, 0});
1029
9
    std::string scan_begin = begin;
1030
1031
9
    while (true) {
1032
9
        std::unique_ptr<RangeGetIterator> it_range;
1033
9
        int get_ret = txn_get(txn_kv_.get(), scan_begin, end, it_range);
1034
9
        if (get_ret < 0) {
1035
0
            LOG_WARNING("failed to scan rowset metas when recycling packed file")
1036
0
                    .tag("instance_id", instance_id_)
1037
0
                    .tag("tablet_id", tablet_id)
1038
0
                    .tag("ret", get_ret);
1039
0
            return -1;
1040
0
        }
1041
9
        if (get_ret == 1 || it_range == nullptr || !it_range->has_next()) {
1042
6
            return 0;
1043
6
        }
1044
1045
3
        std::string last_key;
1046
3
        while (it_range->has_next()) {
1047
3
            auto [k, v] = it_range->next();
1048
3
            last_key.assign(k.data(), k.size());
1049
3
            doris::RowsetMetaCloudPB rowset_meta;
1050
3
            if (!rowset_meta.ParseFromArray(v.data(), v.size())) {
1051
0
                LOG_WARNING("malformed rowset meta when checking packed file rowset existence")
1052
0
                        .tag("instance_id", instance_id_)
1053
0
                        .tag("tablet_id", tablet_id)
1054
0
                        .tag("key", hex(k));
1055
0
                continue;
1056
0
            }
1057
3
            if (stats) {
1058
3
                ++stats->rowset_scan_count;
1059
3
            }
1060
3
            if (rowset_meta.rowset_id_v2() == rowset_id) {
1061
3
                *exists = true;
1062
3
                return 0;
1063
3
            }
1064
3
        }
1065
1066
0
        if (!it_range->more()) {
1067
0
            return 0;
1068
0
        }
1069
1070
        // Continue scanning from the next key to keep each transaction short.
1071
0
        scan_begin = std::move(last_key);
1072
0
        scan_begin.push_back('\x00');
1073
0
    }
1074
9
}
1075
1076
int InstanceRecycler::check_recycle_and_tmp_rowset_exists(int64_t tablet_id,
1077
                                                          const std::string& rowset_id,
1078
                                                          int64_t txn_id, bool* recycle_exists,
1079
11
                                                          bool* tmp_exists) {
1080
11
    if (recycle_exists == nullptr || tmp_exists == nullptr) {
1081
0
        return -1;
1082
0
    }
1083
11
    *recycle_exists = false;
1084
11
    *tmp_exists = false;
1085
1086
11
    if (txn_id <= 0) {
1087
0
        LOG_WARNING("invalid txn id when checking recycle/tmp rowset existence")
1088
0
                .tag("instance_id", instance_id_)
1089
0
                .tag("tablet_id", tablet_id)
1090
0
                .tag("rowset_id", rowset_id)
1091
0
                .tag("txn_id", txn_id);
1092
0
        return -1;
1093
0
    }
1094
1095
11
    std::unique_ptr<Transaction> txn;
1096
11
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1097
11
    if (err != TxnErrorCode::TXN_OK) {
1098
0
        LOG_WARNING("failed to create txn when checking recycle/tmp rowset existence")
1099
0
                .tag("instance_id", instance_id_)
1100
0
                .tag("tablet_id", tablet_id)
1101
0
                .tag("rowset_id", rowset_id)
1102
0
                .tag("txn_id", txn_id)
1103
0
                .tag("err", err);
1104
0
        return -1;
1105
0
    }
1106
1107
11
    std::string recycle_key = recycle_rowset_key({instance_id_, tablet_id, rowset_id});
1108
11
    auto ret = key_exists(txn.get(), recycle_key, true);
1109
11
    if (ret == TxnErrorCode::TXN_OK) {
1110
1
        *recycle_exists = true;
1111
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1112
0
        LOG_WARNING("failed to check recycle rowset existence")
1113
0
                .tag("instance_id", instance_id_)
1114
0
                .tag("tablet_id", tablet_id)
1115
0
                .tag("rowset_id", rowset_id)
1116
0
                .tag("key", hex(recycle_key))
1117
0
                .tag("err", ret);
1118
0
        return -1;
1119
0
    }
1120
1121
11
    std::string tmp_key = meta_rowset_tmp_key({instance_id_, txn_id, tablet_id});
1122
11
    ret = key_exists(txn.get(), tmp_key, true);
1123
11
    if (ret == TxnErrorCode::TXN_OK) {
1124
1
        *tmp_exists = true;
1125
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1126
0
        LOG_WARNING("failed to check tmp rowset existence")
1127
0
                .tag("instance_id", instance_id_)
1128
0
                .tag("tablet_id", tablet_id)
1129
0
                .tag("txn_id", txn_id)
1130
0
                .tag("key", hex(tmp_key))
1131
0
                .tag("err", ret);
1132
0
        return -1;
1133
0
    }
1134
1135
11
    return 0;
1136
11
}
1137
1138
std::pair<std::string, std::shared_ptr<StorageVaultAccessor>>
1139
8
InstanceRecycler::resolve_packed_file_accessor(const std::string& hint) {
1140
8
    if (!hint.empty()) {
1141
8
        if (auto it = accessor_map_.find(hint); it != accessor_map_.end()) {
1142
8
            return {hint, it->second};
1143
8
        }
1144
8
    }
1145
1146
0
    return {"", nullptr};
1147
8
}
1148
1149
int InstanceRecycler::correct_packed_file_info(cloud::PackedFileInfoPB* packed_info, bool* changed,
1150
                                               const std::string& packed_file_path,
1151
3
                                               PackedFileRecycleStats* stats) {
1152
3
    bool local_changed = false;
1153
3
    int64_t left_num = 0;
1154
3
    int64_t left_bytes = 0;
1155
3
    bool all_small_files_confirmed = true;
1156
3
    LOG(INFO) << "begin to correct file: " << packed_file_path;
1157
1158
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1159
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1160
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1161
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1162
14
        LOG_INFO("packed slice correction status")
1163
14
                .tag("instance_id", instance_id_)
1164
14
                .tag("packed_file_path", packed_file_path)
1165
14
                .tag("small_file_path", file.path())
1166
14
                .tag("tablet_id", tablet_id)
1167
14
                .tag("rowset_id", rowset_id)
1168
14
                .tag("txn_id", txn_id)
1169
14
                .tag("size", file.size())
1170
14
                .tag("deleted", file.deleted())
1171
14
                .tag("corrected", file.corrected())
1172
14
                .tag("confirmed_this_round", confirmed_this_round);
1173
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
Line
Count
Source
1158
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1159
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1160
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1161
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1162
14
        LOG_INFO("packed slice correction status")
1163
14
                .tag("instance_id", instance_id_)
1164
14
                .tag("packed_file_path", packed_file_path)
1165
14
                .tag("small_file_path", file.path())
1166
14
                .tag("tablet_id", tablet_id)
1167
14
                .tag("rowset_id", rowset_id)
1168
14
                .tag("txn_id", txn_id)
1169
14
                .tag("size", file.size())
1170
14
                .tag("deleted", file.deleted())
1171
14
                .tag("corrected", file.corrected())
1172
14
                .tag("confirmed_this_round", confirmed_this_round);
1173
14
    };
1174
1175
17
    for (int i = 0; i < packed_info->slices_size(); ++i) {
1176
14
        auto* small_file = packed_info->mutable_slices(i);
1177
14
        if (small_file->deleted()) {
1178
3
            log_small_file_status(*small_file, small_file->corrected());
1179
3
            continue;
1180
3
        }
1181
1182
11
        if (small_file->corrected()) {
1183
0
            left_num++;
1184
0
            left_bytes += small_file->size();
1185
0
            log_small_file_status(*small_file, true);
1186
0
            continue;
1187
0
        }
1188
1189
11
        if (!small_file->has_tablet_id() || !small_file->has_rowset_id()) {
1190
0
            LOG_WARNING("packed file small file missing identifiers during correction")
1191
0
                    .tag("instance_id", instance_id_)
1192
0
                    .tag("small_file_path", small_file->path())
1193
0
                    .tag("index", i);
1194
0
            return -1;
1195
0
        }
1196
1197
11
        int64_t tablet_id = small_file->tablet_id();
1198
11
        const std::string& rowset_id = small_file->rowset_id();
1199
11
        if (!small_file->has_txn_id() || small_file->txn_id() <= 0) {
1200
0
            LOG_WARNING("packed file small file missing valid txn id during correction")
1201
0
                    .tag("instance_id", instance_id_)
1202
0
                    .tag("small_file_path", small_file->path())
1203
0
                    .tag("index", i)
1204
0
                    .tag("tablet_id", tablet_id)
1205
0
                    .tag("rowset_id", rowset_id)
1206
0
                    .tag("has_txn_id", small_file->has_txn_id())
1207
0
                    .tag("txn_id", small_file->has_txn_id() ? small_file->txn_id() : 0);
1208
0
            return -1;
1209
0
        }
1210
11
        int64_t txn_id = small_file->txn_id();
1211
11
        bool recycle_exists = false;
1212
11
        bool tmp_exists = false;
1213
11
        if (check_recycle_and_tmp_rowset_exists(tablet_id, rowset_id, txn_id, &recycle_exists,
1214
11
                                                &tmp_exists) != 0) {
1215
0
            return -1;
1216
0
        }
1217
1218
11
        bool small_file_confirmed = false;
1219
11
        if (tmp_exists) {
1220
1
            left_num++;
1221
1
            left_bytes += small_file->size();
1222
1
            small_file_confirmed = true;
1223
10
        } else if (recycle_exists) {
1224
1
            left_num++;
1225
1
            left_bytes += small_file->size();
1226
            // keep small_file_confirmed=false so the packed file remains uncorrected
1227
9
        } else {
1228
9
            bool rowset_exists = false;
1229
9
            if (check_rowset_exists(tablet_id, rowset_id, &rowset_exists, stats) != 0) {
1230
0
                return -1;
1231
0
            }
1232
1233
9
            if (!rowset_exists) {
1234
6
                if (!small_file->deleted()) {
1235
6
                    small_file->set_deleted(true);
1236
6
                    local_changed = true;
1237
6
                }
1238
6
                if (!small_file->corrected()) {
1239
6
                    small_file->set_corrected(true);
1240
6
                    local_changed = true;
1241
6
                }
1242
6
                small_file_confirmed = true;
1243
6
            } else {
1244
3
                left_num++;
1245
3
                left_bytes += small_file->size();
1246
3
                small_file_confirmed = true;
1247
3
            }
1248
9
        }
1249
1250
11
        if (!small_file_confirmed) {
1251
1
            all_small_files_confirmed = false;
1252
1
        }
1253
1254
11
        if (small_file->corrected() != small_file_confirmed) {
1255
4
            small_file->set_corrected(small_file_confirmed);
1256
4
            local_changed = true;
1257
4
        }
1258
1259
11
        log_small_file_status(*small_file, small_file_confirmed);
1260
11
    }
1261
1262
3
    if (packed_info->remaining_slice_bytes() != left_bytes) {
1263
3
        packed_info->set_remaining_slice_bytes(left_bytes);
1264
3
        local_changed = true;
1265
3
    }
1266
3
    if (packed_info->ref_cnt() != left_num) {
1267
3
        auto old_ref_cnt = packed_info->ref_cnt();
1268
3
        packed_info->set_ref_cnt(left_num);
1269
3
        LOG_INFO("corrected packed file ref count")
1270
3
                .tag("instance_id", instance_id_)
1271
3
                .tag("resource_id", packed_info->resource_id())
1272
3
                .tag("packed_file_path", packed_file_path)
1273
3
                .tag("old_ref_cnt", old_ref_cnt)
1274
3
                .tag("new_ref_cnt", left_num);
1275
3
        local_changed = true;
1276
3
    }
1277
3
    if (packed_info->corrected() != all_small_files_confirmed) {
1278
2
        packed_info->set_corrected(all_small_files_confirmed);
1279
2
        local_changed = true;
1280
2
    }
1281
3
    if (left_num == 0 && packed_info->state() != cloud::PackedFileInfoPB::RECYCLING) {
1282
1
        packed_info->set_state(cloud::PackedFileInfoPB::RECYCLING);
1283
1
        local_changed = true;
1284
1
    }
1285
1286
3
    if (changed != nullptr) {
1287
3
        *changed = local_changed;
1288
3
    }
1289
3
    return 0;
1290
3
}
1291
1292
int InstanceRecycler::process_single_packed_file(const std::string& packed_key,
1293
                                                 const std::string& packed_file_path,
1294
4
                                                 PackedFileRecycleStats* stats) {
1295
4
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
1296
4
    bool correction_ok = false;
1297
4
    cloud::PackedFileInfoPB packed_info;
1298
1299
4
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
1300
4
        if (stopped()) {
1301
0
            LOG_WARNING("recycler stopped before processing packed file")
1302
0
                    .tag("instance_id", instance_id_)
1303
0
                    .tag("packed_file_path", packed_file_path)
1304
0
                    .tag("attempt", attempt);
1305
0
            return -1;
1306
0
        }
1307
1308
4
        std::unique_ptr<Transaction> txn;
1309
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1310
4
        if (err != TxnErrorCode::TXN_OK) {
1311
0
            LOG_WARNING("failed to create txn when processing packed file")
1312
0
                    .tag("instance_id", instance_id_)
1313
0
                    .tag("packed_file_path", packed_file_path)
1314
0
                    .tag("attempt", attempt)
1315
0
                    .tag("err", err);
1316
0
            return -1;
1317
0
        }
1318
1319
4
        std::string packed_val;
1320
4
        err = txn->get(packed_key, &packed_val);
1321
4
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1322
0
            return 0;
1323
0
        }
1324
4
        if (err != TxnErrorCode::TXN_OK) {
1325
0
            LOG_WARNING("failed to get packed file kv")
1326
0
                    .tag("instance_id", instance_id_)
1327
0
                    .tag("packed_file_path", packed_file_path)
1328
0
                    .tag("attempt", attempt)
1329
0
                    .tag("err", err);
1330
0
            return -1;
1331
0
        }
1332
1333
4
        if (!packed_info.ParseFromString(packed_val)) {
1334
0
            LOG_WARNING("failed to parse packed file info")
1335
0
                    .tag("instance_id", instance_id_)
1336
0
                    .tag("packed_file_path", packed_file_path)
1337
0
                    .tag("attempt", attempt);
1338
0
            return -1;
1339
0
        }
1340
1341
4
        int64_t now_sec = ::time(nullptr);
1342
4
        bool corrected = packed_info.corrected();
1343
4
        bool due = config::force_immediate_recycle ||
1344
4
                   now_sec - packed_info.created_at_sec() >=
1345
4
                           config::packed_file_correction_delay_seconds;
1346
1347
4
        if (!corrected && due) {
1348
3
            bool changed = false;
1349
3
            if (correct_packed_file_info(&packed_info, &changed, packed_file_path, stats) != 0) {
1350
0
                LOG_WARNING("correct_packed_file_info failed")
1351
0
                        .tag("instance_id", instance_id_)
1352
0
                        .tag("packed_file_path", packed_file_path)
1353
0
                        .tag("attempt", attempt);
1354
0
                return -1;
1355
0
            }
1356
3
            if (changed) {
1357
3
                std::string updated;
1358
3
                if (!packed_info.SerializeToString(&updated)) {
1359
0
                    LOG_WARNING("failed to serialize packed file info after correction")
1360
0
                            .tag("instance_id", instance_id_)
1361
0
                            .tag("packed_file_path", packed_file_path)
1362
0
                            .tag("attempt", attempt);
1363
0
                    return -1;
1364
0
                }
1365
3
                txn->put(packed_key, updated);
1366
3
                err = txn->commit();
1367
3
                if (err == TxnErrorCode::TXN_OK) {
1368
3
                    if (stats) {
1369
3
                        ++stats->num_corrected;
1370
3
                    }
1371
3
                } else {
1372
0
                    if (err == TxnErrorCode::TXN_CONFLICT && attempt < max_retry_times) {
1373
0
                        LOG_WARNING(
1374
0
                                "failed to commit correction for packed file due to conflict, "
1375
0
                                "retrying")
1376
0
                                .tag("instance_id", instance_id_)
1377
0
                                .tag("packed_file_path", packed_file_path)
1378
0
                                .tag("attempt", attempt);
1379
0
                        sleep_for_packed_file_retry();
1380
0
                        packed_info.Clear();
1381
0
                        continue;
1382
0
                    }
1383
0
                    LOG_WARNING("failed to commit correction for packed file")
1384
0
                            .tag("instance_id", instance_id_)
1385
0
                            .tag("packed_file_path", packed_file_path)
1386
0
                            .tag("attempt", attempt)
1387
0
                            .tag("err", err);
1388
0
                    return -1;
1389
0
                }
1390
3
            }
1391
3
        }
1392
1393
4
        correction_ok = true;
1394
4
        break;
1395
4
    }
1396
1397
4
    if (!correction_ok) {
1398
0
        return -1;
1399
0
    }
1400
1401
4
    if (!(packed_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1402
4
          packed_info.ref_cnt() == 0)) {
1403
3
        return 0;
1404
3
    }
1405
1406
1
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
1407
0
        LOG_WARNING("packed file missing resource id when recycling")
1408
0
                .tag("instance_id", instance_id_)
1409
0
                .tag("packed_file_path", packed_file_path);
1410
0
        return -1;
1411
0
    }
1412
1
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
1413
1
    if (!accessor) {
1414
0
        LOG_WARNING("no accessor available to delete packed file")
1415
0
                .tag("instance_id", instance_id_)
1416
0
                .tag("packed_file_path", packed_file_path)
1417
0
                .tag("resource_id", packed_info.resource_id());
1418
0
        return -1;
1419
0
    }
1420
1
    int del_ret = accessor->delete_file(packed_file_path);
1421
1
    if (del_ret != 0 && del_ret != 1) {
1422
0
        LOG_WARNING("failed to delete packed file")
1423
0
                .tag("instance_id", instance_id_)
1424
0
                .tag("packed_file_path", packed_file_path)
1425
0
                .tag("resource_id", resource_id)
1426
0
                .tag("ret", del_ret);
1427
0
        return -1;
1428
0
    }
1429
1
    if (del_ret == 1) {
1430
0
        LOG_INFO("packed file already removed")
1431
0
                .tag("instance_id", instance_id_)
1432
0
                .tag("packed_file_path", packed_file_path)
1433
0
                .tag("resource_id", resource_id);
1434
1
    } else {
1435
1
        LOG_INFO("deleted packed file")
1436
1
                .tag("instance_id", instance_id_)
1437
1
                .tag("packed_file_path", packed_file_path)
1438
1
                .tag("resource_id", resource_id);
1439
1
    }
1440
1441
1
    for (int del_attempt = 1; del_attempt <= max_retry_times; ++del_attempt) {
1442
1
        std::unique_ptr<Transaction> del_txn;
1443
1
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
1444
1
        if (err != TxnErrorCode::TXN_OK) {
1445
0
            LOG_WARNING("failed to create txn when removing packed file kv")
1446
0
                    .tag("instance_id", instance_id_)
1447
0
                    .tag("packed_file_path", packed_file_path)
1448
0
                    .tag("del_attempt", del_attempt)
1449
0
                    .tag("err", err);
1450
0
            return -1;
1451
0
        }
1452
1453
1
        std::string latest_val;
1454
1
        err = del_txn->get(packed_key, &latest_val);
1455
1
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1456
0
            return 0;
1457
0
        }
1458
1
        if (err != TxnErrorCode::TXN_OK) {
1459
0
            LOG_WARNING("failed to re-read packed file kv before removal")
1460
0
                    .tag("instance_id", instance_id_)
1461
0
                    .tag("packed_file_path", packed_file_path)
1462
0
                    .tag("del_attempt", del_attempt)
1463
0
                    .tag("err", err);
1464
0
            return -1;
1465
0
        }
1466
1467
1
        cloud::PackedFileInfoPB latest_info;
1468
1
        if (!latest_info.ParseFromString(latest_val)) {
1469
0
            LOG_WARNING("failed to parse packed file info before removal")
1470
0
                    .tag("instance_id", instance_id_)
1471
0
                    .tag("packed_file_path", packed_file_path)
1472
0
                    .tag("del_attempt", del_attempt);
1473
0
            return -1;
1474
0
        }
1475
1476
1
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1477
1
              latest_info.ref_cnt() == 0)) {
1478
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
1479
0
                    .tag("instance_id", instance_id_)
1480
0
                    .tag("packed_file_path", packed_file_path)
1481
0
                    .tag("del_attempt", del_attempt);
1482
0
            return 0;
1483
0
        }
1484
1485
1
        del_txn->remove(packed_key);
1486
1
        err = del_txn->commit();
1487
1
        if (err == TxnErrorCode::TXN_OK) {
1488
1
            if (stats) {
1489
1
                ++stats->num_deleted;
1490
1
                stats->bytes_deleted += static_cast<int64_t>(packed_key.size()) +
1491
1
                                        static_cast<int64_t>(latest_val.size());
1492
1
                if (del_ret == 0 || del_ret == 1) {
1493
1
                    ++stats->num_object_deleted;
1494
1
                    int64_t object_size = latest_info.total_slice_bytes();
1495
1
                    if (object_size <= 0) {
1496
0
                        object_size = packed_info.total_slice_bytes();
1497
0
                    }
1498
1
                    stats->bytes_object_deleted += object_size;
1499
1
                }
1500
1
            }
1501
1
            LOG_INFO("removed packed file metadata")
1502
1
                    .tag("instance_id", instance_id_)
1503
1
                    .tag("packed_file_path", packed_file_path);
1504
1
            return 0;
1505
1
        }
1506
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
1507
0
            if (del_attempt >= max_retry_times) {
1508
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
1509
0
                        .tag("instance_id", instance_id_)
1510
0
                        .tag("packed_file_path", packed_file_path)
1511
0
                        .tag("del_attempt", del_attempt);
1512
0
                return -1;
1513
0
            }
1514
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
1515
0
                    .tag("instance_id", instance_id_)
1516
0
                    .tag("packed_file_path", packed_file_path)
1517
0
                    .tag("del_attempt", del_attempt);
1518
0
            sleep_for_packed_file_retry();
1519
0
            continue;
1520
0
        }
1521
0
        LOG_WARNING("failed to remove packed file kv")
1522
0
                .tag("instance_id", instance_id_)
1523
0
                .tag("packed_file_path", packed_file_path)
1524
0
                .tag("del_attempt", del_attempt)
1525
0
                .tag("err", err);
1526
0
        return -1;
1527
0
    }
1528
1529
0
    return -1;
1530
1
}
1531
1532
int InstanceRecycler::handle_packed_file_kv(std::string_view key, std::string_view /*value*/,
1533
4
                                            PackedFileRecycleStats* stats, int* ret) {
1534
4
    if (stats) {
1535
4
        ++stats->num_scanned;
1536
4
    }
1537
4
    std::string packed_file_path;
1538
4
    if (!decode_packed_file_key(key, &packed_file_path)) {
1539
0
        LOG_WARNING("failed to decode packed file key")
1540
0
                .tag("instance_id", instance_id_)
1541
0
                .tag("key", hex(key));
1542
0
        if (stats) {
1543
0
            ++stats->num_failed;
1544
0
        }
1545
0
        if (ret) {
1546
0
            *ret = -1;
1547
0
        }
1548
0
        return 0;
1549
0
    }
1550
1551
4
    std::string packed_key(key);
1552
4
    int process_ret = process_single_packed_file(packed_key, packed_file_path, stats);
1553
4
    if (process_ret != 0) {
1554
0
        if (stats) {
1555
0
            ++stats->num_failed;
1556
0
        }
1557
0
        if (ret) {
1558
0
            *ret = -1;
1559
0
        }
1560
0
    }
1561
4
    return 0;
1562
4
}
1563
1564
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
1565
9.77k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
1566
9.77k
    if (config::force_immediate_recycle) {
1567
15
        return 0L;
1568
15
    }
1569
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1570
9.75k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1571
9.75k
    int64_t retention_seconds = config::retention_seconds;
1572
9.75k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1573
7.80k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1574
7.80k
    }
1575
9.75k
    int64_t final_expiration = expiration + retention_seconds;
1576
9.75k
    if (*earlest_ts > final_expiration) {
1577
7
        *earlest_ts = final_expiration;
1578
7
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1579
7
    }
1580
9.75k
    return final_expiration;
1581
9.77k
}
1582
1583
int64_t calculate_partition_expired_time(
1584
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
1585
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
1586
9
    if (config::force_immediate_recycle) {
1587
3
        return 0L;
1588
3
    }
1589
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
1590
6
                                                            : partition_meta_pb.creation_time();
1591
6
    int64_t retention_seconds = config::retention_seconds;
1592
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
1593
6
        retention_seconds =
1594
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1595
6
    }
1596
6
    int64_t final_expiration = expiration + retention_seconds;
1597
6
    if (*earlest_ts > final_expiration) {
1598
2
        *earlest_ts = final_expiration;
1599
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1600
2
    }
1601
6
    return final_expiration;
1602
9
}
1603
1604
int64_t calculate_index_expired_time(const std::string& instance_id_,
1605
                                     const RecycleIndexPB& index_meta_pb,
1606
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1607
10
    if (config::force_immediate_recycle) {
1608
4
        return 0L;
1609
4
    }
1610
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1611
6
                                                        : index_meta_pb.creation_time();
1612
6
    int64_t retention_seconds = config::retention_seconds;
1613
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1614
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1615
6
    }
1616
6
    int64_t final_expiration = expiration + retention_seconds;
1617
6
    if (*earlest_ts > final_expiration) {
1618
2
        *earlest_ts = final_expiration;
1619
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1620
2
    }
1621
6
    return final_expiration;
1622
10
}
1623
1624
int64_t calculate_tmp_rowset_expired_time(
1625
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1626
106k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1627
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1628
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1629
    //  duration or timeout always < `retention_time` in practice.
1630
106k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1631
106k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1632
106k
                                 : tmp_rowset_meta_pb.creation_time();
1633
106k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1634
106k
    int64_t final_expiration = expiration + config::retention_seconds;
1635
106k
    if (*earlest_ts > final_expiration) {
1636
24
        *earlest_ts = final_expiration;
1637
24
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1638
24
    }
1639
106k
    return final_expiration;
1640
106k
}
1641
1642
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1643
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1644
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1645
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1646
8
        *earlest_ts = final_expiration / 1000;
1647
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1648
8
    }
1649
30.0k
    return final_expiration;
1650
30.0k
}
1651
1652
int64_t calculate_restore_job_expired_time(
1653
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1654
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1655
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1656
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1657
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1658
        // final state, recycle immediately
1659
41
        return 0L;
1660
41
    }
1661
    // not final state, wait much longer than the FE's timeout(1 day)
1662
0
    int64_t last_modified_s =
1663
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1664
0
    int64_t expiration = restore_job.expired_at_s() > 0
1665
0
                                 ? last_modified_s + restore_job.expired_at_s()
1666
0
                                 : last_modified_s;
1667
0
    int64_t final_expiration = expiration + config::retention_seconds;
1668
0
    if (*earlest_ts > final_expiration) {
1669
0
        *earlest_ts = final_expiration;
1670
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1671
0
    }
1672
0
    return final_expiration;
1673
41
}
1674
1675
2
int InstanceRecycler::abort_txn_for_related_rowset(int64_t txn_id) {
1676
2
    AbortTxnRequest req;
1677
2
    TxnInfoPB txn_info;
1678
2
    MetaServiceCode code = MetaServiceCode::OK;
1679
2
    std::string msg;
1680
2
    std::stringstream ss;
1681
2
    std::unique_ptr<Transaction> txn;
1682
2
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1683
2
    if (err != TxnErrorCode::TXN_OK) {
1684
0
        LOG_WARNING("failed to create txn").tag("err", err);
1685
0
        return -1;
1686
0
    }
1687
1688
    // get txn index
1689
2
    TxnIndexPB txn_idx_pb;
1690
2
    auto index_key = txn_index_key({instance_id_, txn_id});
1691
2
    std::string index_val;
1692
2
    err = txn->get(index_key, &index_val);
1693
2
    if (err != TxnErrorCode::TXN_OK) {
1694
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1695
            // maybe recycled
1696
0
            LOG_INFO("txn index not found, txn_id={} instance_id={}", txn_id, instance_id_)
1697
0
                    .tag("key", hex(index_key))
1698
0
                    .tag("txn_id", txn_id);
1699
0
            return 0;
1700
0
        }
1701
0
        LOG_WARNING("failed to get txn index")
1702
0
                .tag("err", err)
1703
0
                .tag("key", hex(index_key))
1704
0
                .tag("txn_id", txn_id);
1705
0
        return -1;
1706
0
    }
1707
2
    if (!txn_idx_pb.ParseFromString(index_val)) {
1708
0
        LOG_WARNING("failed to parse txn index")
1709
0
                .tag("err", err)
1710
0
                .tag("key", hex(index_key))
1711
0
                .tag("txn_id", txn_id);
1712
0
        return -1;
1713
0
    }
1714
1715
2
    auto info_key = txn_info_key({instance_id_, txn_idx_pb.tablet_index().db_id(), txn_id});
1716
2
    std::string info_val;
1717
2
    err = txn->get(info_key, &info_val);
1718
2
    if (err != TxnErrorCode::TXN_OK) {
1719
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1720
            // maybe recycled
1721
0
            LOG_INFO("txn info not found, txn_id={} instance_id={}", txn_id, instance_id_)
1722
0
                    .tag("key", hex(info_key))
1723
0
                    .tag("txn_id", txn_id);
1724
0
            return 0;
1725
0
        }
1726
0
        LOG_WARNING("failed to get txn info")
1727
0
                .tag("err", err)
1728
0
                .tag("key", hex(info_key))
1729
0
                .tag("txn_id", txn_id);
1730
0
        return -1;
1731
0
    }
1732
2
    if (!txn_info.ParseFromString(info_val)) {
1733
0
        LOG_WARNING("failed to parse txn info")
1734
0
                .tag("err", err)
1735
0
                .tag("key", hex(info_key))
1736
0
                .tag("txn_id", txn_id);
1737
0
        return -1;
1738
0
    }
1739
1740
2
    if (txn_info.status() != TxnStatusPB::TXN_STATUS_PREPARED) {
1741
0
        LOG_INFO("txn is not prepared status, txn_id={} status={}", txn_id, txn_info.status())
1742
0
                .tag("key", hex(info_key))
1743
0
                .tag("txn_id", txn_id);
1744
0
        return 0;
1745
0
    }
1746
1747
2
    req.set_txn_id(txn_id);
1748
1749
2
    LOG(INFO) << "begin abort txn for related rowset, txn_id=" << txn_id
1750
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString();
1751
1752
2
    _abort_txn(instance_id_, &req, txn.get(), txn_info, ss, code, msg);
1753
2
    err = txn->commit();
1754
2
    if (err != TxnErrorCode::TXN_OK) {
1755
0
        code = cast_as<ErrCategory::COMMIT>(err);
1756
0
        ss << "failed to commit kv txn, txn_id=" << txn_info.txn_id() << " err=" << err;
1757
0
        msg = ss.str();
1758
0
        return -1;
1759
0
    }
1760
1761
2
    LOG(INFO) << "finish abort txn for related rowset, txn_id=" << txn_id
1762
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString()
1763
2
              << " code=" << code << " msg=" << msg;
1764
1765
2
    return 0;
1766
2
}
1767
1768
4
int InstanceRecycler::abort_job_for_related_rowset(const RowsetMetaCloudPB& rowset_meta) {
1769
4
    FinishTabletJobRequest req;
1770
4
    FinishTabletJobResponse res;
1771
4
    req.set_action(FinishTabletJobRequest::ABORT);
1772
4
    MetaServiceCode code = MetaServiceCode::OK;
1773
4
    std::string msg;
1774
4
    std::stringstream ss;
1775
1776
4
    TabletIndexPB tablet_idx;
1777
4
    int ret = get_tablet_idx(txn_kv_.get(), instance_id_, rowset_meta.tablet_id(), tablet_idx);
1778
4
    if (ret == 1) {
1779
        // tablet maybe recycled, directly return 0
1780
1
        return 0;
1781
3
    } else if (ret != 0) {
1782
0
        LOG(WARNING) << "failed to get tablet index, tablet_id=" << rowset_meta.tablet_id()
1783
0
                     << " instance_id=" << instance_id_ << " ret=" << ret;
1784
0
        return ret;
1785
0
    }
1786
1787
3
    std::unique_ptr<Transaction> txn;
1788
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1789
3
    if (err != TxnErrorCode::TXN_OK) {
1790
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_ << " err=" << err;
1791
0
        return -1;
1792
0
    }
1793
1794
3
    std::string job_key =
1795
3
            job_tablet_key({instance_id_, tablet_idx.table_id(), tablet_idx.index_id(),
1796
3
                            tablet_idx.partition_id(), tablet_idx.tablet_id()});
1797
3
    std::string job_val;
1798
3
    err = txn->get(job_key, &job_val);
1799
3
    if (err != TxnErrorCode::TXN_OK) {
1800
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1801
0
            LOG(INFO) << "job not exists, instance_id=" << instance_id_
1802
0
                      << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1803
0
            return 0;
1804
0
        }
1805
0
        LOG(WARNING) << "failed to get job, instance_id=" << instance_id_
1806
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " err=" << err
1807
0
                     << " key=" << hex(job_key);
1808
0
        return -1;
1809
0
    }
1810
1811
3
    TabletJobInfoPB job_pb;
1812
3
    if (!job_pb.ParseFromString(job_val)) {
1813
0
        LOG(WARNING) << "failed to parse job, instance_id=" << instance_id_
1814
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1815
0
        return -1;
1816
0
    }
1817
1818
3
    std::string job_id {};
1819
3
    if (!job_pb.compaction().empty()) {
1820
2
        for (const auto& c : job_pb.compaction()) {
1821
2
            if (c.id() == rowset_meta.job_id()) {
1822
2
                job_id = c.id();
1823
2
                break;
1824
2
            }
1825
2
        }
1826
2
    } else if (job_pb.has_schema_change()) {
1827
1
        job_id = job_pb.schema_change().id();
1828
1
    }
1829
1830
3
    if (!job_id.empty() && rowset_meta.job_id() == job_id) {
1831
3
        LOG(INFO) << "begin to abort job for related rowset, job_id=" << rowset_meta.job_id()
1832
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id();
1833
3
        req.mutable_job()->CopyFrom(job_pb);
1834
3
        req.set_action(FinishTabletJobRequest::ABORT);
1835
3
        _finish_tablet_job(&req, &res, instance_id_, txn, txn_kv_.get(),
1836
3
                           delete_bitmap_lock_white_list_.get(), resource_mgr_.get(), code, msg,
1837
3
                           ss);
1838
3
        if (code != MetaServiceCode::OK) {
1839
0
            LOG(WARNING) << "failed to abort job, instance_id=" << instance_id_
1840
0
                         << " tablet_id=" << tablet_idx.tablet_id() << " code=" << code
1841
0
                         << " msg=" << msg;
1842
0
            return -1;
1843
0
        }
1844
3
        LOG(INFO) << "finish abort job for related rowset, job_id=" << rowset_meta.job_id()
1845
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id()
1846
3
                  << " code=" << code << " msg=" << msg;
1847
3
    } else {
1848
        // clang-format off
1849
0
        LOG(INFO) << "there is no job for related rowset, directly recycle rowset data"
1850
0
                  << ", instance_id=" << instance_id_ 
1851
0
                  << ", tablet_id=" << tablet_idx.tablet_id() 
1852
0
                  << ", job_id=" << job_id
1853
0
                  << ", rowset_id=" << rowset_meta.rowset_id_v2();
1854
        // clang-format on
1855
0
    }
1856
1857
3
    return 0;
1858
3
}
1859
1860
template <typename T>
1861
55.7k
RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) {
1862
55.7k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1863
51.9k
        return rowset_meta_pb.mutable_rowset_meta();
1864
51.9k
    } else {
1865
51.9k
        return &rowset_meta_pb;
1866
51.9k
    }
1867
55.7k
}
_ZN5doris5cloud19mutable_rowset_metaINS0_15RecycleRowsetPBEEEPNS_17RowsetMetaCloudPBERT_
Line
Count
Source
1861
3.75k
RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) {
1862
3.75k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1863
3.75k
        return rowset_meta_pb.mutable_rowset_meta();
1864
3.75k
    } else {
1865
3.75k
        return &rowset_meta_pb;
1866
3.75k
    }
1867
3.75k
}
_ZN5doris5cloud19mutable_rowset_metaINS_17RowsetMetaCloudPBEEEPS2_RT_
Line
Count
Source
1861
51.9k
RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) {
1862
51.9k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1863
51.9k
        return rowset_meta_pb.mutable_rowset_meta();
1864
51.9k
    } else {
1865
51.9k
        return &rowset_meta_pb;
1866
51.9k
    }
1867
51.9k
}
1868
1869
template <typename T>
1870
223k
const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) {
1871
223k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1872
212k
        return rowset_meta_pb.rowset_meta();
1873
212k
    } else {
1874
212k
        return rowset_meta_pb;
1875
212k
    }
1876
223k
}
_ZN5doris5cloud11rowset_metaINS0_15RecycleRowsetPBEEERKNS_17RowsetMetaCloudPBERKT_
Line
Count
Source
1870
11.9k
const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) {
1871
11.9k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1872
11.9k
        return rowset_meta_pb.rowset_meta();
1873
11.9k
    } else {
1874
11.9k
        return rowset_meta_pb;
1875
11.9k
    }
1876
11.9k
}
_ZN5doris5cloud11rowset_metaINS_17RowsetMetaCloudPBEEERKS2_RKT_
Line
Count
Source
1870
212k
const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) {
1871
212k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1872
212k
        return rowset_meta_pb.rowset_meta();
1873
212k
    } else {
1874
212k
        return rowset_meta_pb;
1875
212k
    }
1876
212k
}
1877
1878
// Describes one abort that has to be performed for a rowset before it is recycled.
// Instances are produced by make_deferred_abort_task() from a rowset meta.
struct DeferredRecycleAbortTask {
    // What has to be aborted.
    enum class Type : uint8_t {
        TXN, // the load transaction identified by `txn_id`
        JOB, // the tablet job identified by `job_id`
    };

    Type type {Type::TXN};
    // Txn id of the rowset; filled for Type::TXN tasks.
    int64_t txn_id {0};
    // Tablet and version range of the rowset.
    int64_t tablet_id {0};
    int64_t start_version {0};
    int64_t end_version {0};
    // Rowset id (rowset_id_v2) and job id of the rowset; filled for Type::JOB tasks.
    std::string rowset_id;
    std::string job_id;
};
1892
1893
// One pending delete collected by collect_prepare_delete_tasks() for a recycle rowset
// record of type PREPARE.
struct DeferredRecyclePrepareDeleteTask {
    std::string key;
    std::string resource_id;
    std::string rowset_id;
    int64_t tablet_id {0};
};
1899
1900
template <typename T>
1901
57.7k
std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) {
1902
57.7k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1903
3.75k
        if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) {
1904
3.10k
            return std::nullopt;
1905
3.10k
        }
1906
3.75k
    }
1907
1908
654
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1909
654
    DeferredRecycleAbortTask task;
1910
654
    task.tablet_id = rs_meta.tablet_id();
1911
654
    task.start_version = rs_meta.start_version();
1912
654
    task.end_version = rs_meta.end_version();
1913
54.6k
    if (rs_meta.has_load_id()) {
1914
4
        task.type = DeferredRecycleAbortTask::Type::TXN;
1915
4
        task.txn_id = rs_meta.txn_id();
1916
4
        return task;
1917
4
    }
1918
54.6k
    if (rs_meta.has_job_id()) {
1919
6
        task.type = DeferredRecycleAbortTask::Type::JOB;
1920
6
        task.rowset_id = rs_meta.rowset_id_v2();
1921
6
        task.job_id = rs_meta.job_id();
1922
6
        return task;
1923
6
    }
1924
54.6k
    return std::nullopt;
1925
54.6k
}
_ZN5doris5cloud24make_deferred_abort_taskINS0_15RecycleRowsetPBEEESt8optionalINS0_24DeferredRecycleAbortTaskEERKT_
Line
Count
Source
1901
3.75k
std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) {
1902
3.75k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1903
3.75k
        if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) {
1904
3.10k
            return std::nullopt;
1905
3.10k
        }
1906
3.75k
    }
1907
1908
654
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1909
654
    DeferredRecycleAbortTask task;
1910
654
    task.tablet_id = rs_meta.tablet_id();
1911
654
    task.start_version = rs_meta.start_version();
1912
654
    task.end_version = rs_meta.end_version();
1913
654
    if (rs_meta.has_load_id()) {
1914
2
        task.type = DeferredRecycleAbortTask::Type::TXN;
1915
2
        task.txn_id = rs_meta.txn_id();
1916
2
        return task;
1917
2
    }
1918
652
    if (rs_meta.has_job_id()) {
1919
2
        task.type = DeferredRecycleAbortTask::Type::JOB;
1920
2
        task.rowset_id = rs_meta.rowset_id_v2();
1921
2
        task.job_id = rs_meta.job_id();
1922
2
        return task;
1923
2
    }
1924
650
    return std::nullopt;
1925
652
}
_ZN5doris5cloud24make_deferred_abort_taskINS_17RowsetMetaCloudPBEEESt8optionalINS0_24DeferredRecycleAbortTaskEERKT_
Line
Count
Source
1901
54.0k
std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) {
1902
54.0k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1903
54.0k
        if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) {
1904
54.0k
            return std::nullopt;
1905
54.0k
        }
1906
54.0k
    }
1907
1908
54.0k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1909
54.0k
    DeferredRecycleAbortTask task;
1910
54.0k
    task.tablet_id = rs_meta.tablet_id();
1911
54.0k
    task.start_version = rs_meta.start_version();
1912
54.0k
    task.end_version = rs_meta.end_version();
1913
54.0k
    if (rs_meta.has_load_id()) {
1914
2
        task.type = DeferredRecycleAbortTask::Type::TXN;
1915
2
        task.txn_id = rs_meta.txn_id();
1916
2
        return task;
1917
2
    }
1918
54.0k
    if (rs_meta.has_job_id()) {
1919
4
        task.type = DeferredRecycleAbortTask::Type::JOB;
1920
4
        task.rowset_id = rs_meta.rowset_id_v2();
1921
4
        task.job_id = rs_meta.job_id();
1922
4
        return task;
1923
4
    }
1924
54.0k
    return std::nullopt;
1925
54.0k
}
1926
1927
// Returns true unless the rowset meta carried by `rowset_meta_pb` already has is_recycled
// set to true, i.e. the flag is either absent or false.
template <typename T>
bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) {
    const auto& meta = rowset_meta(rowset_meta_pb);
    const bool already_marked = meta.has_is_recycled() && meta.is_recycled();
    return !already_marked;
}
_ZN5doris5cloud28need_mark_rowset_as_recycledINS0_15RecycleRowsetPBEEEbRKT_
Line
Count
Source
1928
11.2k
bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) {
1929
11.2k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1930
11.2k
    return !rs_meta.has_is_recycled() || !rs_meta.is_recycled();
1931
11.2k
}
_ZN5doris5cloud28need_mark_rowset_as_recycledINS_17RowsetMetaCloudPBEEEbRKT_
Line
Count
Source
1928
158k
bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) {
1929
158k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1930
158k
    return !rs_meta.has_is_recycled() || !rs_meta.is_recycled();
1931
158k
}
1932
1933
template <typename T>
1934
int batch_mark_rowsets_as_recycled(TxnKv* txn_kv, const std::string& instance_id,
1935
42
                                   const std::vector<std::string>& keys) {
1936
42
    std::unique_ptr<Transaction> txn;
1937
42
    TxnErrorCode err = txn_kv->create_txn(&txn);
1938
42
    if (err != TxnErrorCode::TXN_OK) {
1939
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1940
0
        return -1;
1941
0
    }
1942
42
    std::vector<std::optional<std::string>> values;
1943
42
    err = txn->batch_get(&values, keys);
1944
42
    if (err != TxnErrorCode::TXN_OK) {
1945
0
        LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' '
1946
0
                     << "keys size=" << keys.size() << ' ' << "err=" << err;
1947
0
        return -1;
1948
0
    }
1949
42
    size_t total_keys = keys.size();
1950
55.8k
    for (size_t i = 0; i < total_keys; i++) {
1951
55.7k
        if (!values[i].has_value()) {
1952
            // has already been removed by commit_rowset
1953
0
            continue;
1954
0
        }
1955
55.7k
        auto key = keys[i];
1956
55.7k
        auto val = values[i].value();
1957
55.7k
        T rowset_meta_pb;
1958
55.7k
        if (!rowset_meta_pb.ParseFromString(val)) {
1959
0
            LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
1960
0
                         << " key=" << hex(key);
1961
0
            return -1;
1962
0
        }
1963
55.7k
        if (!need_mark_rowset_as_recycled(rowset_meta_pb)) {
1964
0
            continue;
1965
0
        }
1966
55.7k
        mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true);
1967
55.7k
        val.clear();
1968
55.7k
        rowset_meta_pb.SerializeToString(&val);
1969
55.7k
        txn->put(key, val);
1970
55.7k
    }
1971
42
    err = txn->commit();
1972
42
    if (err != TxnErrorCode::TXN_OK) {
1973
0
        LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1974
0
        return -1;
1975
0
    }
1976
1977
42
    return 0;
1978
42
}
_ZN5doris5cloud30batch_mark_rowsets_as_recycledINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EE
Line
Count
Source
1935
26
                                   const std::vector<std::string>& keys) {
1936
26
    std::unique_ptr<Transaction> txn;
1937
26
    TxnErrorCode err = txn_kv->create_txn(&txn);
1938
26
    if (err != TxnErrorCode::TXN_OK) {
1939
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1940
0
        return -1;
1941
0
    }
1942
26
    std::vector<std::optional<std::string>> values;
1943
26
    err = txn->batch_get(&values, keys);
1944
26
    if (err != TxnErrorCode::TXN_OK) {
1945
0
        LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' '
1946
0
                     << "keys size=" << keys.size() << ' ' << "err=" << err;
1947
0
        return -1;
1948
0
    }
1949
26
    size_t total_keys = keys.size();
1950
3.78k
    for (size_t i = 0; i < total_keys; i++) {
1951
3.75k
        if (!values[i].has_value()) {
1952
            // has already been removed by commit_rowset
1953
0
            continue;
1954
0
        }
1955
3.75k
        auto key = keys[i];
1956
3.75k
        auto val = values[i].value();
1957
3.75k
        T rowset_meta_pb;
1958
3.75k
        if (!rowset_meta_pb.ParseFromString(val)) {
1959
0
            LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
1960
0
                         << " key=" << hex(key);
1961
0
            return -1;
1962
0
        }
1963
3.75k
        if (!need_mark_rowset_as_recycled(rowset_meta_pb)) {
1964
0
            continue;
1965
0
        }
1966
3.75k
        mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true);
1967
3.75k
        val.clear();
1968
3.75k
        rowset_meta_pb.SerializeToString(&val);
1969
3.75k
        txn->put(key, val);
1970
3.75k
    }
1971
26
    err = txn->commit();
1972
26
    if (err != TxnErrorCode::TXN_OK) {
1973
0
        LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1974
0
        return -1;
1975
0
    }
1976
1977
26
    return 0;
1978
26
}
_ZN5doris5cloud30batch_mark_rowsets_as_recycledINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EE
Line
Count
Source
1935
16
                                   const std::vector<std::string>& keys) {
1936
16
    std::unique_ptr<Transaction> txn;
1937
16
    TxnErrorCode err = txn_kv->create_txn(&txn);
1938
16
    if (err != TxnErrorCode::TXN_OK) {
1939
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1940
0
        return -1;
1941
0
    }
1942
16
    std::vector<std::optional<std::string>> values;
1943
16
    err = txn->batch_get(&values, keys);
1944
16
    if (err != TxnErrorCode::TXN_OK) {
1945
0
        LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' '
1946
0
                     << "keys size=" << keys.size() << ' ' << "err=" << err;
1947
0
        return -1;
1948
0
    }
1949
16
    size_t total_keys = keys.size();
1950
52.0k
    for (size_t i = 0; i < total_keys; i++) {
1951
52.0k
        if (!values[i].has_value()) {
1952
            // has already been removed by commit_rowset
1953
0
            continue;
1954
0
        }
1955
52.0k
        auto key = keys[i];
1956
52.0k
        auto val = values[i].value();
1957
52.0k
        T rowset_meta_pb;
1958
52.0k
        if (!rowset_meta_pb.ParseFromString(val)) {
1959
0
            LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
1960
0
                         << " key=" << hex(key);
1961
0
            return -1;
1962
0
        }
1963
52.0k
        if (!need_mark_rowset_as_recycled(rowset_meta_pb)) {
1964
0
            continue;
1965
0
        }
1966
52.0k
        mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true);
1967
52.0k
        val.clear();
1968
52.0k
        rowset_meta_pb.SerializeToString(&val);
1969
52.0k
        txn->put(key, val);
1970
52.0k
    }
1971
16
    err = txn->commit();
1972
16
    if (err != TxnErrorCode::TXN_OK) {
1973
0
        LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1974
0
        return -1;
1975
0
    }
1976
1977
16
    return 0;
1978
16
}
1979
1980
template <typename T>
1981
int collect_deferred_abort_tasks(TxnKv* txn_kv, const std::string& instance_id,
1982
                                 const std::vector<std::string>& keys,
1983
                                 std::vector<DeferredRecycleAbortTask>* abort_tasks,
1984
5
                                 bool skip_base_version) {
1985
5
    constexpr size_t kAbortCheckBatchSize = 256;
1986
10
    for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) {
1987
5
        size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize);
1988
5
        std::unique_ptr<Transaction> txn;
1989
5
        TxnErrorCode err = txn_kv->create_txn(&txn);
1990
5
        if (err != TxnErrorCode::TXN_OK) {
1991
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1992
0
            return -1;
1993
0
        }
1994
10
        for (size_t idx = offset; idx < limit; ++idx) {
1995
5
            const std::string& key = keys[idx];
1996
5
            std::string val;
1997
5
            err = txn->get(key, &val);
1998
5
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1999
                // has already been removed
2000
0
                continue;
2001
0
            }
2002
5
            if (err != TxnErrorCode::TXN_OK) {
2003
0
                LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id
2004
0
                             << " key=" << hex(key);
2005
0
                return -1;
2006
0
            }
2007
5
            T rowset_meta_pb;
2008
5
            if (!rowset_meta_pb.ParseFromString(val)) {
2009
0
                LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2010
0
                             << " key=" << hex(key);
2011
0
                return -1;
2012
0
            }
2013
5
            if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) {
2014
0
                continue;
2015
0
            }
2016
5
            if (auto abort_task = make_deferred_abort_task(rowset_meta_pb);
2017
5
                abort_task.has_value()) {
2018
5
                abort_tasks->emplace_back(std::move(*abort_task));
2019
5
            }
2020
5
        }
2021
5
    }
2022
5
    return 0;
2023
5
}
_ZN5doris5cloud28collect_deferred_abort_tasksINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EEPSD_INS0_24DeferredRecycleAbortTaskESaISI_EEb
Line
Count
Source
1984
2
                                 bool skip_base_version) {
1985
2
    constexpr size_t kAbortCheckBatchSize = 256;
1986
4
    for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) {
1987
2
        size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize);
1988
2
        std::unique_ptr<Transaction> txn;
1989
2
        TxnErrorCode err = txn_kv->create_txn(&txn);
1990
2
        if (err != TxnErrorCode::TXN_OK) {
1991
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1992
0
            return -1;
1993
0
        }
1994
4
        for (size_t idx = offset; idx < limit; ++idx) {
1995
2
            const std::string& key = keys[idx];
1996
2
            std::string val;
1997
2
            err = txn->get(key, &val);
1998
2
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1999
                // has already been removed
2000
0
                continue;
2001
0
            }
2002
2
            if (err != TxnErrorCode::TXN_OK) {
2003
0
                LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id
2004
0
                             << " key=" << hex(key);
2005
0
                return -1;
2006
0
            }
2007
2
            T rowset_meta_pb;
2008
2
            if (!rowset_meta_pb.ParseFromString(val)) {
2009
0
                LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2010
0
                             << " key=" << hex(key);
2011
0
                return -1;
2012
0
            }
2013
2
            if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) {
2014
0
                continue;
2015
0
            }
2016
2
            if (auto abort_task = make_deferred_abort_task(rowset_meta_pb);
2017
2
                abort_task.has_value()) {
2018
2
                abort_tasks->emplace_back(std::move(*abort_task));
2019
2
            }
2020
2
        }
2021
2
    }
2022
2
    return 0;
2023
2
}
_ZN5doris5cloud28collect_deferred_abort_tasksINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EEPSD_INS0_24DeferredRecycleAbortTaskESaISI_EEb
Line
Count
Source
1984
3
                                 bool skip_base_version) {
1985
3
    constexpr size_t kAbortCheckBatchSize = 256;
1986
6
    for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) {
1987
3
        size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize);
1988
3
        std::unique_ptr<Transaction> txn;
1989
3
        TxnErrorCode err = txn_kv->create_txn(&txn);
1990
3
        if (err != TxnErrorCode::TXN_OK) {
1991
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1992
0
            return -1;
1993
0
        }
1994
6
        for (size_t idx = offset; idx < limit; ++idx) {
1995
3
            const std::string& key = keys[idx];
1996
3
            std::string val;
1997
3
            err = txn->get(key, &val);
1998
3
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1999
                // has already been removed
2000
0
                continue;
2001
0
            }
2002
3
            if (err != TxnErrorCode::TXN_OK) {
2003
0
                LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id
2004
0
                             << " key=" << hex(key);
2005
0
                return -1;
2006
0
            }
2007
3
            T rowset_meta_pb;
2008
3
            if (!rowset_meta_pb.ParseFromString(val)) {
2009
0
                LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2010
0
                             << " key=" << hex(key);
2011
0
                return -1;
2012
0
            }
2013
3
            if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) {
2014
0
                continue;
2015
0
            }
2016
3
            if (auto abort_task = make_deferred_abort_task(rowset_meta_pb);
2017
3
                abort_task.has_value()) {
2018
3
                abort_tasks->emplace_back(std::move(*abort_task));
2019
3
            }
2020
3
        }
2021
3
    }
2022
3
    return 0;
2023
3
}
2024
2025
template <typename T>
2026
int InstanceRecycler::batch_abort_txn_or_job_for_recycle(const std::vector<std::string>& keys,
2027
5
                                                         bool skip_base_version) {
2028
5
    std::vector<DeferredRecycleAbortTask> abort_tasks;
2029
5
    if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks,
2030
5
                                        skip_base_version) != 0) {
2031
0
        LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_;
2032
0
        return -1;
2033
0
    }
2034
5
    for (const auto& abort_task : abort_tasks) {
2035
5
        LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_
2036
5
                  << " tablet_id=" << abort_task.tablet_id << " version=["
2037
5
                  << abort_task.start_version << '-' << abort_task.end_version << "]";
2038
5
        int abort_ret = 0;
2039
5
        if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) {
2040
2
            abort_ret = abort_txn_for_related_rowset(abort_task.txn_id);
2041
3
        } else {
2042
3
            RowsetMetaCloudPB rowset_meta;
2043
3
            rowset_meta.set_tablet_id(abort_task.tablet_id);
2044
3
            rowset_meta.set_rowset_id_v2(abort_task.rowset_id);
2045
3
            rowset_meta.set_job_id(abort_task.job_id);
2046
3
            abort_ret = abort_job_for_related_rowset(rowset_meta);
2047
3
        }
2048
5
        if (abort_ret != 0) {
2049
0
            LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
2050
0
                         << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=["
2051
0
                         << abort_task.start_version << '-' << abort_task.end_version << "]";
2052
0
            return abort_ret;
2053
0
        }
2054
5
    }
2055
5
    return 0;
2056
5
}
_ZN5doris5cloud16InstanceRecycler34batch_abort_txn_or_job_for_recycleINS0_15RecycleRowsetPBEEEiRKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISA_EEb
Line
Count
Source
2027
2
                                                         bool skip_base_version) {
2028
2
    std::vector<DeferredRecycleAbortTask> abort_tasks;
2029
2
    if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks,
2030
2
                                        skip_base_version) != 0) {
2031
0
        LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_;
2032
0
        return -1;
2033
0
    }
2034
2
    for (const auto& abort_task : abort_tasks) {
2035
2
        LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_
2036
2
                  << " tablet_id=" << abort_task.tablet_id << " version=["
2037
2
                  << abort_task.start_version << '-' << abort_task.end_version << "]";
2038
2
        int abort_ret = 0;
2039
2
        if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) {
2040
1
            abort_ret = abort_txn_for_related_rowset(abort_task.txn_id);
2041
1
        } else {
2042
1
            RowsetMetaCloudPB rowset_meta;
2043
1
            rowset_meta.set_tablet_id(abort_task.tablet_id);
2044
1
            rowset_meta.set_rowset_id_v2(abort_task.rowset_id);
2045
1
            rowset_meta.set_job_id(abort_task.job_id);
2046
1
            abort_ret = abort_job_for_related_rowset(rowset_meta);
2047
1
        }
2048
2
        if (abort_ret != 0) {
2049
0
            LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
2050
0
                         << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=["
2051
0
                         << abort_task.start_version << '-' << abort_task.end_version << "]";
2052
0
            return abort_ret;
2053
0
        }
2054
2
    }
2055
2
    return 0;
2056
2
}
_ZN5doris5cloud16InstanceRecycler34batch_abort_txn_or_job_for_recycleINS_17RowsetMetaCloudPBEEEiRKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISA_EEb
Line
Count
Source
2027
3
                                                         bool skip_base_version) {
2028
3
    std::vector<DeferredRecycleAbortTask> abort_tasks;
2029
3
    if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks,
2030
3
                                        skip_base_version) != 0) {
2031
0
        LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_;
2032
0
        return -1;
2033
0
    }
2034
3
    for (const auto& abort_task : abort_tasks) {
2035
3
        LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_
2036
3
                  << " tablet_id=" << abort_task.tablet_id << " version=["
2037
3
                  << abort_task.start_version << '-' << abort_task.end_version << "]";
2038
3
        int abort_ret = 0;
2039
3
        if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) {
2040
1
            abort_ret = abort_txn_for_related_rowset(abort_task.txn_id);
2041
2
        } else {
2042
2
            RowsetMetaCloudPB rowset_meta;
2043
2
            rowset_meta.set_tablet_id(abort_task.tablet_id);
2044
2
            rowset_meta.set_rowset_id_v2(abort_task.rowset_id);
2045
2
            rowset_meta.set_job_id(abort_task.job_id);
2046
2
            abort_ret = abort_job_for_related_rowset(rowset_meta);
2047
2
        }
2048
3
        if (abort_ret != 0) {
2049
0
            LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
2050
0
                         << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=["
2051
0
                         << abort_task.start_version << '-' << abort_task.end_version << "]";
2052
0
            return abort_ret;
2053
0
        }
2054
3
    }
2055
3
    return 0;
2056
3
}
2057
2058
int collect_prepare_delete_tasks(TxnKv* txn_kv, const std::string& instance_id,
2059
                                 const std::vector<std::string>& keys,
2060
23
                                 std::vector<DeferredRecyclePrepareDeleteTask>* delete_tasks) {
2061
23
    constexpr size_t kPrepareCheckBatchSize = 256;
2062
46
    for (size_t offset = 0; offset < keys.size(); offset += kPrepareCheckBatchSize) {
2063
23
        size_t limit = std::min(keys.size(), offset + kPrepareCheckBatchSize);
2064
23
        std::unique_ptr<Transaction> txn;
2065
23
        TxnErrorCode err = txn_kv->create_txn(&txn);
2066
23
        if (err != TxnErrorCode::TXN_OK) {
2067
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
2068
0
            return -1;
2069
0
        }
2070
675
        for (size_t idx = offset; idx < limit; ++idx) {
2071
652
            const std::string& key = keys[idx];
2072
652
            std::string val;
2073
652
            err = txn->get(key, &val);
2074
652
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2075
                // has already been removed
2076
0
                continue;
2077
0
            }
2078
652
            if (err != TxnErrorCode::TXN_OK) {
2079
0
                LOG(WARNING) << "failed to get recycle rowset, instance_id=" << instance_id
2080
0
                             << " key=" << hex(key);
2081
0
                return -1;
2082
0
            }
2083
652
            RecycleRowsetPB rowset;
2084
652
            if (!rowset.ParseFromString(val)) {
2085
0
                LOG(WARNING) << "failed to parse recycle rowset, instance_id=" << instance_id
2086
0
                             << " key=" << hex(key);
2087
0
                return -1;
2088
0
            }
2089
652
            if (rowset.type() != RecycleRowsetPB::PREPARE) {
2090
0
                continue;
2091
0
            }
2092
652
            const auto& rs_meta = rowset.rowset_meta();
2093
652
            delete_tasks->push_back(
2094
652
                    {key, rs_meta.resource_id(), rs_meta.rowset_id_v2(), rs_meta.tablet_id()});
2095
652
        }
2096
23
    }
2097
23
    return 0;
2098
23
}
2099
2100
1
int InstanceRecycler::recycle_ref_rowsets(bool* has_unrecycled_rowsets) {
2101
1
    const std::string task_name = "recycle_ref_rowsets";
2102
1
    *has_unrecycled_rowsets = false;
2103
2104
1
    std::string data_rowset_ref_count_key_start =
2105
1
            versioned::data_rowset_ref_count_key({instance_id_, 0, ""});
2106
1
    std::string data_rowset_ref_count_key_end =
2107
1
            versioned::data_rowset_ref_count_key({instance_id_, INT64_MAX, ""});
2108
2109
1
    LOG_WARNING("begin to recycle ref rowsets").tag("instance_id", instance_id_);
2110
2111
1
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2112
1
    register_recycle_task(task_name, start_time);
2113
2114
1
    DORIS_CLOUD_DEFER {
2115
1
        unregister_recycle_task(task_name);
2116
1
        int64_t cost =
2117
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2118
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
2119
1
                .tag("instance_id", instance_id_);
2120
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
Line
Count
Source
2114
1
    DORIS_CLOUD_DEFER {
2115
1
        unregister_recycle_task(task_name);
2116
1
        int64_t cost =
2117
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2118
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
2119
1
                .tag("instance_id", instance_id_);
2120
1
    };
2121
2122
    // Phase 1: Scan to collect all tablet_ids that have rowset ref counts
2123
1
    std::set<int64_t> tablets_with_refs;
2124
1
    int64_t num_scanned = 0;
2125
2126
1
    auto scan_func = [&](std::string_view k, std::string_view v) -> int {
2127
0
        ++num_scanned;
2128
0
        int64_t tablet_id;
2129
0
        std::string rowset_id;
2130
0
        std::string_view key(k);
2131
0
        if (!versioned::decode_data_rowset_ref_count_key(&key, &tablet_id, &rowset_id)) {
2132
0
            LOG_WARNING("failed to decode data rowset ref count key").tag("key", hex(k));
2133
0
            return 0; // Continue scanning
2134
0
        }
2135
2136
0
        tablets_with_refs.insert(tablet_id);
2137
0
        return 0;
2138
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
2139
2140
1
    if (scan_and_recycle(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end,
2141
1
                         std::move(scan_func)) != 0) {
2142
0
        LOG_WARNING("failed to scan data rowset ref count keys");
2143
0
        return -1;
2144
0
    }
2145
2146
1
    LOG_INFO("collected {} tablets with rowset refs, scanned {} ref count keys",
2147
1
             tablets_with_refs.size(), num_scanned)
2148
1
            .tag("instance_id", instance_id_);
2149
2150
    // Phase 2: Recycle each tablet
2151
1
    int64_t num_recycled_tablets = 0;
2152
1
    for (int64_t tablet_id : tablets_with_refs) {
2153
0
        if (stopped()) {
2154
0
            LOG_INFO("recycler stopped, skip remaining tablets")
2155
0
                    .tag("instance_id", instance_id_)
2156
0
                    .tag("tablets_processed", num_recycled_tablets)
2157
0
                    .tag("tablets_remaining", tablets_with_refs.size() - num_recycled_tablets);
2158
0
            break;
2159
0
        }
2160
2161
0
        RecyclerMetricsContext metrics_context(instance_id_, task_name);
2162
0
        if (recycle_versioned_tablet(tablet_id, metrics_context) != 0) {
2163
0
            LOG_WARNING("failed to recycle tablet")
2164
0
                    .tag("instance_id", instance_id_)
2165
0
                    .tag("tablet_id", tablet_id);
2166
0
            return -1;
2167
0
        }
2168
0
        ++num_recycled_tablets;
2169
0
    }
2170
2171
1
    LOG_INFO("recycled {} tablets", num_recycled_tablets)
2172
1
            .tag("instance_id", instance_id_)
2173
1
            .tag("total_tablets", tablets_with_refs.size());
2174
2175
    // Phase 3: Scan again to check if any ref count keys still exist
2176
1
    std::unique_ptr<Transaction> txn;
2177
1
    TxnErrorCode err = txn_kv_->create_txn(&txn);
2178
1
    if (err != TxnErrorCode::TXN_OK) {
2179
0
        LOG_WARNING("failed to create txn for final check")
2180
0
                .tag("instance_id", instance_id_)
2181
0
                .tag("err", err);
2182
0
        return -1;
2183
0
    }
2184
2185
1
    std::unique_ptr<RangeGetIterator> iter;
2186
1
    err = txn->get(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end, &iter, true);
2187
1
    if (err != TxnErrorCode::TXN_OK) {
2188
0
        LOG_WARNING("failed to create range iterator for final check")
2189
0
                .tag("instance_id", instance_id_)
2190
0
                .tag("err", err);
2191
0
        return -1;
2192
0
    }
2193
2194
1
    *has_unrecycled_rowsets = iter->has_next();
2195
1
    if (*has_unrecycled_rowsets) {
2196
0
        LOG_INFO("still has unrecycled rowsets after recycle_ref_rowsets")
2197
0
                .tag("instance_id", instance_id_);
2198
0
    }
2199
2200
1
    return 0;
2201
1
}
2202
2203
17
int InstanceRecycler::recycle_indexes() {
2204
17
    const std::string task_name = "recycle_indexes";
2205
17
    int64_t num_scanned = 0;
2206
17
    int64_t num_expired = 0;
2207
17
    int64_t num_recycled = 0;
2208
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2209
2210
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
2211
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
2212
17
    std::string index_key0;
2213
17
    std::string index_key1;
2214
17
    recycle_index_key(index_key_info0, &index_key0);
2215
17
    recycle_index_key(index_key_info1, &index_key1);
2216
2217
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
2218
2219
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2220
17
    register_recycle_task(task_name, start_time);
2221
2222
17
    DORIS_CLOUD_DEFER {
2223
17
        unregister_recycle_task(task_name);
2224
17
        int64_t cost =
2225
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2226
17
        metrics_context.finish_report();
2227
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2228
17
                .tag("instance_id", instance_id_)
2229
17
                .tag("num_scanned", num_scanned)
2230
17
                .tag("num_expired", num_expired)
2231
17
                .tag("num_recycled", num_recycled);
2232
17
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2222
2
    DORIS_CLOUD_DEFER {
2223
2
        unregister_recycle_task(task_name);
2224
2
        int64_t cost =
2225
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2226
2
        metrics_context.finish_report();
2227
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2228
2
                .tag("instance_id", instance_id_)
2229
2
                .tag("num_scanned", num_scanned)
2230
2
                .tag("num_expired", num_expired)
2231
2
                .tag("num_recycled", num_recycled);
2232
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2222
15
    DORIS_CLOUD_DEFER {
2223
15
        unregister_recycle_task(task_name);
2224
15
        int64_t cost =
2225
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2226
15
        metrics_context.finish_report();
2227
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2228
15
                .tag("instance_id", instance_id_)
2229
15
                .tag("num_scanned", num_scanned)
2230
15
                .tag("num_expired", num_expired)
2231
15
                .tag("num_recycled", num_recycled);
2232
15
    };
2233
2234
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2235
2236
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
2237
17
    std::vector<std::string_view> index_keys;
2238
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2239
10
        ++num_scanned;
2240
10
        RecycleIndexPB index_pb;
2241
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2242
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2243
0
            return -1;
2244
0
        }
2245
10
        int64_t current_time = ::time(nullptr);
2246
10
        if (current_time <
2247
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2248
0
            return 0;
2249
0
        }
2250
10
        ++num_expired;
2251
        // decode index_id
2252
10
        auto k1 = k;
2253
10
        k1.remove_prefix(1);
2254
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2255
10
        decode_key(&k1, &out);
2256
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2257
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2258
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2259
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2260
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2261
        // Change state to RECYCLING
2262
10
        std::unique_ptr<Transaction> txn;
2263
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2264
10
        if (err != TxnErrorCode::TXN_OK) {
2265
0
            LOG_WARNING("failed to create txn").tag("err", err);
2266
0
            return -1;
2267
0
        }
2268
10
        std::string val;
2269
10
        err = txn->get(k, &val);
2270
10
        if (err ==
2271
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2272
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2273
0
            return 0;
2274
0
        }
2275
10
        if (err != TxnErrorCode::TXN_OK) {
2276
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2277
0
            return -1;
2278
0
        }
2279
10
        index_pb.Clear();
2280
10
        if (!index_pb.ParseFromString(val)) {
2281
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2282
0
            return -1;
2283
0
        }
2284
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2285
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2286
9
            txn->put(k, index_pb.SerializeAsString());
2287
9
            err = txn->commit();
2288
9
            if (err != TxnErrorCode::TXN_OK) {
2289
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2290
0
                return -1;
2291
0
            }
2292
9
        }
2293
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2294
1
            LOG_WARNING("failed to recycle tablets under index")
2295
1
                    .tag("table_id", index_pb.table_id())
2296
1
                    .tag("instance_id", instance_id_)
2297
1
                    .tag("index_id", index_id);
2298
1
            return -1;
2299
1
        }
2300
2301
9
        if (index_pb.has_db_id()) {
2302
            // Recycle the versioned keys
2303
3
            std::unique_ptr<Transaction> txn;
2304
3
            err = txn_kv_->create_txn(&txn);
2305
3
            if (err != TxnErrorCode::TXN_OK) {
2306
0
                LOG_WARNING("failed to create txn").tag("err", err);
2307
0
                return -1;
2308
0
            }
2309
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2310
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2311
3
            std::string index_inverted_key = versioned::index_inverted_key(
2312
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2313
3
            versioned_remove_all(txn.get(), meta_key);
2314
3
            txn->remove(index_key);
2315
3
            txn->remove(index_inverted_key);
2316
3
            err = txn->commit();
2317
3
            if (err != TxnErrorCode::TXN_OK) {
2318
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2319
0
                return -1;
2320
0
            }
2321
3
        }
2322
2323
9
        metrics_context.total_recycled_num = ++num_recycled;
2324
9
        metrics_context.report();
2325
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2326
9
        index_keys.push_back(k);
2327
9
        return 0;
2328
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2238
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2239
2
        ++num_scanned;
2240
2
        RecycleIndexPB index_pb;
2241
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2242
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2243
0
            return -1;
2244
0
        }
2245
2
        int64_t current_time = ::time(nullptr);
2246
2
        if (current_time <
2247
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2248
0
            return 0;
2249
0
        }
2250
2
        ++num_expired;
2251
        // decode index_id
2252
2
        auto k1 = k;
2253
2
        k1.remove_prefix(1);
2254
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2255
2
        decode_key(&k1, &out);
2256
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2257
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2258
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2259
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2260
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2261
        // Change state to RECYCLING
2262
2
        std::unique_ptr<Transaction> txn;
2263
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2264
2
        if (err != TxnErrorCode::TXN_OK) {
2265
0
            LOG_WARNING("failed to create txn").tag("err", err);
2266
0
            return -1;
2267
0
        }
2268
2
        std::string val;
2269
2
        err = txn->get(k, &val);
2270
2
        if (err ==
2271
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2272
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2273
0
            return 0;
2274
0
        }
2275
2
        if (err != TxnErrorCode::TXN_OK) {
2276
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2277
0
            return -1;
2278
0
        }
2279
2
        index_pb.Clear();
2280
2
        if (!index_pb.ParseFromString(val)) {
2281
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2282
0
            return -1;
2283
0
        }
2284
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2285
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2286
1
            txn->put(k, index_pb.SerializeAsString());
2287
1
            err = txn->commit();
2288
1
            if (err != TxnErrorCode::TXN_OK) {
2289
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2290
0
                return -1;
2291
0
            }
2292
1
        }
2293
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2294
1
            LOG_WARNING("failed to recycle tablets under index")
2295
1
                    .tag("table_id", index_pb.table_id())
2296
1
                    .tag("instance_id", instance_id_)
2297
1
                    .tag("index_id", index_id);
2298
1
            return -1;
2299
1
        }
2300
2301
1
        if (index_pb.has_db_id()) {
2302
            // Recycle the versioned keys
2303
1
            std::unique_ptr<Transaction> txn;
2304
1
            err = txn_kv_->create_txn(&txn);
2305
1
            if (err != TxnErrorCode::TXN_OK) {
2306
0
                LOG_WARNING("failed to create txn").tag("err", err);
2307
0
                return -1;
2308
0
            }
2309
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2310
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2311
1
            std::string index_inverted_key = versioned::index_inverted_key(
2312
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2313
1
            versioned_remove_all(txn.get(), meta_key);
2314
1
            txn->remove(index_key);
2315
1
            txn->remove(index_inverted_key);
2316
1
            err = txn->commit();
2317
1
            if (err != TxnErrorCode::TXN_OK) {
2318
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2319
0
                return -1;
2320
0
            }
2321
1
        }
2322
2323
1
        metrics_context.total_recycled_num = ++num_recycled;
2324
1
        metrics_context.report();
2325
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2326
1
        index_keys.push_back(k);
2327
1
        return 0;
2328
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2238
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2239
8
        ++num_scanned;
2240
8
        RecycleIndexPB index_pb;
2241
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2242
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2243
0
            return -1;
2244
0
        }
2245
8
        int64_t current_time = ::time(nullptr);
2246
8
        if (current_time <
2247
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2248
0
            return 0;
2249
0
        }
2250
8
        ++num_expired;
2251
        // decode index_id
2252
8
        auto k1 = k;
2253
8
        k1.remove_prefix(1);
2254
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2255
8
        decode_key(&k1, &out);
2256
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2257
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2258
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2259
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2260
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2261
        // Change state to RECYCLING
2262
8
        std::unique_ptr<Transaction> txn;
2263
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2264
8
        if (err != TxnErrorCode::TXN_OK) {
2265
0
            LOG_WARNING("failed to create txn").tag("err", err);
2266
0
            return -1;
2267
0
        }
2268
8
        std::string val;
2269
8
        err = txn->get(k, &val);
2270
8
        if (err ==
2271
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2272
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2273
0
            return 0;
2274
0
        }
2275
8
        if (err != TxnErrorCode::TXN_OK) {
2276
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2277
0
            return -1;
2278
0
        }
2279
8
        index_pb.Clear();
2280
8
        if (!index_pb.ParseFromString(val)) {
2281
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2282
0
            return -1;
2283
0
        }
2284
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2285
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2286
8
            txn->put(k, index_pb.SerializeAsString());
2287
8
            err = txn->commit();
2288
8
            if (err != TxnErrorCode::TXN_OK) {
2289
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2290
0
                return -1;
2291
0
            }
2292
8
        }
2293
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2294
0
            LOG_WARNING("failed to recycle tablets under index")
2295
0
                    .tag("table_id", index_pb.table_id())
2296
0
                    .tag("instance_id", instance_id_)
2297
0
                    .tag("index_id", index_id);
2298
0
            return -1;
2299
0
        }
2300
2301
8
        if (index_pb.has_db_id()) {
2302
            // Recycle the versioned keys
2303
2
            std::unique_ptr<Transaction> txn;
2304
2
            err = txn_kv_->create_txn(&txn);
2305
2
            if (err != TxnErrorCode::TXN_OK) {
2306
0
                LOG_WARNING("failed to create txn").tag("err", err);
2307
0
                return -1;
2308
0
            }
2309
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2310
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2311
2
            std::string index_inverted_key = versioned::index_inverted_key(
2312
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2313
2
            versioned_remove_all(txn.get(), meta_key);
2314
2
            txn->remove(index_key);
2315
2
            txn->remove(index_inverted_key);
2316
2
            err = txn->commit();
2317
2
            if (err != TxnErrorCode::TXN_OK) {
2318
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2319
0
                return -1;
2320
0
            }
2321
2
        }
2322
2323
8
        metrics_context.total_recycled_num = ++num_recycled;
2324
8
        metrics_context.report();
2325
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2326
8
        index_keys.push_back(k);
2327
8
        return 0;
2328
8
    };
2329
2330
17
    auto loop_done = [&index_keys, this]() -> int {
2331
6
        if (index_keys.empty()) return 0;
2332
5
        DORIS_CLOUD_DEFER {
2333
5
            index_keys.clear();
2334
5
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2332
1
        DORIS_CLOUD_DEFER {
2333
1
            index_keys.clear();
2334
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2332
4
        DORIS_CLOUD_DEFER {
2333
4
            index_keys.clear();
2334
4
        };
2335
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2336
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2337
0
            return -1;
2338
0
        }
2339
5
        return 0;
2340
5
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2330
2
    auto loop_done = [&index_keys, this]() -> int {
2331
2
        if (index_keys.empty()) return 0;
2332
1
        DORIS_CLOUD_DEFER {
2333
1
            index_keys.clear();
2334
1
        };
2335
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2336
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2337
0
            return -1;
2338
0
        }
2339
1
        return 0;
2340
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2330
4
    auto loop_done = [&index_keys, this]() -> int {
2331
4
        if (index_keys.empty()) return 0;
2332
4
        DORIS_CLOUD_DEFER {
2333
4
            index_keys.clear();
2334
4
        };
2335
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2336
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2337
0
            return -1;
2338
0
        }
2339
4
        return 0;
2340
4
    };
2341
2342
17
    if (config::enable_recycler_stats_metrics) {
2343
0
        scan_and_statistics_indexes();
2344
0
    }
2345
    // recycle_func and loop_done for scan and recycle
2346
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
2347
17
}
2348
2349
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
2350
8.24k
                             int64_t tablet_id) {
2351
8.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
2352
2353
8.24k
    std::unique_ptr<Transaction> txn;
2354
8.24k
    TxnErrorCode err = txn_kv->create_txn(&txn);
2355
8.24k
    if (err != TxnErrorCode::TXN_OK) {
2356
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
2357
0
                     << " tablet_id=" << tablet_id << " err=" << err;
2358
0
        return false;
2359
0
    }
2360
2361
8.24k
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
2362
8.24k
    std::string tablet_idx_val;
2363
8.24k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
2364
8.24k
    if (TxnErrorCode::TXN_OK != err) {
2365
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
2366
0
                     << " tablet_id=" << tablet_id << " err=" << err
2367
0
                     << " key=" << hex(tablet_idx_key);
2368
0
        return false;
2369
0
    }
2370
2371
8.24k
    TabletIndexPB tablet_idx_pb;
2372
8.24k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
2373
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
2374
0
                     << " tablet_id=" << tablet_id;
2375
0
        return false;
2376
0
    }
2377
2378
8.24k
    if (!tablet_idx_pb.has_db_id()) {
2379
        // In the previous version, the db_id was not set in the index_pb.
2380
        // If updating to the version which enable txn lazy commit, the db_id will be set.
2381
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
2382
0
                  << " instance_id=" << instance_id
2383
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
2384
0
        return true;
2385
0
    }
2386
2387
8.24k
    std::string ver_val;
2388
8.24k
    std::string ver_key =
2389
8.24k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
2390
8.24k
                                   tablet_idx_pb.partition_id()});
2391
8.24k
    err = txn->get(ver_key, &ver_val);
2392
2393
8.24k
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2394
204
        LOG(INFO) << ""
2395
204
                     "partition version not found, instance_id="
2396
204
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
2397
204
                  << " table_id=" << tablet_idx_pb.table_id()
2398
204
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
2399
204
                  << " key=" << hex(ver_key);
2400
204
        return true;
2401
204
    }
2402
2403
8.03k
    if (TxnErrorCode::TXN_OK != err) {
2404
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
2405
0
                     << " db_id=" << tablet_idx_pb.db_id()
2406
0
                     << " table_id=" << tablet_idx_pb.table_id()
2407
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2408
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
2409
0
        return false;
2410
0
    }
2411
2412
8.03k
    VersionPB version_pb;
2413
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
2414
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
2415
0
                     << " db_id=" << tablet_idx_pb.db_id()
2416
0
                     << " table_id=" << tablet_idx_pb.table_id()
2417
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2418
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
2419
0
        return false;
2420
0
    }
2421
2422
8.03k
    if (version_pb.pending_txn_ids_size() > 0) {
2423
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
2424
4.00k
        DCHECK(version_pb.pending_txn_ids_size() == 1);
2425
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
2426
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
2427
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
2428
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
2429
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
2430
4.00k
                     << " key=" << hex(ver_key);
2431
4.00k
        return false;
2432
4.00k
    }
2433
4.03k
    return true;
2434
8.03k
}
2435
2436
15
int InstanceRecycler::recycle_partitions() {
2437
15
    const std::string task_name = "recycle_partitions";
2438
15
    int64_t num_scanned = 0;
2439
15
    int64_t num_expired = 0;
2440
15
    int64_t num_recycled = 0;
2441
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2442
2443
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
2444
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
2445
15
    std::string part_key0;
2446
15
    std::string part_key1;
2447
15
    recycle_partition_key(part_key_info0, &part_key0);
2448
15
    recycle_partition_key(part_key_info1, &part_key1);
2449
2450
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
2451
2452
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2453
15
    register_recycle_task(task_name, start_time);
2454
2455
15
    DORIS_CLOUD_DEFER {
2456
15
        unregister_recycle_task(task_name);
2457
15
        int64_t cost =
2458
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2459
15
        metrics_context.finish_report();
2460
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2461
15
                .tag("instance_id", instance_id_)
2462
15
                .tag("num_scanned", num_scanned)
2463
15
                .tag("num_expired", num_expired)
2464
15
                .tag("num_recycled", num_recycled);
2465
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2455
2
    DORIS_CLOUD_DEFER {
2456
2
        unregister_recycle_task(task_name);
2457
2
        int64_t cost =
2458
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2459
2
        metrics_context.finish_report();
2460
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2461
2
                .tag("instance_id", instance_id_)
2462
2
                .tag("num_scanned", num_scanned)
2463
2
                .tag("num_expired", num_expired)
2464
2
                .tag("num_recycled", num_recycled);
2465
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2455
13
    DORIS_CLOUD_DEFER {
2456
13
        unregister_recycle_task(task_name);
2457
13
        int64_t cost =
2458
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2459
13
        metrics_context.finish_report();
2460
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2461
13
                .tag("instance_id", instance_id_)
2462
13
                .tag("num_scanned", num_scanned)
2463
13
                .tag("num_expired", num_expired)
2464
13
                .tag("num_recycled", num_recycled);
2465
13
    };
2466
2467
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2468
2469
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
2470
15
    std::vector<std::string_view> partition_keys;
2471
15
    std::vector<std::string> partition_version_keys;
2472
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2473
9
        ++num_scanned;
2474
9
        RecyclePartitionPB part_pb;
2475
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2476
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2477
0
            return -1;
2478
0
        }
2479
9
        int64_t current_time = ::time(nullptr);
2480
9
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2481
9
                                                            &earlest_ts)) { // not expired
2482
0
            return 0;
2483
0
        }
2484
9
        ++num_expired;
2485
        // decode partition_id
2486
9
        auto k1 = k;
2487
9
        k1.remove_prefix(1);
2488
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2489
9
        decode_key(&k1, &out);
2490
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2491
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2492
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2493
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2494
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2495
        // Change state to RECYCLING
2496
9
        std::unique_ptr<Transaction> txn;
2497
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2498
9
        if (err != TxnErrorCode::TXN_OK) {
2499
0
            LOG_WARNING("failed to create txn").tag("err", err);
2500
0
            return -1;
2501
0
        }
2502
9
        std::string val;
2503
9
        err = txn->get(k, &val);
2504
9
        if (err ==
2505
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2506
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2507
0
            return 0;
2508
0
        }
2509
9
        if (err != TxnErrorCode::TXN_OK) {
2510
0
            LOG_WARNING("failed to get kv");
2511
0
            return -1;
2512
0
        }
2513
9
        part_pb.Clear();
2514
9
        if (!part_pb.ParseFromString(val)) {
2515
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2516
0
            return -1;
2517
0
        }
2518
        // Partitions with PREPARED state MUST have no data
2519
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2520
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2521
8
            txn->put(k, part_pb.SerializeAsString());
2522
8
            err = txn->commit();
2523
8
            if (err != TxnErrorCode::TXN_OK) {
2524
0
                LOG_WARNING("failed to commit txn: {}", err);
2525
0
                return -1;
2526
0
            }
2527
8
        }
2528
2529
9
        int ret = 0;
2530
33
        for (int64_t index_id : part_pb.index_id()) {
2531
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2532
1
                LOG_WARNING("failed to recycle tablets under partition")
2533
1
                        .tag("table_id", part_pb.table_id())
2534
1
                        .tag("instance_id", instance_id_)
2535
1
                        .tag("index_id", index_id)
2536
1
                        .tag("partition_id", partition_id);
2537
1
                ret = -1;
2538
1
            }
2539
33
        }
2540
9
        if (ret == 0 && part_pb.has_db_id()) {
2541
            // Recycle the versioned keys
2542
8
            std::unique_ptr<Transaction> txn;
2543
8
            err = txn_kv_->create_txn(&txn);
2544
8
            if (err != TxnErrorCode::TXN_OK) {
2545
0
                LOG_WARNING("failed to create txn").tag("err", err);
2546
0
                return -1;
2547
0
            }
2548
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2549
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2550
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2551
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2552
8
            std::string partition_version_key =
2553
8
                    versioned::partition_version_key({instance_id_, partition_id});
2554
8
            versioned_remove_all(txn.get(), meta_key);
2555
8
            txn->remove(index_key);
2556
8
            txn->remove(inverted_index_key);
2557
8
            versioned_remove_all(txn.get(), partition_version_key);
2558
8
            err = txn->commit();
2559
8
            if (err != TxnErrorCode::TXN_OK) {
2560
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2561
0
                return -1;
2562
0
            }
2563
8
        }
2564
2565
9
        if (ret == 0) {
2566
8
            ++num_recycled;
2567
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2568
8
            partition_keys.push_back(k);
2569
8
            if (part_pb.db_id() > 0) {
2570
8
                partition_version_keys.push_back(partition_version_key(
2571
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2572
8
            }
2573
8
            metrics_context.total_recycled_num = num_recycled;
2574
8
            metrics_context.report();
2575
8
        }
2576
9
        return ret;
2577
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2472
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2473
2
        ++num_scanned;
2474
2
        RecyclePartitionPB part_pb;
2475
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2476
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2477
0
            return -1;
2478
0
        }
2479
2
        int64_t current_time = ::time(nullptr);
2480
2
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2481
2
                                                            &earlest_ts)) { // not expired
2482
0
            return 0;
2483
0
        }
2484
2
        ++num_expired;
2485
        // decode partition_id
2486
2
        auto k1 = k;
2487
2
        k1.remove_prefix(1);
2488
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2489
2
        decode_key(&k1, &out);
2490
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2491
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2492
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2493
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2494
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2495
        // Change state to RECYCLING
2496
2
        std::unique_ptr<Transaction> txn;
2497
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2498
2
        if (err != TxnErrorCode::TXN_OK) {
2499
0
            LOG_WARNING("failed to create txn").tag("err", err);
2500
0
            return -1;
2501
0
        }
2502
2
        std::string val;
2503
2
        err = txn->get(k, &val);
2504
2
        if (err ==
2505
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2506
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2507
0
            return 0;
2508
0
        }
2509
2
        if (err != TxnErrorCode::TXN_OK) {
2510
0
            LOG_WARNING("failed to get kv");
2511
0
            return -1;
2512
0
        }
2513
2
        part_pb.Clear();
2514
2
        if (!part_pb.ParseFromString(val)) {
2515
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2516
0
            return -1;
2517
0
        }
2518
        // Partitions with PREPARED state MUST have no data
2519
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2520
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2521
1
            txn->put(k, part_pb.SerializeAsString());
2522
1
            err = txn->commit();
2523
1
            if (err != TxnErrorCode::TXN_OK) {
2524
0
                LOG_WARNING("failed to commit txn: {}", err);
2525
0
                return -1;
2526
0
            }
2527
1
        }
2528
2529
2
        int ret = 0;
2530
2
        for (int64_t index_id : part_pb.index_id()) {
2531
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2532
1
                LOG_WARNING("failed to recycle tablets under partition")
2533
1
                        .tag("table_id", part_pb.table_id())
2534
1
                        .tag("instance_id", instance_id_)
2535
1
                        .tag("index_id", index_id)
2536
1
                        .tag("partition_id", partition_id);
2537
1
                ret = -1;
2538
1
            }
2539
2
        }
2540
2
        if (ret == 0 && part_pb.has_db_id()) {
2541
            // Recycle the versioned keys
2542
1
            std::unique_ptr<Transaction> txn;
2543
1
            err = txn_kv_->create_txn(&txn);
2544
1
            if (err != TxnErrorCode::TXN_OK) {
2545
0
                LOG_WARNING("failed to create txn").tag("err", err);
2546
0
                return -1;
2547
0
            }
2548
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2549
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2550
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2551
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2552
1
            std::string partition_version_key =
2553
1
                    versioned::partition_version_key({instance_id_, partition_id});
2554
1
            versioned_remove_all(txn.get(), meta_key);
2555
1
            txn->remove(index_key);
2556
1
            txn->remove(inverted_index_key);
2557
1
            versioned_remove_all(txn.get(), partition_version_key);
2558
1
            err = txn->commit();
2559
1
            if (err != TxnErrorCode::TXN_OK) {
2560
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2561
0
                return -1;
2562
0
            }
2563
1
        }
2564
2565
2
        if (ret == 0) {
2566
1
            ++num_recycled;
2567
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2568
1
            partition_keys.push_back(k);
2569
1
            if (part_pb.db_id() > 0) {
2570
1
                partition_version_keys.push_back(partition_version_key(
2571
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2572
1
            }
2573
1
            metrics_context.total_recycled_num = num_recycled;
2574
1
            metrics_context.report();
2575
1
        }
2576
2
        return ret;
2577
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2472
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2473
7
        ++num_scanned;
2474
7
        RecyclePartitionPB part_pb;
2475
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2476
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2477
0
            return -1;
2478
0
        }
2479
7
        int64_t current_time = ::time(nullptr);
2480
7
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2481
7
                                                            &earlest_ts)) { // not expired
2482
0
            return 0;
2483
0
        }
2484
7
        ++num_expired;
2485
        // decode partition_id
2486
7
        auto k1 = k;
2487
7
        k1.remove_prefix(1);
2488
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2489
7
        decode_key(&k1, &out);
2490
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2491
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2492
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2493
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2494
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2495
        // Change state to RECYCLING
2496
7
        std::unique_ptr<Transaction> txn;
2497
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2498
7
        if (err != TxnErrorCode::TXN_OK) {
2499
0
            LOG_WARNING("failed to create txn").tag("err", err);
2500
0
            return -1;
2501
0
        }
2502
7
        std::string val;
2503
7
        err = txn->get(k, &val);
2504
7
        if (err ==
2505
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2506
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2507
0
            return 0;
2508
0
        }
2509
7
        if (err != TxnErrorCode::TXN_OK) {
2510
0
            LOG_WARNING("failed to get kv");
2511
0
            return -1;
2512
0
        }
2513
7
        part_pb.Clear();
2514
7
        if (!part_pb.ParseFromString(val)) {
2515
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2516
0
            return -1;
2517
0
        }
2518
        // Partitions with PREPARED state MUST have no data
2519
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2520
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2521
7
            txn->put(k, part_pb.SerializeAsString());
2522
7
            err = txn->commit();
2523
7
            if (err != TxnErrorCode::TXN_OK) {
2524
0
                LOG_WARNING("failed to commit txn: {}", err);
2525
0
                return -1;
2526
0
            }
2527
7
        }
2528
2529
7
        int ret = 0;
2530
31
        for (int64_t index_id : part_pb.index_id()) {
2531
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2532
0
                LOG_WARNING("failed to recycle tablets under partition")
2533
0
                        .tag("table_id", part_pb.table_id())
2534
0
                        .tag("instance_id", instance_id_)
2535
0
                        .tag("index_id", index_id)
2536
0
                        .tag("partition_id", partition_id);
2537
0
                ret = -1;
2538
0
            }
2539
31
        }
2540
7
        if (ret == 0 && part_pb.has_db_id()) {
2541
            // Recycle the versioned keys
2542
7
            std::unique_ptr<Transaction> txn;
2543
7
            err = txn_kv_->create_txn(&txn);
2544
7
            if (err != TxnErrorCode::TXN_OK) {
2545
0
                LOG_WARNING("failed to create txn").tag("err", err);
2546
0
                return -1;
2547
0
            }
2548
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2549
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2550
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2551
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2552
7
            std::string partition_version_key =
2553
7
                    versioned::partition_version_key({instance_id_, partition_id});
2554
7
            versioned_remove_all(txn.get(), meta_key);
2555
7
            txn->remove(index_key);
2556
7
            txn->remove(inverted_index_key);
2557
7
            versioned_remove_all(txn.get(), partition_version_key);
2558
7
            err = txn->commit();
2559
7
            if (err != TxnErrorCode::TXN_OK) {
2560
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2561
0
                return -1;
2562
0
            }
2563
7
        }
2564
2565
7
        if (ret == 0) {
2566
7
            ++num_recycled;
2567
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2568
7
            partition_keys.push_back(k);
2569
7
            if (part_pb.db_id() > 0) {
2570
7
                partition_version_keys.push_back(partition_version_key(
2571
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2572
7
            }
2573
7
            metrics_context.total_recycled_num = num_recycled;
2574
7
            metrics_context.report();
2575
7
        }
2576
7
        return ret;
2577
7
    };
2578
2579
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2580
5
        if (partition_keys.empty()) return 0;
2581
4
        DORIS_CLOUD_DEFER {
2582
4
            partition_keys.clear();
2583
4
            partition_version_keys.clear();
2584
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2581
1
        DORIS_CLOUD_DEFER {
2582
1
            partition_keys.clear();
2583
1
            partition_version_keys.clear();
2584
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2581
3
        DORIS_CLOUD_DEFER {
2582
3
            partition_keys.clear();
2583
3
            partition_version_keys.clear();
2584
3
        };
2585
4
        std::unique_ptr<Transaction> txn;
2586
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2587
4
        if (err != TxnErrorCode::TXN_OK) {
2588
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2589
0
            return -1;
2590
0
        }
2591
8
        for (auto& k : partition_keys) {
2592
8
            txn->remove(k);
2593
8
        }
2594
8
        for (auto& k : partition_version_keys) {
2595
8
            txn->remove(k);
2596
8
        }
2597
4
        err = txn->commit();
2598
4
        if (err != TxnErrorCode::TXN_OK) {
2599
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2600
0
                         << " err=" << err;
2601
0
            return -1;
2602
0
        }
2603
4
        return 0;
2604
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2579
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2580
2
        if (partition_keys.empty()) return 0;
2581
1
        DORIS_CLOUD_DEFER {
2582
1
            partition_keys.clear();
2583
1
            partition_version_keys.clear();
2584
1
        };
2585
1
        std::unique_ptr<Transaction> txn;
2586
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2587
1
        if (err != TxnErrorCode::TXN_OK) {
2588
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2589
0
            return -1;
2590
0
        }
2591
1
        for (auto& k : partition_keys) {
2592
1
            txn->remove(k);
2593
1
        }
2594
1
        for (auto& k : partition_version_keys) {
2595
1
            txn->remove(k);
2596
1
        }
2597
1
        err = txn->commit();
2598
1
        if (err != TxnErrorCode::TXN_OK) {
2599
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2600
0
                         << " err=" << err;
2601
0
            return -1;
2602
0
        }
2603
1
        return 0;
2604
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2579
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2580
3
        if (partition_keys.empty()) return 0;
2581
3
        DORIS_CLOUD_DEFER {
2582
3
            partition_keys.clear();
2583
3
            partition_version_keys.clear();
2584
3
        };
2585
3
        std::unique_ptr<Transaction> txn;
2586
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2587
3
        if (err != TxnErrorCode::TXN_OK) {
2588
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2589
0
            return -1;
2590
0
        }
2591
7
        for (auto& k : partition_keys) {
2592
7
            txn->remove(k);
2593
7
        }
2594
7
        for (auto& k : partition_version_keys) {
2595
7
            txn->remove(k);
2596
7
        }
2597
3
        err = txn->commit();
2598
3
        if (err != TxnErrorCode::TXN_OK) {
2599
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2600
0
                         << " err=" << err;
2601
0
            return -1;
2602
0
        }
2603
3
        return 0;
2604
3
    };
2605
2606
15
    if (config::enable_recycler_stats_metrics) {
2607
0
        scan_and_statistics_partitions();
2608
0
    }
2609
    // recycle_func and loop_done for scan and recycle
2610
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
2611
15
}
2612
2613
14
int InstanceRecycler::recycle_versions() {
2614
14
    if (should_recycle_versioned_keys()) {
2615
2
        return recycle_orphan_partitions();
2616
2
    }
2617
2618
12
    int64_t num_scanned = 0;
2619
12
    int64_t num_recycled = 0;
2620
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
2621
2622
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
2623
2624
12
    auto start_time = steady_clock::now();
2625
2626
12
    DORIS_CLOUD_DEFER {
2627
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2628
12
        metrics_context.finish_report();
2629
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2630
12
                .tag("instance_id", instance_id_)
2631
12
                .tag("num_scanned", num_scanned)
2632
12
                .tag("num_recycled", num_recycled);
2633
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
2626
12
    DORIS_CLOUD_DEFER {
2627
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2628
12
        metrics_context.finish_report();
2629
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2630
12
                .tag("instance_id", instance_id_)
2631
12
                .tag("num_scanned", num_scanned)
2632
12
                .tag("num_recycled", num_recycled);
2633
12
    };
2634
2635
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
2636
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
2637
12
    int64_t last_scanned_table_id = 0;
2638
12
    bool is_recycled = false; // Is last scanned kv recycled
2639
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
2640
12
                         &metrics_context, this](std::string_view k, std::string_view) {
2641
2
        ++num_scanned;
2642
2
        auto k1 = k;
2643
2
        k1.remove_prefix(1);
2644
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2645
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2646
2
        decode_key(&k1, &out);
2647
2
        DCHECK_EQ(out.size(), 6) << k;
2648
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2649
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2650
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2651
0
            return 0;
2652
0
        }
2653
2
        last_scanned_table_id = table_id;
2654
2
        is_recycled = false;
2655
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2656
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2657
2
        std::unique_ptr<Transaction> txn;
2658
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2659
2
        if (err != TxnErrorCode::TXN_OK) {
2660
0
            return -1;
2661
0
        }
2662
2
        std::unique_ptr<RangeGetIterator> iter;
2663
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2664
2
        if (err != TxnErrorCode::TXN_OK) {
2665
0
            return -1;
2666
0
        }
2667
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2668
1
            return 0;
2669
1
        }
2670
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2671
        // 1. Remove all partition version kvs of this table
2672
1
        auto partition_version_key_begin =
2673
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2674
1
        auto partition_version_key_end =
2675
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2676
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2677
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2678
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2679
1
                     << " table_id=" << table_id;
2680
        // 2. Remove the table version kv of this table
2681
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2682
1
        txn->remove(tbl_version_key);
2683
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2684
        // 3. Remove mow delete bitmap update lock and tablet job lock
2685
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2686
1
        txn->remove(lock_key);
2687
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2688
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2689
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2690
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2691
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2692
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2693
1
                     << " table_id=" << table_id;
2694
1
        err = txn->commit();
2695
1
        if (err != TxnErrorCode::TXN_OK) {
2696
0
            return -1;
2697
0
        }
2698
1
        metrics_context.total_recycled_num = ++num_recycled;
2699
1
        metrics_context.report();
2700
1
        is_recycled = true;
2701
1
        return 0;
2702
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2640
2
                         &metrics_context, this](std::string_view k, std::string_view) {
2641
2
        ++num_scanned;
2642
2
        auto k1 = k;
2643
2
        k1.remove_prefix(1);
2644
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2645
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2646
2
        decode_key(&k1, &out);
2647
2
        DCHECK_EQ(out.size(), 6) << k;
2648
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2649
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2650
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2651
0
            return 0;
2652
0
        }
2653
2
        last_scanned_table_id = table_id;
2654
2
        is_recycled = false;
2655
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2656
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2657
2
        std::unique_ptr<Transaction> txn;
2658
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2659
2
        if (err != TxnErrorCode::TXN_OK) {
2660
0
            return -1;
2661
0
        }
2662
2
        std::unique_ptr<RangeGetIterator> iter;
2663
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2664
2
        if (err != TxnErrorCode::TXN_OK) {
2665
0
            return -1;
2666
0
        }
2667
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2668
1
            return 0;
2669
1
        }
2670
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2671
        // 1. Remove all partition version kvs of this table
2672
1
        auto partition_version_key_begin =
2673
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2674
1
        auto partition_version_key_end =
2675
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2676
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2677
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2678
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2679
1
                     << " table_id=" << table_id;
2680
        // 2. Remove the table version kv of this table
2681
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2682
1
        txn->remove(tbl_version_key);
2683
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2684
        // 3. Remove mow delete bitmap update lock and tablet job lock
2685
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2686
1
        txn->remove(lock_key);
2687
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2688
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2689
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2690
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2691
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2692
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2693
1
                     << " table_id=" << table_id;
2694
1
        err = txn->commit();
2695
1
        if (err != TxnErrorCode::TXN_OK) {
2696
0
            return -1;
2697
0
        }
2698
1
        metrics_context.total_recycled_num = ++num_recycled;
2699
1
        metrics_context.report();
2700
1
        is_recycled = true;
2701
1
        return 0;
2702
1
    };
2703
2704
12
    if (config::enable_recycler_stats_metrics) {
2705
0
        scan_and_statistics_versions();
2706
0
    }
2707
    // recycle_func and loop_done for scan and recycle
2708
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
2709
14
}
2710
2711
3
int InstanceRecycler::recycle_orphan_partitions() {
2712
3
    int64_t num_scanned = 0;
2713
3
    int64_t num_recycled = 0;
2714
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
2715
2716
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
2717
3
            .tag("instance_id", instance_id_);
2718
2719
3
    auto start_time = steady_clock::now();
2720
2721
3
    DORIS_CLOUD_DEFER {
2722
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2723
3
        metrics_context.finish_report();
2724
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2725
3
                .tag("instance_id", instance_id_)
2726
3
                .tag("num_scanned", num_scanned)
2727
3
                .tag("num_recycled", num_recycled);
2728
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
2721
3
    DORIS_CLOUD_DEFER {
2722
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2723
3
        metrics_context.finish_report();
2724
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2725
3
                .tag("instance_id", instance_id_)
2726
3
                .tag("num_scanned", num_scanned)
2727
3
                .tag("num_recycled", num_recycled);
2728
3
    };
2729
2730
3
    bool is_empty_table = false;        // whether the table has no indexes
2731
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
2732
3
    int64_t current_table_id = 0;       // current scanning table id
2733
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
2734
3
                         &current_table_id, &is_table_kvs_recycled,
2735
3
                         this](std::string_view k, std::string_view) {
2736
2
        ++num_scanned;
2737
2738
2
        std::string_view k1(k);
2739
2
        int64_t db_id, table_id, partition_id;
2740
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2741
2
                                                            &partition_id)) {
2742
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2743
0
            return -1;
2744
2
        } else if (table_id != current_table_id) {
2745
2
            current_table_id = table_id;
2746
2
            is_table_kvs_recycled = false;
2747
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2748
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2749
2
            if (err != TxnErrorCode::TXN_OK) {
2750
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2751
0
                             << " table_id=" << table_id << " err=" << err;
2752
0
                return -1;
2753
0
            }
2754
2
        }
2755
2756
2
        if (!is_empty_table) {
2757
            // table is not empty, skip recycle
2758
1
            return 0;
2759
1
        }
2760
2761
1
        std::unique_ptr<Transaction> txn;
2762
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2763
1
        if (err != TxnErrorCode::TXN_OK) {
2764
0
            return -1;
2765
0
        }
2766
2767
        // 1. Remove all partition related kvs
2768
1
        std::string partition_meta_key =
2769
1
                versioned::meta_partition_key({instance_id_, partition_id});
2770
1
        std::string partition_index_key =
2771
1
                versioned::partition_index_key({instance_id_, partition_id});
2772
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2773
1
                {instance_id_, db_id, table_id, partition_id});
2774
1
        std::string partition_version_key =
2775
1
                versioned::partition_version_key({instance_id_, partition_id});
2776
1
        txn->remove(partition_index_key);
2777
1
        txn->remove(partition_inverted_key);
2778
1
        versioned_remove_all(txn.get(), partition_meta_key);
2779
1
        versioned_remove_all(txn.get(), partition_version_key);
2780
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2781
1
                     << " table_id=" << table_id << " db_id=" << db_id
2782
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2783
1
                     << " partition_version_key=" << hex(partition_version_key);
2784
2785
1
        if (!is_table_kvs_recycled) {
2786
1
            is_table_kvs_recycled = true;
2787
2788
            // 2. Remove the table version kv of this table
2789
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2790
1
            versioned_remove_all(txn.get(), table_version_key);
2791
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2792
            // 3. Remove mow delete bitmap update lock and tablet job lock
2793
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2794
1
            txn->remove(lock_key);
2795
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2796
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2797
1
            std::string tablet_job_key_end =
2798
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2799
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2800
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2801
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2802
1
                         << " table_id=" << table_id;
2803
1
        }
2804
2805
1
        err = txn->commit();
2806
1
        if (err != TxnErrorCode::TXN_OK) {
2807
0
            return -1;
2808
0
        }
2809
1
        metrics_context.total_recycled_num = ++num_recycled;
2810
1
        metrics_context.report();
2811
1
        return 0;
2812
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2735
2
                         this](std::string_view k, std::string_view) {
2736
2
        ++num_scanned;
2737
2738
2
        std::string_view k1(k);
2739
2
        int64_t db_id, table_id, partition_id;
2740
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2741
2
                                                            &partition_id)) {
2742
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2743
0
            return -1;
2744
2
        } else if (table_id != current_table_id) {
2745
2
            current_table_id = table_id;
2746
2
            is_table_kvs_recycled = false;
2747
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2748
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2749
2
            if (err != TxnErrorCode::TXN_OK) {
2750
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2751
0
                             << " table_id=" << table_id << " err=" << err;
2752
0
                return -1;
2753
0
            }
2754
2
        }
2755
2756
2
        if (!is_empty_table) {
2757
            // table is not empty, skip recycle
2758
1
            return 0;
2759
1
        }
2760
2761
1
        std::unique_ptr<Transaction> txn;
2762
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2763
1
        if (err != TxnErrorCode::TXN_OK) {
2764
0
            return -1;
2765
0
        }
2766
2767
        // 1. Remove all partition related kvs
2768
1
        std::string partition_meta_key =
2769
1
                versioned::meta_partition_key({instance_id_, partition_id});
2770
1
        std::string partition_index_key =
2771
1
                versioned::partition_index_key({instance_id_, partition_id});
2772
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2773
1
                {instance_id_, db_id, table_id, partition_id});
2774
1
        std::string partition_version_key =
2775
1
                versioned::partition_version_key({instance_id_, partition_id});
2776
1
        txn->remove(partition_index_key);
2777
1
        txn->remove(partition_inverted_key);
2778
1
        versioned_remove_all(txn.get(), partition_meta_key);
2779
1
        versioned_remove_all(txn.get(), partition_version_key);
2780
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2781
1
                     << " table_id=" << table_id << " db_id=" << db_id
2782
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2783
1
                     << " partition_version_key=" << hex(partition_version_key);
2784
2785
1
        if (!is_table_kvs_recycled) {
2786
1
            is_table_kvs_recycled = true;
2787
2788
            // 2. Remove the table version kv of this table
2789
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2790
1
            versioned_remove_all(txn.get(), table_version_key);
2791
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2792
            // 3. Remove mow delete bitmap update lock and tablet job lock
2793
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2794
1
            txn->remove(lock_key);
2795
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2796
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2797
1
            std::string tablet_job_key_end =
2798
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2799
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2800
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2801
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2802
1
                         << " table_id=" << table_id;
2803
1
        }
2804
2805
1
        err = txn->commit();
2806
1
        if (err != TxnErrorCode::TXN_OK) {
2807
0
            return -1;
2808
0
        }
2809
1
        metrics_context.total_recycled_num = ++num_recycled;
2810
1
        metrics_context.report();
2811
1
        return 0;
2812
1
    };
2813
2814
    // recycle_func and loop_done for scan and recycle
2815
3
    return scan_and_recycle(
2816
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
2817
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
2818
3
            std::move(recycle_func));
2819
3
}
2820
2821
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
2822
                                      RecyclerMetricsContext& metrics_context,
2823
49
                                      int64_t partition_id) {
2824
49
    bool is_multi_version =
2825
49
            instance_info_.has_multi_version_status() &&
2826
49
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
2827
49
    int64_t num_scanned = 0;
2828
49
    std::atomic_long num_recycled = 0;
2829
2830
49
    std::string tablet_key_begin, tablet_key_end;
2831
49
    std::string stats_key_begin, stats_key_end;
2832
49
    std::string job_key_begin, job_key_end;
2833
2834
49
    std::string tablet_belongs;
2835
49
    if (partition_id > 0) {
2836
        // recycle tablets in a partition belonging to the index
2837
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2838
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2839
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
2840
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
2841
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
2842
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
2843
33
        tablet_belongs = "partition";
2844
33
    } else {
2845
        // recycle tablets in the index
2846
16
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2847
16
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2848
16
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
2849
16
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
2850
16
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
2851
16
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
2852
16
        tablet_belongs = "index";
2853
16
    }
2854
2855
49
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
2856
49
            .tag("table_id", table_id)
2857
49
            .tag("index_id", index_id)
2858
49
            .tag("partition_id", partition_id);
2859
2860
49
    auto start_time = steady_clock::now();
2861
2862
49
    DORIS_CLOUD_DEFER {
2863
49
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2864
49
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2865
49
                .tag("instance_id", instance_id_)
2866
49
                .tag("table_id", table_id)
2867
49
                .tag("index_id", index_id)
2868
49
                .tag("partition_id", partition_id)
2869
49
                .tag("num_scanned", num_scanned)
2870
49
                .tag("num_recycled", num_recycled);
2871
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2862
4
    DORIS_CLOUD_DEFER {
2863
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2864
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2865
4
                .tag("instance_id", instance_id_)
2866
4
                .tag("table_id", table_id)
2867
4
                .tag("index_id", index_id)
2868
4
                .tag("partition_id", partition_id)
2869
4
                .tag("num_scanned", num_scanned)
2870
4
                .tag("num_recycled", num_recycled);
2871
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2862
45
    DORIS_CLOUD_DEFER {
2863
45
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2864
45
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2865
45
                .tag("instance_id", instance_id_)
2866
45
                .tag("table_id", table_id)
2867
45
                .tag("index_id", index_id)
2868
45
                .tag("partition_id", partition_id)
2869
45
                .tag("num_scanned", num_scanned)
2870
45
                .tag("num_recycled", num_recycled);
2871
45
    };
2872
2873
    // The first string_view represents the tablet key which has been recycled
2874
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
2875
49
    using TabletKeyPair = std::pair<std::string_view, bool>;
2876
49
    SyncExecutor<TabletKeyPair> sync_executor(
2877
49
            _thread_pool_group.recycle_tablet_pool,
2878
49
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
2879
49
                        index_id, partition_id),
2880
4.23k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2880
4.00k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2880
237
            [](const TabletKeyPair& k) { return k.first.empty(); });
2881
2882
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
2883
49
    std::vector<std::string> tablet_idx_keys;
2884
49
    std::vector<std::string> restore_job_keys;
2885
49
    std::vector<std::string> init_rs_keys;
2886
49
    std::vector<std::string> tablet_compact_stats_keys;
2887
49
    std::vector<std::string> tablet_load_stats_keys;
2888
49
    std::vector<std::string> versioned_meta_tablet_keys;
2889
8.24k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2890
8.24k
        bool use_range_remove = true;
2891
8.24k
        ++num_scanned;
2892
8.24k
        doris::TabletMetaCloudPB tablet_meta_pb;
2893
8.24k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2894
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2895
0
            use_range_remove = false;
2896
0
            return -1;
2897
0
        }
2898
8.24k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2899
2900
8.24k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2901
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2902
4.00k
            return -1;
2903
4.00k
        }
2904
2905
4.24k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2906
4.24k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2907
4.24k
        if (is_multi_version) {
2908
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2909
6
            tablet_compact_stats_keys.push_back(
2910
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2911
6
            tablet_load_stats_keys.push_back(
2912
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2913
6
            versioned_meta_tablet_keys.push_back(
2914
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2915
6
        }
2916
4.24k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2917
4.23k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2918
4.23k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2919
4.23k
            if (recycle_tablet(tid, metrics_context) != 0) {
2920
1
                LOG_WARNING("failed to recycle tablet")
2921
1
                        .tag("instance_id", instance_id_)
2922
1
                        .tag("tablet_id", tid);
2923
1
                range_move = false;
2924
1
                return {std::string_view(), range_move};
2925
1
            }
2926
4.23k
            ++num_recycled;
2927
4.23k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2928
4.23k
            return {k, range_move};
2929
4.23k
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2918
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2919
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2920
0
                LOG_WARNING("failed to recycle tablet")
2921
0
                        .tag("instance_id", instance_id_)
2922
0
                        .tag("tablet_id", tid);
2923
0
                range_move = false;
2924
0
                return {std::string_view(), range_move};
2925
0
            }
2926
4.00k
            ++num_recycled;
2927
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2928
4.00k
            return {k, range_move};
2929
4.00k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2918
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2919
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2920
1
                LOG_WARNING("failed to recycle tablet")
2921
1
                        .tag("instance_id", instance_id_)
2922
1
                        .tag("tablet_id", tid);
2923
1
                range_move = false;
2924
1
                return {std::string_view(), range_move};
2925
1
            }
2926
236
            ++num_recycled;
2927
236
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2928
236
            return {k, range_move};
2929
237
        });
2930
4.23k
        return 0;
2931
4.24k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2889
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2890
8.00k
        bool use_range_remove = true;
2891
8.00k
        ++num_scanned;
2892
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
2893
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2894
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2895
0
            use_range_remove = false;
2896
0
            return -1;
2897
0
        }
2898
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2899
2900
8.00k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2901
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2902
4.00k
            return -1;
2903
4.00k
        }
2904
2905
4.00k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2906
4.00k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2907
4.00k
        if (is_multi_version) {
2908
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2909
0
            tablet_compact_stats_keys.push_back(
2910
0
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2911
0
            tablet_load_stats_keys.push_back(
2912
0
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2913
0
            versioned_meta_tablet_keys.push_back(
2914
0
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2915
0
        }
2916
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2917
4.00k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2918
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2919
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2920
4.00k
                LOG_WARNING("failed to recycle tablet")
2921
4.00k
                        .tag("instance_id", instance_id_)
2922
4.00k
                        .tag("tablet_id", tid);
2923
4.00k
                range_move = false;
2924
4.00k
                return {std::string_view(), range_move};
2925
4.00k
            }
2926
4.00k
            ++num_recycled;
2927
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2928
4.00k
            return {k, range_move};
2929
4.00k
        });
2930
4.00k
        return 0;
2931
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2889
240
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2890
240
        bool use_range_remove = true;
2891
240
        ++num_scanned;
2892
240
        doris::TabletMetaCloudPB tablet_meta_pb;
2893
240
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2894
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2895
0
            use_range_remove = false;
2896
0
            return -1;
2897
0
        }
2898
240
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2899
2900
240
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2901
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2902
0
            return -1;
2903
0
        }
2904
2905
240
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2906
240
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2907
240
        if (is_multi_version) {
2908
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2909
6
            tablet_compact_stats_keys.push_back(
2910
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2911
6
            tablet_load_stats_keys.push_back(
2912
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2913
6
            versioned_meta_tablet_keys.push_back(
2914
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2915
6
        }
2916
240
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2917
237
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2918
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2919
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2920
237
                LOG_WARNING("failed to recycle tablet")
2921
237
                        .tag("instance_id", instance_id_)
2922
237
                        .tag("tablet_id", tid);
2923
237
                range_move = false;
2924
237
                return {std::string_view(), range_move};
2925
237
            }
2926
237
            ++num_recycled;
2927
237
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2928
237
            return {k, range_move};
2929
237
        });
2930
237
        return 0;
2931
240
    };
2932
2933
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
2934
49
    auto loop_done = [&, this]() -> int {
2935
49
        bool finished = true;
2936
49
        auto tablet_keys = sync_executor.when_all(&finished);
2937
49
        if (!finished) {
2938
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2939
1
            return -1;
2940
1
        }
2941
48
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2942
46
        if (!tablet_keys.empty() &&
2943
46
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2943
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2943
42
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2944
0
            return -1;
2945
0
        }
2946
        // sort the vector using key's order
2947
46
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2948
49.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2948
48.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2948
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2949
46
        bool use_range_remove = true;
2950
4.23k
        for (auto& [_, remove] : tablet_keys) {
2951
4.23k
            if (!remove) {
2952
0
                use_range_remove = remove;
2953
0
                break;
2954
0
            }
2955
4.23k
        }
2956
46
        DORIS_CLOUD_DEFER {
2957
46
            tablet_idx_keys.clear();
2958
46
            restore_job_keys.clear();
2959
46
            init_rs_keys.clear();
2960
46
            tablet_compact_stats_keys.clear();
2961
46
            tablet_load_stats_keys.clear();
2962
46
            versioned_meta_tablet_keys.clear();
2963
46
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2956
2
        DORIS_CLOUD_DEFER {
2957
2
            tablet_idx_keys.clear();
2958
2
            restore_job_keys.clear();
2959
2
            init_rs_keys.clear();
2960
2
            tablet_compact_stats_keys.clear();
2961
2
            tablet_load_stats_keys.clear();
2962
2
            versioned_meta_tablet_keys.clear();
2963
2
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2956
44
        DORIS_CLOUD_DEFER {
2957
44
            tablet_idx_keys.clear();
2958
44
            restore_job_keys.clear();
2959
44
            init_rs_keys.clear();
2960
44
            tablet_compact_stats_keys.clear();
2961
44
            tablet_load_stats_keys.clear();
2962
44
            versioned_meta_tablet_keys.clear();
2963
44
        };
2964
46
        std::unique_ptr<Transaction> txn;
2965
46
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2966
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2967
0
            return -1;
2968
0
        }
2969
46
        std::string tablet_key_end;
2970
46
        if (!tablet_keys.empty()) {
2971
44
            if (use_range_remove) {
2972
44
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2973
44
                txn->remove(tablet_keys.front().first, tablet_key_end);
2974
44
            } else {
2975
0
                for (auto& [k, _] : tablet_keys) {
2976
0
                    txn->remove(k);
2977
0
                }
2978
0
            }
2979
44
        }
2980
46
        if (is_multi_version) {
2981
6
            for (auto& k : tablet_compact_stats_keys) {
2982
                // Remove all versions of tablet compact stats for recycled tablet
2983
6
                LOG_INFO("remove versioned tablet compact stats key")
2984
6
                        .tag("compact_stats_key", hex(k));
2985
6
                versioned_remove_all(txn.get(), k);
2986
6
            }
2987
6
            for (auto& k : tablet_load_stats_keys) {
2988
                // Remove all versions of tablet load stats for recycled tablet
2989
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2990
6
                versioned_remove_all(txn.get(), k);
2991
6
            }
2992
6
            for (auto& k : versioned_meta_tablet_keys) {
2993
                // Remove all versions of meta tablet for recycled tablet
2994
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2995
6
                versioned_remove_all(txn.get(), k);
2996
6
            }
2997
5
        }
2998
4.24k
        for (auto& k : tablet_idx_keys) {
2999
4.24k
            txn->remove(k);
3000
4.24k
        }
3001
4.24k
        for (auto& k : restore_job_keys) {
3002
4.24k
            txn->remove(k);
3003
4.24k
        }
3004
46
        for (auto& k : init_rs_keys) {
3005
0
            txn->remove(k);
3006
0
        }
3007
46
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
3008
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
3009
0
                         << ", err=" << err;
3010
0
            return -1;
3011
0
        }
3012
46
        return 0;
3013
46
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2934
4
    auto loop_done = [&, this]() -> int {
2935
4
        bool finished = true;
2936
4
        auto tablet_keys = sync_executor.when_all(&finished);
2937
4
        if (!finished) {
2938
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2939
0
            return -1;
2940
0
        }
2941
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2942
2
        if (!tablet_keys.empty() &&
2943
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2944
0
            return -1;
2945
0
        }
2946
        // sort the vector using key's order
2947
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2948
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2949
2
        bool use_range_remove = true;
2950
4.00k
        for (auto& [_, remove] : tablet_keys) {
2951
4.00k
            if (!remove) {
2952
0
                use_range_remove = remove;
2953
0
                break;
2954
0
            }
2955
4.00k
        }
2956
2
        DORIS_CLOUD_DEFER {
2957
2
            tablet_idx_keys.clear();
2958
2
            restore_job_keys.clear();
2959
2
            init_rs_keys.clear();
2960
2
            tablet_compact_stats_keys.clear();
2961
2
            tablet_load_stats_keys.clear();
2962
2
            versioned_meta_tablet_keys.clear();
2963
2
        };
2964
2
        std::unique_ptr<Transaction> txn;
2965
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2966
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2967
0
            return -1;
2968
0
        }
2969
2
        std::string tablet_key_end;
2970
2
        if (!tablet_keys.empty()) {
2971
2
            if (use_range_remove) {
2972
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2973
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
2974
2
            } else {
2975
0
                for (auto& [k, _] : tablet_keys) {
2976
0
                    txn->remove(k);
2977
0
                }
2978
0
            }
2979
2
        }
2980
2
        if (is_multi_version) {
2981
0
            for (auto& k : tablet_compact_stats_keys) {
2982
                // Remove all versions of tablet compact stats for recycled tablet
2983
0
                LOG_INFO("remove versioned tablet compact stats key")
2984
0
                        .tag("compact_stats_key", hex(k));
2985
0
                versioned_remove_all(txn.get(), k);
2986
0
            }
2987
0
            for (auto& k : tablet_load_stats_keys) {
2988
                // Remove all versions of tablet load stats for recycled tablet
2989
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2990
0
                versioned_remove_all(txn.get(), k);
2991
0
            }
2992
0
            for (auto& k : versioned_meta_tablet_keys) {
2993
                // Remove all versions of meta tablet for recycled tablet
2994
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2995
0
                versioned_remove_all(txn.get(), k);
2996
0
            }
2997
0
        }
2998
4.00k
        for (auto& k : tablet_idx_keys) {
2999
4.00k
            txn->remove(k);
3000
4.00k
        }
3001
4.00k
        for (auto& k : restore_job_keys) {
3002
4.00k
            txn->remove(k);
3003
4.00k
        }
3004
2
        for (auto& k : init_rs_keys) {
3005
0
            txn->remove(k);
3006
0
        }
3007
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
3008
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
3009
0
                         << ", err=" << err;
3010
0
            return -1;
3011
0
        }
3012
2
        return 0;
3013
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2934
45
    auto loop_done = [&, this]() -> int {
2935
45
        bool finished = true;
2936
45
        auto tablet_keys = sync_executor.when_all(&finished);
2937
45
        if (!finished) {
2938
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2939
1
            return -1;
2940
1
        }
2941
44
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2942
44
        if (!tablet_keys.empty() &&
2943
44
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2944
0
            return -1;
2945
0
        }
2946
        // sort the vector using key's order
2947
44
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2948
44
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2949
44
        bool use_range_remove = true;
2950
236
        for (auto& [_, remove] : tablet_keys) {
2951
236
            if (!remove) {
2952
0
                use_range_remove = remove;
2953
0
                break;
2954
0
            }
2955
236
        }
2956
44
        DORIS_CLOUD_DEFER {
2957
44
            tablet_idx_keys.clear();
2958
44
            restore_job_keys.clear();
2959
44
            init_rs_keys.clear();
2960
44
            tablet_compact_stats_keys.clear();
2961
44
            tablet_load_stats_keys.clear();
2962
44
            versioned_meta_tablet_keys.clear();
2963
44
        };
2964
44
        std::unique_ptr<Transaction> txn;
2965
44
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2966
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2967
0
            return -1;
2968
0
        }
2969
44
        std::string tablet_key_end;
2970
44
        if (!tablet_keys.empty()) {
2971
42
            if (use_range_remove) {
2972
42
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2973
42
                txn->remove(tablet_keys.front().first, tablet_key_end);
2974
42
            } else {
2975
0
                for (auto& [k, _] : tablet_keys) {
2976
0
                    txn->remove(k);
2977
0
                }
2978
0
            }
2979
42
        }
2980
44
        if (is_multi_version) {
2981
6
            for (auto& k : tablet_compact_stats_keys) {
2982
                // Remove all versions of tablet compact stats for recycled tablet
2983
6
                LOG_INFO("remove versioned tablet compact stats key")
2984
6
                        .tag("compact_stats_key", hex(k));
2985
6
                versioned_remove_all(txn.get(), k);
2986
6
            }
2987
6
            for (auto& k : tablet_load_stats_keys) {
2988
                // Remove all versions of tablet load stats for recycled tablet
2989
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2990
6
                versioned_remove_all(txn.get(), k);
2991
6
            }
2992
6
            for (auto& k : versioned_meta_tablet_keys) {
2993
                // Remove all versions of meta tablet for recycled tablet
2994
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2995
6
                versioned_remove_all(txn.get(), k);
2996
6
            }
2997
5
        }
2998
239
        for (auto& k : tablet_idx_keys) {
2999
239
            txn->remove(k);
3000
239
        }
3001
239
        for (auto& k : restore_job_keys) {
3002
239
            txn->remove(k);
3003
239
        }
3004
44
        for (auto& k : init_rs_keys) {
3005
0
            txn->remove(k);
3006
0
        }
3007
44
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
3008
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
3009
0
                         << ", err=" << err;
3010
0
            return -1;
3011
0
        }
3012
44
        return 0;
3013
44
    };
3014
3015
49
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
3016
49
                               std::move(loop_done));
3017
49
    if (ret != 0) {
3018
3
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
3019
3
        return ret;
3020
3
    }
3021
3022
    // directly remove tablet stats and tablet jobs of these dropped index or partition
3023
46
    std::unique_ptr<Transaction> txn;
3024
46
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3025
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
3026
0
        return -1;
3027
0
    }
3028
46
    txn->remove(stats_key_begin, stats_key_end);
3029
46
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
3030
46
                 << " end=" << hex(stats_key_end);
3031
46
    txn->remove(job_key_begin, job_key_end);
3032
46
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
3033
46
    std::string schema_key_begin, schema_key_end;
3034
46
    std::string schema_dict_key;
3035
46
    std::string versioned_schema_key_begin, versioned_schema_key_end;
3036
46
    if (partition_id <= 0) {
3037
        // Delete schema kv of this index
3038
14
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
3039
14
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
3040
14
        txn->remove(schema_key_begin, schema_key_end);
3041
14
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
3042
14
                     << " end=" << hex(schema_key_end);
3043
14
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
3044
14
        txn->remove(schema_dict_key);
3045
14
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
3046
14
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
3047
14
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
3048
14
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
3049
14
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
3050
14
                     << " end=" << hex(versioned_schema_key_end);
3051
14
    }
3052
3053
46
    TxnErrorCode err = txn->commit();
3054
46
    if (err != TxnErrorCode::TXN_OK) {
3055
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
3056
0
                     << " err=" << err;
3057
0
        return -1;
3058
0
    }
3059
3060
46
    return ret;
3061
46
}
3062
3063
5.61k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
3064
5.61k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
3065
5.61k
    int64_t num_segments = rs_meta_pb.num_segments();
3066
5.61k
    if (num_segments <= 0) return 0;
3067
3068
5.61k
    std::vector<std::string> file_paths;
3069
5.61k
    if (decrement_packed_file_ref_counts(rs_meta_pb) != 0) {
3070
0
        return -1;
3071
0
    }
3072
3073
    // Process inverted indexes
3074
5.61k
    std::vector<std::pair<int64_t, std::string>> index_ids;
3075
    // default format as v1.
3076
5.61k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3077
5.61k
    bool delete_rowset_data_by_prefix = false;
3078
5.61k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3079
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3080
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3081
0
        delete_rowset_data_by_prefix = true;
3082
5.61k
    } else if (rs_meta_pb.has_tablet_schema()) {
3083
10.0k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
3084
10.0k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3085
10.0k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3086
10.0k
            }
3087
10.0k
        }
3088
4.80k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
3089
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
3090
2.00k
        }
3091
4.80k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
3092
        // schema version and index id are not found, delete rowset data by prefix directly.
3093
0
        delete_rowset_data_by_prefix = true;
3094
809
    } else {
3095
        // otherwise, try to get schema kv
3096
809
        InvertedIndexInfo index_info;
3097
809
        int inverted_index_get_ret = inverted_index_id_cache_->get(
3098
809
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
3099
809
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3100
809
                                 &inverted_index_get_ret);
3101
809
        if (inverted_index_get_ret == 0) {
3102
809
            index_format = index_info.first;
3103
809
            index_ids = index_info.second;
3104
809
        } else if (inverted_index_get_ret == 1) {
3105
            // 1. Schema kv not found means tablet has been recycled
3106
            // Maybe some tablet recycle failed by some bugs
3107
            // We need to delete again to double check
3108
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3109
            // because we are uncertain about the inverted index information.
3110
            // If there are inverted indexes, some data might not be deleted,
3111
            // but this is acceptable as we have made our best effort to delete the data.
3112
0
            LOG_INFO(
3113
0
                    "delete rowset data schema kv not found, need to delete again to double "
3114
0
                    "check")
3115
0
                    .tag("instance_id", instance_id_)
3116
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3117
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
3118
            // Currently index_ids is guaranteed to be empty,
3119
            // but we clear it again here as a safeguard against future code changes
3120
            // that might cause index_ids to no longer be empty
3121
0
            index_format = InvertedIndexStorageFormatPB::V2;
3122
0
            index_ids.clear();
3123
0
        } else {
3124
            // failed to get schema kv, delete rowset data by prefix directly.
3125
0
            delete_rowset_data_by_prefix = true;
3126
0
        }
3127
809
    }
3128
3129
5.61k
    if (delete_rowset_data_by_prefix) {
3130
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
3131
0
                                  rs_meta_pb.rowset_id_v2());
3132
0
    }
3133
3134
5.61k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
3135
5.61k
    if (it == accessor_map_.end()) {
3136
1.60k
        LOG_WARNING("instance has no such resource id")
3137
1.60k
                .tag("instance_id", instance_id_)
3138
1.60k
                .tag("resource_id", rs_meta_pb.resource_id());
3139
1.60k
        return -1;
3140
1.60k
    }
3141
4.01k
    auto& accessor = it->second;
3142
3143
4.01k
    int64_t tablet_id = rs_meta_pb.tablet_id();
3144
4.01k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
3145
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
3146
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3147
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
3148
40.0k
            for (const auto& index_id : index_ids) {
3149
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
3150
40.0k
                                                            index_id.second));
3151
40.0k
            }
3152
20.0k
        } else if (!index_ids.empty()) {
3153
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3154
0
        }
3155
20.0k
    }
3156
3157
    // Process delete bitmap - check where it's stored.
3158
4.01k
    DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3159
4.01k
    if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
3160
4.01k
                                                       &delete_bitmap_storage_type) != 0) {
3161
0
        LOG_WARNING("failed to decrement delete bitmap packed file ref count")
3162
0
                .tag("instance_id", instance_id_)
3163
0
                .tag("tablet_id", tablet_id)
3164
0
                .tag("rowset_id", rowset_id);
3165
0
        return -1;
3166
0
    }
3167
4.01k
    if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) {
3168
2.00k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3169
2.00k
    }
3170
    // TODO(AlexYue): seems could do do batch
3171
4.01k
    return accessor->delete_files(file_paths);
3172
4.01k
}
3173
3174
62.3k
int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb) {
3175
62.3k
    LOG_INFO("begin process_packed_file_location_index")
3176
62.3k
            .tag("instance_id", instance_id_)
3177
62.3k
            .tag("tablet_id", rs_meta_pb.tablet_id())
3178
62.3k
            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3179
62.3k
            .tag("index_map_size", rs_meta_pb.packed_slice_locations_size());
3180
62.3k
    const auto& index_map = rs_meta_pb.packed_slice_locations();
3181
62.3k
    if (index_map.empty()) {
3182
62.3k
        LOG_INFO("skip merge file update: empty merge_file_segment_index")
3183
62.3k
                .tag("instance_id", instance_id_)
3184
62.3k
                .tag("tablet_id", rs_meta_pb.tablet_id())
3185
62.3k
                .tag("rowset_id", rs_meta_pb.rowset_id_v2());
3186
62.3k
        return 0;
3187
62.3k
    }
3188
3189
13
    struct PackedSmallFileInfo {
3190
13
        std::string small_file_path;
3191
13
    };
3192
13
    std::unordered_map<std::string, std::vector<PackedSmallFileInfo>> packed_file_updates;
3193
13
    packed_file_updates.reserve(index_map.size());
3194
27
    for (const auto& [small_path, index_pb] : index_map) {
3195
27
        if (!index_pb.has_packed_file_path() || index_pb.packed_file_path().empty()) {
3196
0
            continue;
3197
0
        }
3198
27
        packed_file_updates[index_pb.packed_file_path()].push_back(
3199
27
                PackedSmallFileInfo {small_path});
3200
27
    }
3201
13
    if (packed_file_updates.empty()) {
3202
0
        LOG_INFO("skip packed file update: no valid merge_file_path in merge_file_segment_index")
3203
0
                .tag("instance_id", instance_id_)
3204
0
                .tag("tablet_id", rs_meta_pb.tablet_id())
3205
0
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3206
0
                .tag("index_map_size", index_map.size());
3207
0
        return 0;
3208
0
    }
3209
3210
13
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
3211
13
    int ret = 0;
3212
24
    for (auto& [packed_file_path, small_files] : packed_file_updates) {
3213
24
        if (small_files.empty()) {
3214
0
            continue;
3215
0
        }
3216
3217
24
        bool success = false;
3218
24
        for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3219
24
            std::unique_ptr<Transaction> txn;
3220
24
            TxnErrorCode err = txn_kv_->create_txn(&txn);
3221
24
            if (err != TxnErrorCode::TXN_OK) {
3222
0
                LOG_WARNING("failed to create txn when updating packed file ref count")
3223
0
                        .tag("instance_id", instance_id_)
3224
0
                        .tag("packed_file_path", packed_file_path)
3225
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3226
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3227
0
                        .tag("err", err);
3228
0
                ret = -1;
3229
0
                break;
3230
0
            }
3231
3232
24
            std::string packed_key = packed_file_key({instance_id_, packed_file_path});
3233
24
            std::string packed_val;
3234
24
            err = txn->get(packed_key, &packed_val);
3235
24
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3236
0
                LOG_WARNING("packed file info not found when recycling rowset")
3237
0
                        .tag("instance_id", instance_id_)
3238
0
                        .tag("packed_file_path", packed_file_path)
3239
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3240
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3241
0
                        .tag("key", hex(packed_key))
3242
0
                        .tag("tablet id", rs_meta_pb.tablet_id());
3243
                // Skip this packed file entry and continue with others
3244
0
                success = true;
3245
0
                break;
3246
0
            }
3247
24
            if (err != TxnErrorCode::TXN_OK) {
3248
0
                LOG_WARNING("failed to get packed file info when recycling rowset")
3249
0
                        .tag("instance_id", instance_id_)
3250
0
                        .tag("packed_file_path", packed_file_path)
3251
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3252
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3253
0
                        .tag("err", err);
3254
0
                ret = -1;
3255
0
                break;
3256
0
            }
3257
3258
24
            cloud::PackedFileInfoPB packed_info;
3259
24
            if (!packed_info.ParseFromString(packed_val)) {
3260
0
                LOG_WARNING("failed to parse packed file info when recycling rowset")
3261
0
                        .tag("instance_id", instance_id_)
3262
0
                        .tag("packed_file_path", packed_file_path)
3263
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3264
0
                        .tag("tablet_id", rs_meta_pb.tablet_id());
3265
0
                ret = -1;
3266
0
                break;
3267
0
            }
3268
3269
24
            LOG_INFO("packed file update check")
3270
24
                    .tag("instance_id", instance_id_)
3271
24
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3272
24
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3273
24
                    .tag("merged_file_path", packed_file_path)
3274
24
                    .tag("requested_small_files", small_files.size())
3275
24
                    .tag("merge_entries", packed_info.slices_size());
3276
3277
24
            auto* small_file_entries = packed_info.mutable_slices();
3278
24
            int64_t changed_files = 0;
3279
24
            int64_t missing_entries = 0;
3280
24
            int64_t already_deleted = 0;
3281
27
            for (const auto& small_file_info : small_files) {
3282
27
                bool found = false;
3283
87
                for (auto& small_file_entry : *small_file_entries) {
3284
87
                    if (small_file_entry.path() == small_file_info.small_file_path) {
3285
27
                        if (!small_file_entry.deleted()) {
3286
27
                            small_file_entry.set_deleted(true);
3287
27
                            if (!small_file_entry.corrected()) {
3288
27
                                small_file_entry.set_corrected(true);
3289
27
                            }
3290
27
                            ++changed_files;
3291
27
                        } else {
3292
0
                            ++already_deleted;
3293
0
                        }
3294
27
                        found = true;
3295
27
                        break;
3296
27
                    }
3297
87
                }
3298
27
                if (!found) {
3299
0
                    ++missing_entries;
3300
0
                    LOG_WARNING("packed file info missing small file entry")
3301
0
                            .tag("instance_id", instance_id_)
3302
0
                            .tag("packed_file_path", packed_file_path)
3303
0
                            .tag("small_file_path", small_file_info.small_file_path)
3304
0
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3305
0
                            .tag("tablet_id", rs_meta_pb.tablet_id());
3306
0
                }
3307
27
            }
3308
3309
24
            if (changed_files == 0) {
3310
0
                LOG_INFO("skip merge file update: no merge entries changed")
3311
0
                        .tag("instance_id", instance_id_)
3312
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3313
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3314
0
                        .tag("merged_file_path", packed_file_path)
3315
0
                        .tag("missing_entries", missing_entries)
3316
0
                        .tag("already_deleted", already_deleted)
3317
0
                        .tag("requested_small_files", small_files.size())
3318
0
                        .tag("merge_entries", packed_info.slices_size());
3319
0
                success = true;
3320
0
                break;
3321
0
            }
3322
3323
            // Calculate remaining files
3324
24
            int64_t left_file_count = 0;
3325
24
            int64_t left_file_bytes = 0;
3326
141
            for (const auto& small_file_entry : packed_info.slices()) {
3327
141
                if (!small_file_entry.deleted()) {
3328
57
                    ++left_file_count;
3329
57
                    left_file_bytes += small_file_entry.size();
3330
57
                }
3331
141
            }
3332
24
            packed_info.set_remaining_slice_bytes(left_file_bytes);
3333
24
            packed_info.set_ref_cnt(left_file_count);
3334
24
            LOG_INFO("updated packed file reference info")
3335
24
                    .tag("instance_id", instance_id_)
3336
24
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3337
24
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3338
24
                    .tag("packed_file_path", packed_file_path)
3339
24
                    .tag("ref_cnt", left_file_count)
3340
24
                    .tag("left_file_bytes", left_file_bytes);
3341
3342
24
            if (left_file_count == 0) {
3343
7
                packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
3344
7
            }
3345
3346
24
            std::string updated_val;
3347
24
            if (!packed_info.SerializeToString(&updated_val)) {
3348
0
                LOG_WARNING("failed to serialize packed file info when recycling rowset")
3349
0
                        .tag("instance_id", instance_id_)
3350
0
                        .tag("packed_file_path", packed_file_path)
3351
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3352
0
                        .tag("tablet_id", rs_meta_pb.tablet_id());
3353
0
                ret = -1;
3354
0
                break;
3355
0
            }
3356
3357
24
            txn->put(packed_key, updated_val);
3358
24
            err = txn->commit();
3359
24
            if (err == TxnErrorCode::TXN_OK) {
3360
24
                success = true;
3361
24
                if (left_file_count == 0) {
3362
7
                    LOG_INFO("packed file ready to delete, deleting immediately")
3363
7
                            .tag("instance_id", instance_id_)
3364
7
                            .tag("packed_file_path", packed_file_path);
3365
7
                    if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
3366
0
                        ret = -1;
3367
0
                    }
3368
7
                }
3369
24
                break;
3370
24
            }
3371
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
3372
0
                if (attempt >= max_retry_times) {
3373
0
                    LOG_WARNING("packed file info update conflict after max retry")
3374
0
                            .tag("instance_id", instance_id_)
3375
0
                            .tag("packed_file_path", packed_file_path)
3376
0
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3377
0
                            .tag("tablet_id", rs_meta_pb.tablet_id())
3378
0
                            .tag("changed_files", changed_files)
3379
0
                            .tag("attempt", attempt);
3380
0
                    ret = -1;
3381
0
                    break;
3382
0
                }
3383
0
                LOG_WARNING("packed file info update conflict, retrying")
3384
0
                        .tag("instance_id", instance_id_)
3385
0
                        .tag("packed_file_path", packed_file_path)
3386
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3387
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3388
0
                        .tag("changed_files", changed_files)
3389
0
                        .tag("attempt", attempt);
3390
0
                sleep_for_packed_file_retry();
3391
0
                continue;
3392
0
            }
3393
3394
0
            LOG_WARNING("failed to commit packed file info update")
3395
0
                    .tag("instance_id", instance_id_)
3396
0
                    .tag("packed_file_path", packed_file_path)
3397
0
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3398
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3399
0
                    .tag("err", err)
3400
0
                    .tag("changed_files", changed_files);
3401
0
            ret = -1;
3402
0
            break;
3403
0
        }
3404
3405
24
        if (!success) {
3406
0
            ret = -1;
3407
0
        }
3408
24
    }
3409
3410
13
    return ret;
3411
13
}
3412
3413
int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts(
3414
        int64_t tablet_id, const std::string& rowset_id,
3415
58.2k
        DeleteBitmapStorageType* out_storage_type) {
3416
58.2k
    if (out_storage_type) {
3417
58.2k
        *out_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3418
58.2k
    }
3419
3420
    // Get delete bitmap storage info from FDB
3421
58.2k
    std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3422
58.2k
    std::unique_ptr<Transaction> txn;
3423
58.2k
    TxnErrorCode err = txn_kv_->create_txn(&txn);
3424
58.2k
    if (err != TxnErrorCode::TXN_OK) {
3425
0
        LOG_WARNING("failed to create txn when getting delete bitmap storage")
3426
0
                .tag("instance_id", instance_id_)
3427
0
                .tag("tablet_id", tablet_id)
3428
0
                .tag("rowset_id", rowset_id)
3429
0
                .tag("err", err);
3430
0
        return -1;
3431
0
    }
3432
3433
58.2k
    std::string dbm_val;
3434
58.2k
    err = txn->get(dbm_key, &dbm_val);
3435
58.2k
    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3436
        // No delete bitmap for this rowset, nothing to do
3437
4.63k
        LOG_INFO("delete bitmap not found, skip packed file ref count decrement")
3438
4.63k
                .tag("instance_id", instance_id_)
3439
4.63k
                .tag("tablet_id", tablet_id)
3440
4.63k
                .tag("rowset_id", rowset_id);
3441
4.63k
        return 0;
3442
4.63k
    }
3443
53.5k
    if (err != TxnErrorCode::TXN_OK) {
3444
0
        LOG_WARNING("failed to get delete bitmap storage")
3445
0
                .tag("instance_id", instance_id_)
3446
0
                .tag("tablet_id", tablet_id)
3447
0
                .tag("rowset_id", rowset_id)
3448
0
                .tag("err", err);
3449
0
        return -1;
3450
0
    }
3451
3452
53.5k
    DeleteBitmapStoragePB storage;
3453
53.5k
    if (!storage.ParseFromString(dbm_val)) {
3454
0
        LOG_WARNING("failed to parse delete bitmap storage")
3455
0
                .tag("instance_id", instance_id_)
3456
0
                .tag("tablet_id", tablet_id)
3457
0
                .tag("rowset_id", rowset_id);
3458
0
        return -1;
3459
0
    }
3460
3461
53.5k
    if (storage.store_in_fdb()) {
3462
0
        if (out_storage_type) {
3463
0
            *out_storage_type = DeleteBitmapStorageType::IN_FDB;
3464
0
        }
3465
0
        return 0;
3466
0
    }
3467
3468
    // Check if delete bitmap is stored in standalone file.
3469
53.5k
    if (!storage.has_packed_slice_location() ||
3470
53.5k
        storage.packed_slice_location().packed_file_path().empty()) {
3471
53.5k
        if (out_storage_type) {
3472
53.5k
            *out_storage_type = DeleteBitmapStorageType::STANDALONE_FILE;
3473
53.5k
        }
3474
53.5k
        return 0;
3475
53.5k
    }
3476
3477
18.4E
    if (out_storage_type) {
3478
0
        *out_storage_type = DeleteBitmapStorageType::PACKED_FILE;
3479
0
    }
3480
3481
18.4E
    const auto& packed_loc = storage.packed_slice_location();
3482
18.4E
    const std::string& packed_file_path = packed_loc.packed_file_path();
3483
3484
18.4E
    LOG_INFO("decrementing delete bitmap packed file ref count")
3485
18.4E
            .tag("instance_id", instance_id_)
3486
18.4E
            .tag("tablet_id", tablet_id)
3487
18.4E
            .tag("rowset_id", rowset_id)
3488
18.4E
            .tag("packed_file_path", packed_file_path);
3489
3490
18.4E
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
3491
18.4E
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3492
0
        std::unique_ptr<Transaction> update_txn;
3493
0
        err = txn_kv_->create_txn(&update_txn);
3494
0
        if (err != TxnErrorCode::TXN_OK) {
3495
0
            LOG_WARNING("failed to create txn for delete bitmap packed file update")
3496
0
                    .tag("instance_id", instance_id_)
3497
0
                    .tag("tablet_id", tablet_id)
3498
0
                    .tag("rowset_id", rowset_id)
3499
0
                    .tag("err", err);
3500
0
            return -1;
3501
0
        }
3502
3503
0
        std::string packed_key = packed_file_key({instance_id_, packed_file_path});
3504
0
        std::string packed_val;
3505
0
        err = update_txn->get(packed_key, &packed_val);
3506
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3507
0
            LOG_WARNING("packed file info not found for delete bitmap")
3508
0
                    .tag("instance_id", instance_id_)
3509
0
                    .tag("tablet_id", tablet_id)
3510
0
                    .tag("rowset_id", rowset_id)
3511
0
                    .tag("packed_file_path", packed_file_path);
3512
0
            return 0;
3513
0
        }
3514
0
        if (err != TxnErrorCode::TXN_OK) {
3515
0
            LOG_WARNING("failed to get packed file info for delete bitmap")
3516
0
                    .tag("instance_id", instance_id_)
3517
0
                    .tag("tablet_id", tablet_id)
3518
0
                    .tag("rowset_id", rowset_id)
3519
0
                    .tag("packed_file_path", packed_file_path)
3520
0
                    .tag("err", err);
3521
0
            return -1;
3522
0
        }
3523
3524
0
        cloud::PackedFileInfoPB packed_info;
3525
0
        if (!packed_info.ParseFromString(packed_val)) {
3526
0
            LOG_WARNING("failed to parse packed file info for delete bitmap")
3527
0
                    .tag("instance_id", instance_id_)
3528
0
                    .tag("tablet_id", tablet_id)
3529
0
                    .tag("rowset_id", rowset_id)
3530
0
                    .tag("packed_file_path", packed_file_path);
3531
0
            return -1;
3532
0
        }
3533
3534
        // Find and mark the small file entry as deleted
3535
        // Use tablet_id and rowset_id to match entry instead of path,
3536
        // because path format may vary with path_version (with or without shard prefix)
3537
0
        auto* entries = packed_info.mutable_slices();
3538
0
        bool found = false;
3539
0
        bool already_deleted = false;
3540
0
        for (auto& entry : *entries) {
3541
0
            if (entry.tablet_id() == tablet_id && entry.rowset_id() == rowset_id) {
3542
0
                if (!entry.deleted()) {
3543
0
                    entry.set_deleted(true);
3544
0
                    if (!entry.corrected()) {
3545
0
                        entry.set_corrected(true);
3546
0
                    }
3547
0
                } else {
3548
0
                    already_deleted = true;
3549
0
                }
3550
0
                found = true;
3551
0
                break;
3552
0
            }
3553
0
        }
3554
3555
0
        if (!found) {
3556
0
            LOG_WARNING("delete bitmap entry not found in packed file")
3557
0
                    .tag("instance_id", instance_id_)
3558
0
                    .tag("tablet_id", tablet_id)
3559
0
                    .tag("rowset_id", rowset_id)
3560
0
                    .tag("packed_file_path", packed_file_path);
3561
0
            return 0;
3562
0
        }
3563
3564
0
        if (already_deleted) {
3565
0
            LOG_INFO("delete bitmap entry already deleted in packed file")
3566
0
                    .tag("instance_id", instance_id_)
3567
0
                    .tag("tablet_id", tablet_id)
3568
0
                    .tag("rowset_id", rowset_id)
3569
0
                    .tag("packed_file_path", packed_file_path);
3570
0
            return 0;
3571
0
        }
3572
3573
        // Calculate remaining files
3574
0
        int64_t left_file_count = 0;
3575
0
        int64_t left_file_bytes = 0;
3576
0
        for (const auto& entry : packed_info.slices()) {
3577
0
            if (!entry.deleted()) {
3578
0
                ++left_file_count;
3579
0
                left_file_bytes += entry.size();
3580
0
            }
3581
0
        }
3582
0
        packed_info.set_remaining_slice_bytes(left_file_bytes);
3583
0
        packed_info.set_ref_cnt(left_file_count);
3584
3585
0
        if (left_file_count == 0) {
3586
0
            packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
3587
0
        }
3588
3589
0
        std::string updated_val;
3590
0
        if (!packed_info.SerializeToString(&updated_val)) {
3591
0
            LOG_WARNING("failed to serialize packed file info for delete bitmap")
3592
0
                    .tag("instance_id", instance_id_)
3593
0
                    .tag("tablet_id", tablet_id)
3594
0
                    .tag("rowset_id", rowset_id)
3595
0
                    .tag("packed_file_path", packed_file_path);
3596
0
            return -1;
3597
0
        }
3598
3599
0
        update_txn->put(packed_key, updated_val);
3600
0
        err = update_txn->commit();
3601
0
        if (err == TxnErrorCode::TXN_OK) {
3602
0
            LOG_INFO("delete bitmap packed file ref count decremented")
3603
0
                    .tag("instance_id", instance_id_)
3604
0
                    .tag("tablet_id", tablet_id)
3605
0
                    .tag("rowset_id", rowset_id)
3606
0
                    .tag("packed_file_path", packed_file_path)
3607
0
                    .tag("left_file_count", left_file_count);
3608
0
            if (left_file_count == 0) {
3609
0
                if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
3610
0
                    return -1;
3611
0
                }
3612
0
            }
3613
0
            return 0;
3614
0
        }
3615
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3616
0
            if (attempt >= max_retry_times) {
3617
0
                LOG_WARNING("delete bitmap packed file update conflict after max retry")
3618
0
                        .tag("instance_id", instance_id_)
3619
0
                        .tag("tablet_id", tablet_id)
3620
0
                        .tag("rowset_id", rowset_id)
3621
0
                        .tag("packed_file_path", packed_file_path)
3622
0
                        .tag("attempt", attempt);
3623
0
                return -1;
3624
0
            }
3625
0
            sleep_for_packed_file_retry();
3626
0
            continue;
3627
0
        }
3628
3629
0
        LOG_WARNING("failed to commit delete bitmap packed file update")
3630
0
                .tag("instance_id", instance_id_)
3631
0
                .tag("tablet_id", tablet_id)
3632
0
                .tag("rowset_id", rowset_id)
3633
0
                .tag("packed_file_path", packed_file_path)
3634
0
                .tag("err", err);
3635
0
        return -1;
3636
0
    }
3637
3638
18.4E
    return -1;
3639
18.4E
}
3640
3641
int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path,
3642
                                                const std::string& packed_key,
3643
7
                                                const cloud::PackedFileInfoPB& packed_info) {
3644
7
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
3645
0
        LOG_WARNING("packed file missing resource id when recycling")
3646
0
                .tag("instance_id", instance_id_)
3647
0
                .tag("packed_file_path", packed_file_path);
3648
0
        return -1;
3649
0
    }
3650
3651
7
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
3652
7
    if (!accessor) {
3653
0
        LOG_WARNING("no accessor available to delete packed file")
3654
0
                .tag("instance_id", instance_id_)
3655
0
                .tag("packed_file_path", packed_file_path)
3656
0
                .tag("resource_id", packed_info.resource_id());
3657
0
        return -1;
3658
0
    }
3659
3660
7
    int del_ret = accessor->delete_file(packed_file_path);
3661
7
    if (del_ret != 0 && del_ret != 1) {
3662
0
        LOG_WARNING("failed to delete packed file")
3663
0
                .tag("instance_id", instance_id_)
3664
0
                .tag("packed_file_path", packed_file_path)
3665
0
                .tag("resource_id", resource_id)
3666
0
                .tag("ret", del_ret);
3667
0
        return -1;
3668
0
    }
3669
7
    if (del_ret == 1) {
3670
0
        LOG_INFO("packed file already removed")
3671
0
                .tag("instance_id", instance_id_)
3672
0
                .tag("packed_file_path", packed_file_path)
3673
0
                .tag("resource_id", resource_id);
3674
7
    } else {
3675
7
        LOG_INFO("deleted packed file")
3676
7
                .tag("instance_id", instance_id_)
3677
7
                .tag("packed_file_path", packed_file_path)
3678
7
                .tag("resource_id", resource_id);
3679
7
    }
3680
3681
7
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
3682
7
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3683
7
        std::unique_ptr<Transaction> del_txn;
3684
7
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
3685
7
        if (err != TxnErrorCode::TXN_OK) {
3686
0
            LOG_WARNING("failed to create txn when removing packed file kv")
3687
0
                    .tag("instance_id", instance_id_)
3688
0
                    .tag("packed_file_path", packed_file_path)
3689
0
                    .tag("attempt", attempt)
3690
0
                    .tag("err", err);
3691
0
            return -1;
3692
0
        }
3693
3694
7
        std::string latest_val;
3695
7
        err = del_txn->get(packed_key, &latest_val);
3696
7
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3697
0
            return 0;
3698
0
        }
3699
7
        if (err != TxnErrorCode::TXN_OK) {
3700
0
            LOG_WARNING("failed to re-read packed file kv before removal")
3701
0
                    .tag("instance_id", instance_id_)
3702
0
                    .tag("packed_file_path", packed_file_path)
3703
0
                    .tag("attempt", attempt)
3704
0
                    .tag("err", err);
3705
0
            return -1;
3706
0
        }
3707
3708
7
        cloud::PackedFileInfoPB latest_info;
3709
7
        if (!latest_info.ParseFromString(latest_val)) {
3710
0
            LOG_WARNING("failed to parse packed file info before removal")
3711
0
                    .tag("instance_id", instance_id_)
3712
0
                    .tag("packed_file_path", packed_file_path)
3713
0
                    .tag("attempt", attempt);
3714
0
            return -1;
3715
0
        }
3716
3717
7
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
3718
7
              latest_info.ref_cnt() == 0)) {
3719
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
3720
0
                    .tag("instance_id", instance_id_)
3721
0
                    .tag("packed_file_path", packed_file_path)
3722
0
                    .tag("attempt", attempt);
3723
0
            return 0;
3724
0
        }
3725
3726
7
        del_txn->remove(packed_key);
3727
7
        err = del_txn->commit();
3728
7
        if (err == TxnErrorCode::TXN_OK) {
3729
7
            LOG_INFO("removed packed file metadata")
3730
7
                    .tag("instance_id", instance_id_)
3731
7
                    .tag("packed_file_path", packed_file_path);
3732
7
            return 0;
3733
7
        }
3734
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3735
0
            if (attempt >= max_retry_times) {
3736
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
3737
0
                        .tag("instance_id", instance_id_)
3738
0
                        .tag("packed_file_path", packed_file_path)
3739
0
                        .tag("attempt", attempt);
3740
0
                return -1;
3741
0
            }
3742
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
3743
0
                    .tag("instance_id", instance_id_)
3744
0
                    .tag("packed_file_path", packed_file_path)
3745
0
                    .tag("attempt", attempt);
3746
0
            sleep_for_packed_file_retry();
3747
0
            continue;
3748
0
        }
3749
0
        LOG_WARNING("failed to remove packed file kv")
3750
0
                .tag("instance_id", instance_id_)
3751
0
                .tag("packed_file_path", packed_file_path)
3752
0
                .tag("attempt", attempt)
3753
0
                .tag("err", err);
3754
0
        return -1;
3755
0
    }
3756
0
    return -1;
3757
7
}
3758
3759
int InstanceRecycler::delete_rowset_data(
3760
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
3761
98
        RecyclerMetricsContext& metrics_context) {
3762
98
    int ret = 0;
3763
    // resource_id -> file_paths
3764
98
    std::map<std::string, std::vector<std::string>> resource_file_paths;
3765
    // (resource_id, tablet_id, rowset_id)
3766
98
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
3767
98
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
3768
3769
57.1k
    for (const auto& [_, rs] : rowsets) {
3770
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
3771
        // due to aborted schema change.
3772
57.1k
        if (is_formal_rowset) {
3773
3.16k
            std::lock_guard lock(recycled_tablets_mtx_);
3774
3.16k
            if (recycled_tablets_.count(rs.tablet_id()) && rs.packed_slice_locations_size() == 0) {
3775
                // Tablet has been recycled and this rowset has no packed slices, so file data
3776
                // should already be gone; skip to avoid redundant deletes. Rowsets with packed
3777
                // slice info must still run to decrement packed file ref counts.
3778
0
                continue;
3779
0
            }
3780
3.16k
        }
3781
3782
57.1k
        int64_t num_segments = rs.num_segments();
3783
        // Check num_segments before accessor lookup, because empty rowsets
3784
        // (e.g. base compaction output of empty rowsets) may have no resource_id
3785
        // set. Skipping them early avoids a spurious "no such resource id" error
3786
        // that marks the entire batch as failed and prevents txn_remove from
3787
        // cleaning up recycle KV keys.
3788
57.1k
        if (num_segments <= 0) {
3789
0
            metrics_context.total_recycled_num++;
3790
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
3791
0
            continue;
3792
0
        }
3793
3794
57.1k
        auto it = accessor_map_.find(rs.resource_id());
3795
        // possible if the accessor is not initilized correctly
3796
57.1k
        if (it == accessor_map_.end()) [[unlikely]] {
3797
3.00k
            LOG_WARNING("instance has no such resource id")
3798
3.00k
                    .tag("instance_id", instance_id_)
3799
3.00k
                    .tag("resource_id", rs.resource_id());
3800
3.00k
            ret = -1;
3801
3.00k
            continue;
3802
3.00k
        }
3803
3804
54.1k
        auto& file_paths = resource_file_paths[rs.resource_id()];
3805
54.1k
        const auto& rowset_id = rs.rowset_id_v2();
3806
54.1k
        int64_t tablet_id = rs.tablet_id();
3807
54.1k
        LOG_INFO("recycle rowset merge index size")
3808
54.1k
                .tag("instance_id", instance_id_)
3809
54.1k
                .tag("tablet_id", tablet_id)
3810
54.1k
                .tag("rowset_id", rowset_id)
3811
54.1k
                .tag("merge_index_size", rs.packed_slice_locations_size());
3812
54.1k
        if (decrement_packed_file_ref_counts(rs) != 0) {
3813
0
            ret = -1;
3814
0
            continue;
3815
0
        }
3816
3817
        // Process delete bitmap - check where it's stored.
3818
54.1k
        DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3819
54.1k
        if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
3820
54.1k
                                                           &delete_bitmap_storage_type) != 0) {
3821
0
            LOG_WARNING("failed to decrement delete bitmap packed file ref count")
3822
0
                    .tag("instance_id", instance_id_)
3823
0
                    .tag("tablet_id", tablet_id)
3824
0
                    .tag("rowset_id", rowset_id);
3825
0
            ret = -1;
3826
0
            continue;
3827
0
        }
3828
54.1k
        if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) {
3829
51.5k
            file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3830
51.5k
        }
3831
3832
        // Process inverted indexes
3833
54.1k
        std::vector<std::pair<int64_t, std::string>> index_ids;
3834
        // default format as v1.
3835
54.1k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3836
54.1k
        int inverted_index_get_ret = 0;
3837
54.1k
        if (rs.has_tablet_schema()) {
3838
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
3839
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3840
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3841
53.5k
                }
3842
53.5k
            }
3843
26.6k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
3844
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
3845
26.5k
            }
3846
27.5k
        } else {
3847
27.5k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
3848
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
3849
0
                                "instance_id="
3850
0
                             << instance_id_ << " tablet_id=" << tablet_id
3851
0
                             << " rowset_id=" << rowset_id;
3852
0
                ret = -1;
3853
0
                continue;
3854
0
            }
3855
27.5k
            InvertedIndexInfo index_info;
3856
27.5k
            inverted_index_get_ret =
3857
27.5k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
3858
27.5k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3859
27.5k
                                     &inverted_index_get_ret);
3860
27.5k
            if (inverted_index_get_ret == 0) {
3861
27.0k
                index_format = index_info.first;
3862
27.0k
                index_ids = index_info.second;
3863
27.0k
            } else if (inverted_index_get_ret == 1) {
3864
                // 1. Schema kv not found means tablet has been recycled
3865
                // Maybe some tablet recycle failed by some bugs
3866
                // We need to delete again to double check
3867
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3868
                // because we are uncertain about the inverted index information.
3869
                // If there are inverted indexes, some data might not be deleted,
3870
                // but this is acceptable as we have made our best effort to delete the data.
3871
503
                LOG_INFO(
3872
503
                        "delete rowset data schema kv not found, need to delete again to "
3873
503
                        "double "
3874
503
                        "check")
3875
503
                        .tag("instance_id", instance_id_)
3876
503
                        .tag("tablet_id", tablet_id)
3877
503
                        .tag("rowset", rs.ShortDebugString());
3878
                // Currently index_ids is guaranteed to be empty,
3879
                // but we clear it again here as a safeguard against future code changes
3880
                // that might cause index_ids to no longer be empty
3881
503
                index_format = InvertedIndexStorageFormatPB::V2;
3882
503
                index_ids.clear();
3883
18.4E
            } else {
3884
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
3885
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
3886
18.4E
                ret = -1;
3887
18.4E
                continue;
3888
18.4E
            }
3889
27.5k
        }
3890
54.2k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3891
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3892
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3893
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
3894
5
            continue;
3895
5
        }
3896
323k
        for (int64_t i = 0; i < num_segments; ++i) {
3897
269k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3898
269k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
3899
532k
                for (const auto& index_id : index_ids) {
3900
532k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
3901
532k
                                                                index_id.first, index_id.second));
3902
532k
                }
3903
266k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
3904
                // try to recycle inverted index v2 when get_ret == 1
3905
                // we treat schema not found as if it has a v2 format inverted index
3906
                // to reduce chance of data leakage
3907
2.50k
                if (inverted_index_get_ret == 1) {
3908
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
3909
2.50k
                            .tag("instance_id", instance_id_)
3910
2.50k
                            .tag("inverted index v2 path",
3911
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
3912
2.50k
                }
3913
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3914
2.50k
            }
3915
269k
        }
3916
54.1k
    }
3917
3918
98
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
3919
98
                                                 "delete_rowset_data",
3920
98
                                                 [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3920
5
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3920
51
                                                 [](const int& ret) { return ret != 0; });
3921
98
    for (auto& [resource_id, file_paths] : resource_file_paths) {
3922
51
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3923
51
            DCHECK(accessor_map_.count(*rid))
3924
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3925
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3926
51
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3927
51
                                     &accessor_map_);
3928
51
            if (!accessor_map_.contains(*rid)) {
3929
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3930
0
                        .tag("resource_id", resource_id)
3931
0
                        .tag("instance_id", instance_id_);
3932
0
                return -1;
3933
0
            }
3934
51
            auto& accessor = accessor_map_[*rid];
3935
51
            int ret = accessor->delete_files(*paths);
3936
51
            if (!ret) {
3937
                // deduplication of different files with the same rowset id
3938
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3939
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3940
51
                std::set<std::string> deleted_rowset_id;
3941
3942
51
                std::for_each(paths->begin(), paths->end(),
3943
51
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3944
858k
                               this](const std::string& path) {
3945
858k
                                  std::vector<std::string> str;
3946
858k
                                  butil::SplitString(path, '/', &str);
3947
858k
                                  std::string rowset_id;
3948
858k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3949
852k
                                      rowset_id = str.back().substr(0, pos);
3950
852k
                                  } else {
3951
5.87k
                                      if (path.find("packed_file/") != std::string::npos) {
3952
0
                                          return; // packed files do not have rowset_id encoded
3953
0
                                      }
3954
5.87k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3955
5.87k
                                      return;
3956
5.87k
                                  }
3957
852k
                                  auto rs_meta = rowsets.find(rowset_id);
3958
852k
                                  if (rs_meta != rowsets.end() &&
3959
857k
                                      !deleted_rowset_id.contains(rowset_id)) {
3960
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3961
54.1k
                                      metrics_context.total_recycled_data_size +=
3962
54.1k
                                              rs_meta->second.total_disk_size();
3963
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3964
54.1k
                                              rs_meta->second.num_segments();
3965
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3966
54.1k
                                              rs_meta->second.total_disk_size();
3967
54.1k
                                      metrics_context.total_recycled_num++;
3968
54.1k
                                  }
3969
852k
                              });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3944
7
                               this](const std::string& path) {
3945
7
                                  std::vector<std::string> str;
3946
7
                                  butil::SplitString(path, '/', &str);
3947
7
                                  std::string rowset_id;
3948
7
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3949
7
                                      rowset_id = str.back().substr(0, pos);
3950
7
                                  } else {
3951
0
                                      if (path.find("packed_file/") != std::string::npos) {
3952
0
                                          return; // packed files do not have rowset_id encoded
3953
0
                                      }
3954
0
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3955
0
                                      return;
3956
0
                                  }
3957
7
                                  auto rs_meta = rowsets.find(rowset_id);
3958
7
                                  if (rs_meta != rowsets.end() &&
3959
7
                                      !deleted_rowset_id.contains(rowset_id)) {
3960
7
                                      deleted_rowset_id.emplace(rowset_id);
3961
7
                                      metrics_context.total_recycled_data_size +=
3962
7
                                              rs_meta->second.total_disk_size();
3963
7
                                      segment_metrics_context_.total_recycled_num +=
3964
7
                                              rs_meta->second.num_segments();
3965
7
                                      segment_metrics_context_.total_recycled_data_size +=
3966
7
                                              rs_meta->second.total_disk_size();
3967
7
                                      metrics_context.total_recycled_num++;
3968
7
                                  }
3969
7
                              });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3944
858k
                               this](const std::string& path) {
3945
858k
                                  std::vector<std::string> str;
3946
858k
                                  butil::SplitString(path, '/', &str);
3947
858k
                                  std::string rowset_id;
3948
858k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3949
852k
                                      rowset_id = str.back().substr(0, pos);
3950
852k
                                  } else {
3951
5.87k
                                      if (path.find("packed_file/") != std::string::npos) {
3952
0
                                          return; // packed files do not have rowset_id encoded
3953
0
                                      }
3954
5.87k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3955
5.87k
                                      return;
3956
5.87k
                                  }
3957
852k
                                  auto rs_meta = rowsets.find(rowset_id);
3958
852k
                                  if (rs_meta != rowsets.end() &&
3959
857k
                                      !deleted_rowset_id.contains(rowset_id)) {
3960
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3961
54.1k
                                      metrics_context.total_recycled_data_size +=
3962
54.1k
                                              rs_meta->second.total_disk_size();
3963
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3964
54.1k
                                              rs_meta->second.num_segments();
3965
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3966
54.1k
                                              rs_meta->second.total_disk_size();
3967
54.1k
                                      metrics_context.total_recycled_num++;
3968
54.1k
                                  }
3969
852k
                              });
3970
51
            }
3971
51
            return ret;
3972
51
        });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3922
5
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3923
5
            DCHECK(accessor_map_.count(*rid))
3924
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3925
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3926
5
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3927
5
                                     &accessor_map_);
3928
5
            if (!accessor_map_.contains(*rid)) {
3929
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3930
0
                        .tag("resource_id", resource_id)
3931
0
                        .tag("instance_id", instance_id_);
3932
0
                return -1;
3933
0
            }
3934
5
            auto& accessor = accessor_map_[*rid];
3935
5
            int ret = accessor->delete_files(*paths);
3936
5
            if (!ret) {
3937
                // deduplication of different files with the same rowset id
3938
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3939
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3940
5
                std::set<std::string> deleted_rowset_id;
3941
3942
5
                std::for_each(paths->begin(), paths->end(),
3943
5
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3944
5
                               this](const std::string& path) {
3945
5
                                  std::vector<std::string> str;
3946
5
                                  butil::SplitString(path, '/', &str);
3947
5
                                  std::string rowset_id;
3948
5
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3949
5
                                      rowset_id = str.back().substr(0, pos);
3950
5
                                  } else {
3951
5
                                      if (path.find("packed_file/") != std::string::npos) {
3952
5
                                          return; // packed files do not have rowset_id encoded
3953
5
                                      }
3954
5
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3955
5
                                      return;
3956
5
                                  }
3957
5
                                  auto rs_meta = rowsets.find(rowset_id);
3958
5
                                  if (rs_meta != rowsets.end() &&
3959
5
                                      !deleted_rowset_id.contains(rowset_id)) {
3960
5
                                      deleted_rowset_id.emplace(rowset_id);
3961
5
                                      metrics_context.total_recycled_data_size +=
3962
5
                                              rs_meta->second.total_disk_size();
3963
5
                                      segment_metrics_context_.total_recycled_num +=
3964
5
                                              rs_meta->second.num_segments();
3965
5
                                      segment_metrics_context_.total_recycled_data_size +=
3966
5
                                              rs_meta->second.total_disk_size();
3967
5
                                      metrics_context.total_recycled_num++;
3968
5
                                  }
3969
5
                              });
3970
5
            }
3971
5
            return ret;
3972
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3922
46
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3923
46
            DCHECK(accessor_map_.count(*rid))
3924
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3925
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3926
46
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3927
46
                                     &accessor_map_);
3928
46
            if (!accessor_map_.contains(*rid)) {
3929
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3930
0
                        .tag("resource_id", resource_id)
3931
0
                        .tag("instance_id", instance_id_);
3932
0
                return -1;
3933
0
            }
3934
46
            auto& accessor = accessor_map_[*rid];
3935
46
            int ret = accessor->delete_files(*paths);
3936
46
            if (!ret) {
3937
                // deduplication of different files with the same rowset id
3938
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3939
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3940
46
                std::set<std::string> deleted_rowset_id;
3941
3942
46
                std::for_each(paths->begin(), paths->end(),
3943
46
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3944
46
                               this](const std::string& path) {
3945
46
                                  std::vector<std::string> str;
3946
46
                                  butil::SplitString(path, '/', &str);
3947
46
                                  std::string rowset_id;
3948
46
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3949
46
                                      rowset_id = str.back().substr(0, pos);
3950
46
                                  } else {
3951
46
                                      if (path.find("packed_file/") != std::string::npos) {
3952
46
                                          return; // packed files do not have rowset_id encoded
3953
46
                                      }
3954
46
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3955
46
                                      return;
3956
46
                                  }
3957
46
                                  auto rs_meta = rowsets.find(rowset_id);
3958
46
                                  if (rs_meta != rowsets.end() &&
3959
46
                                      !deleted_rowset_id.contains(rowset_id)) {
3960
46
                                      deleted_rowset_id.emplace(rowset_id);
3961
46
                                      metrics_context.total_recycled_data_size +=
3962
46
                                              rs_meta->second.total_disk_size();
3963
46
                                      segment_metrics_context_.total_recycled_num +=
3964
46
                                              rs_meta->second.num_segments();
3965
46
                                      segment_metrics_context_.total_recycled_data_size +=
3966
46
                                              rs_meta->second.total_disk_size();
3967
46
                                      metrics_context.total_recycled_num++;
3968
46
                                  }
3969
46
                              });
3970
46
            }
3971
46
            return ret;
3972
46
        });
3973
51
    }
3974
98
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
3975
5
        LOG_INFO(
3976
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
3977
5
                "resource_id={}, tablet_id={}, instance_id={}, task_type={}",
3978
5
                rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type);
3979
5
        concurrent_delete_executor.add([&]() -> int {
3980
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3981
5
            if (!ret) {
3982
5
                auto rs = rowsets.at(rowset_id);
3983
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3984
5
                metrics_context.total_recycled_num++;
3985
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3986
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3987
5
            }
3988
5
            return ret;
3989
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
3979
5
        concurrent_delete_executor.add([&]() -> int {
3980
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3981
5
            if (!ret) {
3982
5
                auto rs = rowsets.at(rowset_id);
3983
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3984
5
                metrics_context.total_recycled_num++;
3985
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3986
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3987
5
            }
3988
5
            return ret;
3989
5
        });
3990
5
    }
3991
3992
98
    bool finished = true;
3993
98
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3994
98
    for (int r : rets) {
3995
56
        if (r != 0) {
3996
0
            ret = -1;
3997
0
            break;
3998
0
        }
3999
56
    }
4000
98
    ret = finished ? ret : -1;
4001
98
    return ret;
4002
98
}
4003
4004
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
4005
3.30k
                                         const std::string& rowset_id) {
4006
3.30k
    auto it = accessor_map_.find(resource_id);
4007
3.30k
    if (it == accessor_map_.end()) {
4008
400
        LOG_WARNING("instance has no such resource id")
4009
400
                .tag("instance_id", instance_id_)
4010
400
                .tag("resource_id", resource_id)
4011
400
                .tag("tablet_id", tablet_id)
4012
400
                .tag("rowset_id", rowset_id);
4013
400
        return -1;
4014
400
    }
4015
2.90k
    auto& accessor = it->second;
4016
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
4017
3.30k
}
4018
4019
4
bool InstanceRecycler::decode_packed_file_key(std::string_view key, std::string* packed_path) {
4020
4
    if (key.empty()) {
4021
0
        return false;
4022
0
    }
4023
4
    std::string_view key_view = key;
4024
4
    key_view.remove_prefix(1); // remove keyspace prefix
4025
4
    std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> decoded;
4026
4
    if (decode_key(&key_view, &decoded) != 0) {
4027
0
        return false;
4028
0
    }
4029
4
    if (decoded.size() < 4) {
4030
0
        return false;
4031
0
    }
4032
4
    try {
4033
4
        *packed_path = std::get<std::string>(std::get<0>(decoded.back()));
4034
4
    } catch (const std::bad_variant_access&) {
4035
0
        return false;
4036
0
    }
4037
4
    return true;
4038
4
}
4039
4040
14
int InstanceRecycler::recycle_packed_files() {
4041
14
    const std::string task_name = "recycle_packed_files";
4042
14
    auto start_tp = steady_clock::now();
4043
14
    int64_t start_time = duration_cast<seconds>(start_tp.time_since_epoch()).count();
4044
14
    int ret = 0;
4045
14
    PackedFileRecycleStats stats;
4046
4047
14
    register_recycle_task(task_name, start_time);
4048
14
    DORIS_CLOUD_DEFER {
4049
14
        unregister_recycle_task(task_name);
4050
14
        int64_t cost =
4051
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4052
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
4053
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
4054
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
4055
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
4056
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
4057
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
4058
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
4059
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
4060
14
                                                             stats.bytes_object_deleted);
4061
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
4062
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
4063
14
                .tag("instance_id", instance_id_)
4064
14
                .tag("num_scanned", stats.num_scanned)
4065
14
                .tag("num_corrected", stats.num_corrected)
4066
14
                .tag("num_deleted", stats.num_deleted)
4067
14
                .tag("num_failed", stats.num_failed)
4068
14
                .tag("num_objects_deleted", stats.num_object_deleted)
4069
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
4070
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
4071
14
                .tag("bytes_deleted", stats.bytes_deleted)
4072
14
                .tag("ret", ret);
4073
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
Line
Count
Source
4048
14
    DORIS_CLOUD_DEFER {
4049
14
        unregister_recycle_task(task_name);
4050
14
        int64_t cost =
4051
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4052
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
4053
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
4054
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
4055
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
4056
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
4057
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
4058
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
4059
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
4060
14
                                                             stats.bytes_object_deleted);
4061
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
4062
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
4063
14
                .tag("instance_id", instance_id_)
4064
14
                .tag("num_scanned", stats.num_scanned)
4065
14
                .tag("num_corrected", stats.num_corrected)
4066
14
                .tag("num_deleted", stats.num_deleted)
4067
14
                .tag("num_failed", stats.num_failed)
4068
14
                .tag("num_objects_deleted", stats.num_object_deleted)
4069
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
4070
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
4071
14
                .tag("bytes_deleted", stats.bytes_deleted)
4072
14
                .tag("ret", ret);
4073
14
    };
4074
4075
14
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
4076
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
4077
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
4078
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
Line
Count
Source
4075
4
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
4076
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
4077
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
4078
4
    };
4079
4080
14
    LOG_INFO("begin to recycle packed file").tag("instance_id", instance_id_);
4081
4082
14
    std::string begin = packed_file_key({instance_id_, ""});
4083
14
    std::string end = packed_file_key({instance_id_, "\xff"});
4084
14
    if (scan_and_recycle(begin, end, recycle_func) != 0) {
4085
0
        ret = -1;
4086
0
    }
4087
4088
14
    return ret;
4089
14
}
4090
4091
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
4092
                                                  RecyclerMetricsContext& metrics_context,
4093
0
                                                  int64_t partition_id, bool is_empty_tablet) {
4094
0
    std::string tablet_key_begin, tablet_key_end;
4095
4096
0
    if (partition_id > 0) {
4097
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
4098
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
4099
0
    } else {
4100
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
4101
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
4102
0
    }
4103
    // for calculate the total num or bytes of recyled objects
4104
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
4105
0
                                                          std::string_view v) -> int {
4106
0
        doris::TabletMetaCloudPB tablet_meta_pb;
4107
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
4108
0
            return 0;
4109
0
        }
4110
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
4111
4112
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
4113
0
            return 0;
4114
0
        }
4115
4116
0
        if (!is_empty_tablet) {
4117
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
4118
0
                return 0;
4119
0
            }
4120
0
            tablet_metrics_context_.total_need_recycle_num++;
4121
0
        }
4122
0
        return 0;
4123
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
4124
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
4125
0
    metrics_context.report(true);
4126
0
    tablet_metrics_context_.report(true);
4127
0
    segment_metrics_context_.report(true);
4128
0
    return ret;
4129
0
}
4130
4131
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
4132
0
                                                 RecyclerMetricsContext& metrics_context) {
4133
0
    int ret = 0;
4134
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
4135
0
    std::unique_ptr<Transaction> txn;
4136
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4137
0
        LOG_WARNING("failed to recycle tablet ")
4138
0
                .tag("tablet id", tablet_id)
4139
0
                .tag("instance_id", instance_id_)
4140
0
                .tag("reason", "failed to create txn");
4141
0
        ret = -1;
4142
0
    }
4143
0
    GetRowsetResponse resp;
4144
0
    std::string msg;
4145
0
    MetaServiceCode code = MetaServiceCode::OK;
4146
    // get rowsets in tablet
4147
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
4148
0
                        tablet_id, code, msg, &resp);
4149
0
    if (code != MetaServiceCode::OK) {
4150
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4151
0
                .tag("tablet id", tablet_id)
4152
0
                .tag("msg", msg)
4153
0
                .tag("code", code)
4154
0
                .tag("instance id", instance_id_);
4155
0
        ret = -1;
4156
0
    }
4157
0
    for (const auto& rs_meta : resp.rowset_meta()) {
4158
        /*
4159
        * For compatibility, we skip the loop for [0-1] here.
4160
        * The purpose of this loop is to delete object files,
4161
        * and since [0-1] only has meta and doesn't have object files,
4162
        * skipping it doesn't affect system correctness.
4163
        *
4164
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
4165
        * would return error -1 directly, causing the recycle operation to fail.
4166
        *
4167
        * [0-1] doesn't have resource id is a bug.
4168
        * In the future, we will fix this problem, after that,
4169
        * we can remove this if statement.
4170
        *
4171
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
4172
        */
4173
4174
0
        if (rs_meta.end_version() == 1) {
4175
            // Assert that [0-1] has no resource_id to make sure
4176
            // this if statement will not be forgetted to remove
4177
            // when the resource id bug is fixed
4178
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4179
0
            continue;
4180
0
        }
4181
0
        if (!rs_meta.has_resource_id()) {
4182
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4183
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4184
0
                    .tag("instance_id", instance_id_)
4185
0
                    .tag("tablet_id", tablet_id);
4186
0
            continue;
4187
0
        }
4188
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4189
0
        auto it = accessor_map_.find(rs_meta.resource_id());
4190
        // possible if the accessor is not initilized correctly
4191
0
        if (it == accessor_map_.end()) [[unlikely]] {
4192
0
            LOG_WARNING(
4193
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4194
0
                    "recycle process")
4195
0
                    .tag("tablet id", tablet_id)
4196
0
                    .tag("instance_id", instance_id_)
4197
0
                    .tag("resource_id", rs_meta.resource_id())
4198
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4199
0
            continue;
4200
0
        }
4201
4202
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
4203
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4204
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4205
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
4206
0
    }
4207
0
    return ret;
4208
0
}
4209
4210
4.25k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
4211
4.25k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
4212
4.25k
            .tag("instance_id", instance_id_)
4213
4.25k
            .tag("tablet_id", tablet_id);
4214
4215
4.25k
    if (should_recycle_versioned_keys()) {
4216
11
        int ret = recycle_versioned_tablet(tablet_id, metrics_context);
4217
11
        if (ret != 0) {
4218
0
            return ret;
4219
0
        }
4220
        // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED
4221
        // during the recycle_versioned_tablet process.
4222
        //
4223
        // .. And remove restore job rowsets of this tablet too
4224
11
    }
4225
4226
4.25k
    int ret = 0;
4227
4.25k
    auto start_time = steady_clock::now();
4228
4229
4.25k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4230
4231
    // collect resource ids
4232
248
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4233
248
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4234
248
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4235
248
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4236
248
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4237
248
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4238
4239
248
    std::set<std::string> resource_ids;
4240
248
    int64_t recycle_rowsets_number = 0;
4241
248
    int64_t recycle_segments_number = 0;
4242
248
    int64_t recycle_rowsets_data_size = 0;
4243
248
    int64_t recycle_rowsets_index_size = 0;
4244
248
    int64_t recycle_restore_job_rowsets_number = 0;
4245
248
    int64_t recycle_restore_job_segments_number = 0;
4246
248
    int64_t recycle_restore_job_rowsets_data_size = 0;
4247
248
    int64_t recycle_restore_job_rowsets_index_size = 0;
4248
248
    int64_t max_rowset_version = 0;
4249
248
    int64_t min_rowset_creation_time = INT64_MAX;
4250
248
    int64_t max_rowset_creation_time = 0;
4251
248
    int64_t min_rowset_expiration_time = INT64_MAX;
4252
248
    int64_t max_rowset_expiration_time = 0;
4253
4254
248
    DORIS_CLOUD_DEFER {
4255
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4256
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4257
248
                .tag("instance_id", instance_id_)
4258
248
                .tag("tablet_id", tablet_id)
4259
248
                .tag("recycle rowsets number", recycle_rowsets_number)
4260
248
                .tag("recycle segments number", recycle_segments_number)
4261
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4262
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4263
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4264
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4265
248
                .tag("all restore job rowsets recycle data size",
4266
248
                     recycle_restore_job_rowsets_data_size)
4267
248
                .tag("all restore job rowsets recycle index size",
4268
248
                     recycle_restore_job_rowsets_index_size)
4269
248
                .tag("max rowset version", max_rowset_version)
4270
248
                .tag("min rowset creation time", min_rowset_creation_time)
4271
248
                .tag("max rowset creation time", max_rowset_creation_time)
4272
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
4273
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
4274
248
                .tag("task type", metrics_context.operation_type)
4275
248
                .tag("ret", ret);
4276
248
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4254
248
    DORIS_CLOUD_DEFER {
4255
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4256
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4257
248
                .tag("instance_id", instance_id_)
4258
248
                .tag("tablet_id", tablet_id)
4259
248
                .tag("recycle rowsets number", recycle_rowsets_number)
4260
248
                .tag("recycle segments number", recycle_segments_number)
4261
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4262
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4263
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4264
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4265
248
                .tag("all restore job rowsets recycle data size",
4266
248
                     recycle_restore_job_rowsets_data_size)
4267
248
                .tag("all restore job rowsets recycle index size",
4268
248
                     recycle_restore_job_rowsets_index_size)
4269
248
                .tag("max rowset version", max_rowset_version)
4270
248
                .tag("min rowset creation time", min_rowset_creation_time)
4271
248
                .tag("max rowset creation time", max_rowset_creation_time)
4272
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
4273
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
4274
248
                .tag("task type", metrics_context.operation_type)
4275
248
                .tag("ret", ret);
4276
248
    };
4277
4278
248
    std::unique_ptr<Transaction> txn;
4279
248
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4280
0
        LOG_WARNING("failed to recycle tablet ")
4281
0
                .tag("tablet id", tablet_id)
4282
0
                .tag("instance_id", instance_id_)
4283
0
                .tag("reason", "failed to create txn");
4284
0
        ret = -1;
4285
0
    }
4286
248
    GetRowsetResponse resp;
4287
248
    std::string msg;
4288
248
    MetaServiceCode code = MetaServiceCode::OK;
4289
    // get rowsets in tablet
4290
248
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
4291
248
                        tablet_id, code, msg, &resp);
4292
248
    if (code != MetaServiceCode::OK) {
4293
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4294
0
                .tag("tablet id", tablet_id)
4295
0
                .tag("msg", msg)
4296
0
                .tag("code", code)
4297
0
                .tag("instance id", instance_id_);
4298
0
        ret = -1;
4299
0
    }
4300
248
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
4301
4302
2.51k
    for (const auto& rs_meta : resp.rowset_meta()) {
4303
        // The rowset has no resource id and segments when it was generated by compaction
4304
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
4305
2.51k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
4306
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
4307
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4308
0
                    .tag("instance_id", instance_id_)
4309
0
                    .tag("tablet_id", tablet_id);
4310
0
            recycle_rowsets_number += 1;
4311
0
            continue;
4312
0
        }
4313
2.51k
        if (!rs_meta.has_resource_id()) {
4314
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4315
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
4316
1
                    .tag("instance_id", instance_id_)
4317
1
                    .tag("tablet_id", tablet_id);
4318
1
            return -1;
4319
1
        }
4320
2.51k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4321
2.51k
        auto it = accessor_map_.find(rs_meta.resource_id());
4322
        // possible if the accessor is not initilized correctly
4323
2.51k
        if (it == accessor_map_.end()) [[unlikely]] {
4324
1
            LOG_WARNING(
4325
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4326
1
                    "recycle process")
4327
1
                    .tag("tablet id", tablet_id)
4328
1
                    .tag("instance_id", instance_id_)
4329
1
                    .tag("resource_id", rs_meta.resource_id())
4330
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4331
1
            return -1;
4332
1
        }
4333
2.51k
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4334
0
            LOG_WARNING("failed to update packed file info when recycling tablet")
4335
0
                    .tag("instance_id", instance_id_)
4336
0
                    .tag("tablet_id", tablet_id)
4337
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4338
0
            return -1;
4339
0
        }
4340
2.51k
        recycle_rowsets_number += 1;
4341
2.51k
        recycle_segments_number += rs_meta.num_segments();
4342
2.51k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4343
2.51k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4344
2.51k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4345
2.51k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4346
2.51k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4347
2.51k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4348
2.51k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4349
2.51k
        resource_ids.emplace(rs_meta.resource_id());
4350
2.51k
    }
4351
4352
    // get restore job rowset in tablet
4353
246
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
4354
246
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
4355
246
    if (code != MetaServiceCode::OK) {
4356
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
4357
0
                .tag("tablet id", tablet_id)
4358
0
                .tag("msg", msg)
4359
0
                .tag("code", code)
4360
0
                .tag("instance id", instance_id_);
4361
0
        return -1;
4362
0
    }
4363
4364
246
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
4365
0
        if (!rs_meta.has_resource_id()) {
4366
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4367
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4368
0
                    .tag("instance_id", instance_id_)
4369
0
                    .tag("tablet_id", tablet_id);
4370
0
            return -1;
4371
0
        }
4372
4373
0
        auto it = accessor_map_.find(rs_meta.resource_id());
4374
        // possible if the accessor is not initilized correctly
4375
0
        if (it == accessor_map_.end()) [[unlikely]] {
4376
0
            LOG_WARNING(
4377
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4378
0
                    "recycle process")
4379
0
                    .tag("tablet id", tablet_id)
4380
0
                    .tag("instance_id", instance_id_)
4381
0
                    .tag("resource_id", rs_meta.resource_id())
4382
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4383
0
            return -1;
4384
0
        }
4385
0
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4386
0
            LOG_WARNING("failed to update packed file info when recycling restore job rowset")
4387
0
                    .tag("instance_id", instance_id_)
4388
0
                    .tag("tablet_id", tablet_id)
4389
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4390
0
            return -1;
4391
0
        }
4392
0
        recycle_restore_job_rowsets_number += 1;
4393
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
4394
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
4395
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
4396
0
        resource_ids.emplace(rs_meta.resource_id());
4397
0
    }
4398
4399
246
    LOG_INFO("recycle tablet start to delete object")
4400
246
            .tag("instance id", instance_id_)
4401
246
            .tag("tablet id", tablet_id)
4402
246
            .tag("recycle tablet resource ids are",
4403
246
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
4404
246
                                 [](std::string rs_id, const auto& it) {
4405
206
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4406
206
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
4404
206
                                 [](std::string rs_id, const auto& it) {
4405
206
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4406
206
                                 }));
4407
4408
246
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
4409
246
            _thread_pool_group.s3_producer_pool,
4410
246
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4411
246
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
4411
206
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
4412
4413
    // delete all rowset data in this tablet
4414
    // ATTN: there may be data leak if not all accessor initilized successfully
4415
    //       partial data deleted if the tablet is stored cross-storage vault
4416
    //       vault id is not attached to TabletMeta...
4417
246
    for (const auto& resource_id : resource_ids) {
4418
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
4419
206
        concurrent_delete_executor.add(
4420
206
                [&, rs_id = resource_id,
4421
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4422
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4423
206
                    if (res != 0) {
4424
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4425
2
                                     << " path=" << accessor_ptr->uri()
4426
2
                                     << " task type=" << metrics_context.operation_type;
4427
2
                        return std::make_pair(-1, rs_id);
4428
2
                    }
4429
204
                    return std::make_pair(0, rs_id);
4430
206
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
4421
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4422
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4423
206
                    if (res != 0) {
4424
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4425
2
                                     << " path=" << accessor_ptr->uri()
4426
2
                                     << " task type=" << metrics_context.operation_type;
4427
2
                        return std::make_pair(-1, rs_id);
4428
2
                    }
4429
204
                    return std::make_pair(0, rs_id);
4430
206
                });
4431
206
    }
4432
4433
246
    bool finished = true;
4434
246
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
4435
246
    for (auto& r : rets) {
4436
206
        if (r.first != 0) {
4437
2
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
4438
2
            ret = -1;
4439
2
        }
4440
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
4441
206
    }
4442
246
    ret = finished ? ret : -1;
4443
4444
246
    if (ret != 0) { // failed recycle tablet data
4445
2
        LOG_WARNING("ret!=0")
4446
2
                .tag("finished", finished)
4447
2
                .tag("ret", ret)
4448
2
                .tag("instance_id", instance_id_)
4449
2
                .tag("tablet_id", tablet_id);
4450
2
        return ret;
4451
2
    }
4452
4453
244
    tablet_metrics_context_.total_recycled_data_size +=
4454
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4455
244
    tablet_metrics_context_.total_recycled_num += 1;
4456
244
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4457
244
    segment_metrics_context_.total_recycled_data_size +=
4458
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4459
244
    metrics_context.total_recycled_data_size +=
4460
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4461
244
    tablet_metrics_context_.report();
4462
244
    segment_metrics_context_.report();
4463
244
    metrics_context.report();
4464
4465
244
    txn.reset();
4466
244
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4467
0
        LOG_WARNING("failed to recycle tablet ")
4468
0
                .tag("tablet id", tablet_id)
4469
0
                .tag("instance_id", instance_id_)
4470
0
                .tag("reason", "failed to create txn");
4471
0
        ret = -1;
4472
0
    }
4473
    // delete all rowset kv in this tablet
4474
244
    txn->remove(rs_key0, rs_key1);
4475
244
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4476
244
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4477
4478
    // remove delete bitmap for MoW table
4479
244
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4480
244
    txn->remove(pending_key);
4481
244
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4482
244
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4483
244
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4484
4485
244
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4486
244
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4487
244
    txn->remove(dbm_start_key, dbm_end_key);
4488
244
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4489
244
              << " end=" << hex(dbm_end_key);
4490
4491
244
    TxnErrorCode err = txn->commit();
4492
244
    if (err != TxnErrorCode::TXN_OK) {
4493
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4494
0
        ret = -1;
4495
0
    }
4496
4497
244
    if (ret == 0) {
4498
        // All object files under tablet have been deleted
4499
244
        std::lock_guard lock(recycled_tablets_mtx_);
4500
244
        recycled_tablets_.insert(tablet_id);
4501
244
    }
4502
4503
244
    return ret;
4504
246
}
4505
4506
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
4507
11
                                               RecyclerMetricsContext& metrics_context) {
4508
11
    int ret = 0;
4509
11
    auto start_time = steady_clock::now();
4510
4511
11
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4512
4513
    // collect resource ids
4514
11
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4515
11
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4516
11
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4517
11
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4518
4519
11
    int64_t recycle_rowsets_number = 0;
4520
11
    int64_t recycle_segments_number = 0;
4521
11
    int64_t recycle_rowsets_data_size = 0;
4522
11
    int64_t recycle_rowsets_index_size = 0;
4523
11
    int64_t max_rowset_version = 0;
4524
11
    int64_t min_rowset_creation_time = INT64_MAX;
4525
11
    int64_t max_rowset_creation_time = 0;
4526
11
    int64_t min_rowset_expiration_time = INT64_MAX;
4527
11
    int64_t max_rowset_expiration_time = 0;
4528
4529
11
    DORIS_CLOUD_DEFER {
4530
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4531
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4532
11
                .tag("instance_id", instance_id_)
4533
11
                .tag("tablet_id", tablet_id)
4534
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4535
11
                .tag("recycle segments number", recycle_segments_number)
4536
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4537
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4538
11
                .tag("max rowset version", max_rowset_version)
4539
11
                .tag("min rowset creation time", min_rowset_creation_time)
4540
11
                .tag("max rowset creation time", max_rowset_creation_time)
4541
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4542
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4543
11
                .tag("ret", ret);
4544
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4529
11
    DORIS_CLOUD_DEFER {
4530
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4531
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4532
11
                .tag("instance_id", instance_id_)
4533
11
                .tag("tablet_id", tablet_id)
4534
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4535
11
                .tag("recycle segments number", recycle_segments_number)
4536
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4537
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4538
11
                .tag("max rowset version", max_rowset_version)
4539
11
                .tag("min rowset creation time", min_rowset_creation_time)
4540
11
                .tag("max rowset creation time", max_rowset_creation_time)
4541
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4542
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4543
11
                .tag("ret", ret);
4544
11
    };
4545
4546
11
    std::unique_ptr<Transaction> txn;
4547
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4548
0
        LOG_WARNING("failed to recycle tablet ")
4549
0
                .tag("tablet id", tablet_id)
4550
0
                .tag("instance_id", instance_id_)
4551
0
                .tag("reason", "failed to create txn");
4552
0
        ret = -1;
4553
0
    }
4554
4555
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
4556
    // by the related operation logs.
4557
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
4558
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
4559
11
    MetaReader meta_reader(instance_id_);
4560
11
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
4561
11
    if (err == TxnErrorCode::TXN_OK) {
4562
11
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
4563
11
    }
4564
11
    if (err != TxnErrorCode::TXN_OK) {
4565
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4566
0
                .tag("tablet id", tablet_id)
4567
0
                .tag("err", err)
4568
0
                .tag("instance id", instance_id_);
4569
0
        ret = -1;
4570
0
    }
4571
4572
11
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
4573
11
             load_rowset_metas.size(), compact_rowset_metas.size())
4574
11
            .tag("instance_id", instance_id_)
4575
11
            .tag("tablet_id", tablet_id);
4576
4577
11
    SyncExecutor<int> concurrent_delete_executor(
4578
11
            _thread_pool_group.s3_producer_pool,
4579
11
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4580
11
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
4581
4582
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4583
60
        recycle_rowsets_number += 1;
4584
60
        recycle_segments_number += rs_meta.num_segments();
4585
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4586
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4587
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4588
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4589
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4590
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4591
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4592
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
4582
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4583
60
        recycle_rowsets_number += 1;
4584
60
        recycle_segments_number += rs_meta.num_segments();
4585
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4586
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4587
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4588
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4589
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4590
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4591
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4592
60
    };
4593
4594
11
    std::vector<RowsetDeleteTask> all_tasks;
4595
60
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
4596
60
        update_rowset_stats(rs_meta);
4597
        // Version 0-1 rowset has no resource_id and no actual data files,
4598
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4599
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4600
60
        RowsetDeleteTask task;
4601
60
        task.rowset_meta = rs_meta;
4602
60
        task.versioned_rowset_key =
4603
60
                versioned::meta_rowset_load_key({instance_id_, tablet_id, rs_meta.end_version()});
4604
60
        task.non_versioned_rowset_key =
4605
60
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4606
60
        task.versionstamp = versionstamp;
4607
60
        all_tasks.push_back(std::move(task));
4608
60
    }
4609
4610
11
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
4611
0
        update_rowset_stats(rs_meta);
4612
        // Version 0-1 rowset has no resource_id and no actual data files,
4613
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4614
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4615
0
        RowsetDeleteTask task;
4616
0
        task.rowset_meta = rs_meta;
4617
0
        task.versioned_rowset_key = versioned::meta_rowset_compact_key(
4618
0
                {instance_id_, tablet_id, rs_meta.end_version()});
4619
0
        task.non_versioned_rowset_key =
4620
0
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4621
0
        task.versionstamp = versionstamp;
4622
0
        all_tasks.push_back(std::move(task));
4623
0
    }
4624
4625
11
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
4626
0
        RecycleRowsetPB recycle_rowset;
4627
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
4628
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4629
0
            return -1;
4630
0
        }
4631
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
4632
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
4633
                // in old version, keep this key-value pair and it needs to be checked manually
4634
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4635
0
                return -1;
4636
0
            }
4637
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
4638
                // old version `RecycleRowsetPB` may have an empty resource_id, just remove the kv.
4639
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4640
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
4641
0
                return -1;
4642
0
            }
4643
            // decode rowset_id
4644
0
            auto k1 = k;
4645
0
            k1.remove_prefix(1);
4646
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4647
0
            decode_key(&k1, &out);
4648
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4649
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4650
0
            LOG_INFO("delete old-version rowset data")
4651
0
                    .tag("instance_id", instance_id_)
4652
0
                    .tag("tablet_id", tablet_id)
4653
0
                    .tag("rowset_id", rowset_id);
4654
4655
            // Old version RecycleRowsetPB lacks full rowset_meta info (num_segments, schema, etc.),
4656
            // so we must use prefix deletion directly instead of batch delete.
4657
0
            concurrent_delete_executor.add(
4658
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
4659
                        // delete by prefix, the recycle rowset key will be deleted by range later.
4660
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
4661
0
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
4662
0
        } else {
4663
0
            const auto& rowset_meta = recycle_rowset.rowset_meta();
4664
            // Version 0-1 rowset has no resource_id and no actual data files,
4665
            // but still needs ref_count key cleanup, so we add it to all_tasks.
4666
            // It will be filtered out in Phase 2 when building rowsets_to_delete.
4667
0
            RowsetDeleteTask task;
4668
0
            task.rowset_meta = rowset_meta;
4669
0
            task.recycle_rowset_key = k;
4670
0
            all_tasks.push_back(std::move(task));
4671
0
        }
4672
0
        return 0;
4673
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
4674
4675
11
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
4676
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
4677
0
                .tag("tablet id", tablet_id)
4678
0
                .tag("instance_id", instance_id_)
4679
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
4680
0
        ret = -1;
4681
0
    }
4682
4683
    // Phase 1: Classify tasks by ref_count
4684
11
    std::vector<RowsetDeleteTask> batch_delete_tasks;
4685
60
    for (auto& task : all_tasks) {
4686
60
        int classify_ret = classify_rowset_task_by_ref_count(task, batch_delete_tasks);
4687
60
        if (classify_ret < 0) {
4688
0
            LOG_WARNING("failed to classify rowset task, fallback to old logic")
4689
0
                    .tag("instance_id", instance_id_)
4690
0
                    .tag("tablet_id", tablet_id)
4691
0
                    .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4692
0
            concurrent_delete_executor.add([this, t = std::move(task)]() mutable {
4693
0
                return recycle_rowset_meta_and_data(t);
4694
0
            });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
4695
0
        }
4696
60
    }
4697
4698
11
    g_bvar_recycler_batch_delete_rowset_plan_count.put(instance_id_, batch_delete_tasks.size());
4699
4700
11
    LOG_INFO("batch delete plan created")
4701
11
            .tag("instance_id", instance_id_)
4702
11
            .tag("tablet_id", tablet_id)
4703
11
            .tag("plan_count", batch_delete_tasks.size());
4704
4705
    // Phase 2: Execute batch delete using existing delete_rowset_data
4706
11
    if (!batch_delete_tasks.empty()) {
4707
10
        std::map<std::string, RowsetMetaCloudPB> rowsets_to_delete;
4708
49
        for (const auto& task : batch_delete_tasks) {
4709
            // Version 0-1 rowset has no resource_id and no actual data files, skip it
4710
49
            if (task.rowset_meta.resource_id().empty()) {
4711
10
                LOG_INFO("skip rowset with empty resource_id in batch delete")
4712
10
                        .tag("instance_id", instance_id_)
4713
10
                        .tag("tablet_id", tablet_id)
4714
10
                        .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4715
10
                continue;
4716
10
            }
4717
39
            rowsets_to_delete[task.rowset_meta.rowset_id_v2()] = task.rowset_meta;
4718
39
        }
4719
4720
        // Only call delete_rowset_data if there are rowsets with actual data to delete
4721
10
        bool delete_success = true;
4722
10
        if (!rowsets_to_delete.empty()) {
4723
9
            RecyclerMetricsContext batch_metrics_context(instance_id_,
4724
9
                                                         "batch_delete_versioned_tablet");
4725
9
            int delete_ret = delete_rowset_data(
4726
9
                    rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, batch_metrics_context);
4727
9
            if (delete_ret != 0) {
4728
0
                LOG_WARNING("batch delete execution failed")
4729
0
                        .tag("instance_id", instance_id_)
4730
0
                        .tag("tablet_id", tablet_id);
4731
0
                g_bvar_recycler_batch_delete_failures.put(instance_id_, 1);
4732
0
                ret = -1;
4733
0
                delete_success = false;
4734
0
            }
4735
9
        }
4736
4737
        // Phase 3: Only cleanup metadata if data deletion succeeded.
4738
        // If deletion failed, keep recycle_rowset_key so next round will retry.
4739
10
        if (delete_success) {
4740
10
            int cleanup_ret = cleanup_rowset_metadata(batch_delete_tasks);
4741
10
            if (cleanup_ret != 0) {
4742
0
                LOG_WARNING("batch delete cleanup failed")
4743
0
                        .tag("instance_id", instance_id_)
4744
0
                        .tag("tablet_id", tablet_id);
4745
0
                ret = -1;
4746
0
            }
4747
10
        }
4748
10
    }
4749
4750
    // Always wait for fallback tasks to complete before returning
4751
11
    bool finished = true;
4752
11
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4753
11
    for (int r : rets) {
4754
0
        if (r != 0) {
4755
0
            ret = -1;
4756
0
        }
4757
0
    }
4758
4759
11
    ret = finished ? ret : -1;
4760
4761
11
    if (ret != 0) { // failed recycle tablet data
4762
0
        LOG_WARNING("recycle versioned tablet failed")
4763
0
                .tag("finished", finished)
4764
0
                .tag("ret", ret)
4765
0
                .tag("instance_id", instance_id_)
4766
0
                .tag("tablet_id", tablet_id);
4767
0
        return ret;
4768
0
    }
4769
4770
11
    tablet_metrics_context_.total_recycled_data_size +=
4771
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4772
11
    tablet_metrics_context_.total_recycled_num += 1;
4773
11
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4774
11
    segment_metrics_context_.total_recycled_data_size +=
4775
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4776
11
    metrics_context.total_recycled_data_size +=
4777
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4778
11
    tablet_metrics_context_.report();
4779
11
    segment_metrics_context_.report();
4780
11
    metrics_context.report();
4781
4782
11
    txn.reset();
4783
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4784
0
        LOG_WARNING("failed to recycle tablet ")
4785
0
                .tag("tablet id", tablet_id)
4786
0
                .tag("instance_id", instance_id_)
4787
0
                .tag("reason", "failed to create txn");
4788
0
        ret = -1;
4789
0
    }
4790
    // delete all rowset kv in this tablet
4791
11
    txn->remove(rs_key0, rs_key1);
4792
11
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4793
4794
    // remove delete bitmap for MoW table
4795
11
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4796
11
    txn->remove(pending_key);
4797
11
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4798
11
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4799
11
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4800
4801
11
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4802
11
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4803
11
    txn->remove(dbm_start_key, dbm_end_key);
4804
11
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4805
11
              << " end=" << hex(dbm_end_key);
4806
4807
11
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
4808
11
    std::string tablet_index_val;
4809
11
    err = txn->get(versioned_idx_key, &tablet_index_val);
4810
11
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
4811
0
        LOG_WARNING("failed to get tablet index kv")
4812
0
                .tag("instance_id", instance_id_)
4813
0
                .tag("tablet_id", tablet_id)
4814
0
                .tag("err", err);
4815
0
        ret = -1;
4816
11
    } else if (err == TxnErrorCode::TXN_OK) {
4817
        // If the tablet index kv exists, we need to delete it
4818
10
        TabletIndexPB tablet_index_pb;
4819
10
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
4820
0
            LOG_WARNING("failed to parse tablet index pb")
4821
0
                    .tag("instance_id", instance_id_)
4822
0
                    .tag("tablet_id", tablet_id);
4823
0
            ret = -1;
4824
10
        } else {
4825
10
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
4826
10
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
4827
10
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
4828
10
            txn->remove(versioned_inverted_idx_key);
4829
10
            txn->remove(versioned_idx_key);
4830
10
        }
4831
10
    }
4832
4833
11
    err = txn->commit();
4834
11
    if (err != TxnErrorCode::TXN_OK) {
4835
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4836
0
        ret = -1;
4837
0
    }
4838
4839
11
    if (ret == 0) {
4840
        // All object files under tablet have been deleted
4841
11
        std::lock_guard lock(recycled_tablets_mtx_);
4842
11
        recycled_tablets_.insert(tablet_id);
4843
11
    }
4844
4845
11
    return ret;
4846
11
}
4847
4848
27
int InstanceRecycler::recycle_rowsets() {
4849
27
    if (should_recycle_versioned_keys()) {
4850
5
        return recycle_versioned_rowsets();
4851
5
    }
4852
4853
22
    const std::string task_name = "recycle_rowsets";
4854
22
    int64_t num_scanned = 0;
4855
22
    int64_t num_expired = 0;
4856
22
    int64_t num_prepare = 0;
4857
22
    int64_t num_compacted = 0;
4858
22
    int64_t num_empty_rowset = 0;
4859
22
    size_t total_rowset_key_size = 0;
4860
22
    size_t total_rowset_value_size = 0;
4861
22
    size_t expired_rowset_size = 0;
4862
22
    std::atomic_long num_recycled = 0;
4863
22
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4864
4865
22
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4866
22
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4867
22
    std::string recyc_rs_key0;
4868
22
    std::string recyc_rs_key1;
4869
22
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4870
22
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4871
4872
22
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4873
4874
22
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4875
22
    register_recycle_task(task_name, start_time);
4876
4877
22
    DORIS_CLOUD_DEFER {
4878
22
        unregister_recycle_task(task_name);
4879
22
        int64_t cost =
4880
22
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4881
22
        metrics_context.finish_report();
4882
22
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4883
22
                .tag("instance_id", instance_id_)
4884
22
                .tag("num_scanned", num_scanned)
4885
22
                .tag("num_expired", num_expired)
4886
22
                .tag("num_recycled", num_recycled)
4887
22
                .tag("num_recycled.prepare", num_prepare)
4888
22
                .tag("num_recycled.compacted", num_compacted)
4889
22
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4890
22
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4891
22
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4892
22
                .tag("expired_rowset_meta_size", expired_rowset_size);
4893
22
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4877
7
    DORIS_CLOUD_DEFER {
4878
7
        unregister_recycle_task(task_name);
4879
7
        int64_t cost =
4880
7
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4881
7
        metrics_context.finish_report();
4882
7
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4883
7
                .tag("instance_id", instance_id_)
4884
7
                .tag("num_scanned", num_scanned)
4885
7
                .tag("num_expired", num_expired)
4886
7
                .tag("num_recycled", num_recycled)
4887
7
                .tag("num_recycled.prepare", num_prepare)
4888
7
                .tag("num_recycled.compacted", num_compacted)
4889
7
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4890
7
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4891
7
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4892
7
                .tag("expired_rowset_meta_size", expired_rowset_size);
4893
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4877
15
    DORIS_CLOUD_DEFER {
4878
15
        unregister_recycle_task(task_name);
4879
15
        int64_t cost =
4880
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4881
15
        metrics_context.finish_report();
4882
15
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4883
15
                .tag("instance_id", instance_id_)
4884
15
                .tag("num_scanned", num_scanned)
4885
15
                .tag("num_expired", num_expired)
4886
15
                .tag("num_recycled", num_recycled)
4887
15
                .tag("num_recycled.prepare", num_prepare)
4888
15
                .tag("num_recycled.compacted", num_compacted)
4889
15
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4890
15
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4891
15
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4892
15
                .tag("expired_rowset_meta_size", expired_rowset_size);
4893
15
    };
4894
4895
22
    std::vector<std::string> rowset_keys;
4896
22
    std::vector<std::string> rowset_keys_to_mark_recycled;
4897
22
    std::vector<std::string> rowset_keys_to_abort;
4898
22
    std::vector<std::string> prepare_rowset_keys_to_delete;
4899
    // rowset_id -> rowset_meta
4900
    // store rowset id and meta for statistics rs size when delete
4901
22
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
4902
4903
    // Store keys of rowset recycled by background workers
4904
22
    std::mutex async_recycled_rowset_keys_mutex;
4905
22
    std::vector<std::string> async_recycled_rowset_keys;
4906
22
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4907
22
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
4908
22
    worker_pool->start();
4909
    // TODO bacth delete
4910
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4911
4.00k
        std::string dbm_start_key =
4912
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4913
4.00k
        std::string dbm_end_key = dbm_start_key;
4914
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4915
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4916
4.00k
        if (ret != 0) {
4917
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4918
0
                         << instance_id_;
4919
0
        }
4920
4.00k
        return ret;
4921
4.00k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4910
2
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4911
2
        std::string dbm_start_key =
4912
2
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4913
2
        std::string dbm_end_key = dbm_start_key;
4914
2
        encode_int64(INT64_MAX, &dbm_end_key);
4915
2
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4916
2
        if (ret != 0) {
4917
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4918
0
                         << instance_id_;
4919
0
        }
4920
2
        return ret;
4921
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4910
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4911
4.00k
        std::string dbm_start_key =
4912
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4913
4.00k
        std::string dbm_end_key = dbm_start_key;
4914
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4915
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4916
4.00k
        if (ret != 0) {
4917
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4918
0
                         << instance_id_;
4919
0
        }
4920
4.00k
        return ret;
4921
4.00k
    };
4922
22
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
4923
250
                                            int64_t tablet_id, const std::string& rowset_id) {
4924
        // Try to delete rowset data in background thread
4925
250
        int ret = worker_pool->submit_with_timeout(
4926
250
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4927
246
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4928
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4929
0
                        return;
4930
0
                    }
4931
246
                    std::vector<std::string> keys;
4932
246
                    {
4933
246
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4934
246
                        async_recycled_rowset_keys.push_back(std::move(key));
4935
246
                        if (async_recycled_rowset_keys.size() > 100) {
4936
2
                            keys.swap(async_recycled_rowset_keys);
4937
2
                        }
4938
246
                    }
4939
246
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4940
246
                    if (keys.empty()) return;
4941
2
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4942
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4943
0
                                     << instance_id_;
4944
2
                    } else {
4945
2
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4946
2
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4947
2
                                           num_recycled, start_time);
4948
2
                    }
4949
2
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4926
246
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4927
246
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4928
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4929
0
                        return;
4930
0
                    }
4931
246
                    std::vector<std::string> keys;
4932
246
                    {
4933
246
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4934
246
                        async_recycled_rowset_keys.push_back(std::move(key));
4935
246
                        if (async_recycled_rowset_keys.size() > 100) {
4936
2
                            keys.swap(async_recycled_rowset_keys);
4937
2
                        }
4938
246
                    }
4939
246
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4940
246
                    if (keys.empty()) return;
4941
2
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4942
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4943
0
                                     << instance_id_;
4944
2
                    } else {
4945
2
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4946
2
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4947
2
                                           num_recycled, start_time);
4948
2
                    }
4949
2
                },
4950
250
                0);
4951
250
        if (ret == 0) return 0;
4952
        // Submit task failed, delete rowset data in current thread
4953
4
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4954
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4955
0
            return -1;
4956
0
        }
4957
4
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4958
0
            return -1;
4959
0
        }
4960
4
        rowset_keys.push_back(std::move(key));
4961
4
        return 0;
4962
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4923
250
                                            int64_t tablet_id, const std::string& rowset_id) {
4924
        // Try to delete rowset data in background thread
4925
250
        int ret = worker_pool->submit_with_timeout(
4926
250
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4927
250
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4928
250
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4929
250
                        return;
4930
250
                    }
4931
250
                    std::vector<std::string> keys;
4932
250
                    {
4933
250
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4934
250
                        async_recycled_rowset_keys.push_back(std::move(key));
4935
250
                        if (async_recycled_rowset_keys.size() > 100) {
4936
250
                            keys.swap(async_recycled_rowset_keys);
4937
250
                        }
4938
250
                    }
4939
250
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4940
250
                    if (keys.empty()) return;
4941
250
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4942
250
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4943
250
                                     << instance_id_;
4944
250
                    } else {
4945
250
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4946
250
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4947
250
                                           num_recycled, start_time);
4948
250
                    }
4949
250
                },
4950
250
                0);
4951
250
        if (ret == 0) return 0;
4952
        // Submit task failed, delete rowset data in current thread
4953
4
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4954
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4955
0
            return -1;
4956
0
        }
4957
4
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4958
0
            return -1;
4959
0
        }
4960
4
        rowset_keys.push_back(std::move(key));
4961
4
        return 0;
4962
4
    };
4963
4964
22
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4965
4966
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4967
7.75k
        ++num_scanned;
4968
7.75k
        total_rowset_key_size += k.size();
4969
7.75k
        total_rowset_value_size += v.size();
4970
7.75k
        RecycleRowsetPB rowset;
4971
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4972
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4973
0
            return -1;
4974
0
        }
4975
4976
7.75k
        int64_t current_time = ::time(nullptr);
4977
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4978
4979
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4980
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4981
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4982
7.75k
        if (current_time < expiration) { // not expired
4983
0
            return 0;
4984
0
        }
4985
7.75k
        ++num_expired;
4986
7.75k
        expired_rowset_size += v.size();
4987
4988
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4989
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4990
                // in old version, keep this key-value pair and it needs to be checked manually
4991
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4992
0
                return -1;
4993
0
            }
4994
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4995
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4996
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4997
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4998
0
                rowset_keys.emplace_back(k);
4999
0
                return -1;
5000
0
            }
5001
            // decode rowset_id
5002
250
            auto k1 = k;
5003
250
            k1.remove_prefix(1);
5004
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5005
250
            decode_key(&k1, &out);
5006
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5007
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5008
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5009
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
5010
250
                      << " task_type=" << metrics_context.operation_type;
5011
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5012
250
                                             rowset.tablet_id(), rowset_id) != 0) {
5013
0
                return -1;
5014
0
            }
5015
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
5016
250
            metrics_context.total_recycled_num++;
5017
250
            segment_metrics_context_.total_recycled_data_size +=
5018
250
                    rowset.rowset_meta().total_disk_size();
5019
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
5020
250
            return 0;
5021
250
        }
5022
5023
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
5024
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
5025
7.50k
            if (need_mark_rowset_as_recycled(rowset)) {
5026
3.75k
                rowset_keys_to_mark_recycled.emplace_back(k);
5027
3.75k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5028
3.75k
                             "at next turn, instance_id="
5029
3.75k
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5030
3.75k
                          << " version=[" << rowset_meta->start_version() << '-'
5031
3.75k
                          << rowset_meta->end_version() << "]";
5032
3.75k
                return 0;
5033
3.75k
            }
5034
7.50k
        }
5035
5036
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle &&
5037
3.75k
            rowset_meta->end_version() != 1) {
5038
3.75k
            if (make_deferred_abort_task(rowset).has_value()) {
5039
2
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5040
2
                             "instance_id="
5041
2
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5042
2
                          << " version=[" << rowset_meta->start_version() << '-'
5043
2
                          << rowset_meta->end_version() << "]";
5044
2
                rowset_keys_to_abort.emplace_back(k);
5045
2
            }
5046
3.75k
        }
5047
5048
        // TODO(plat1ko): check rowset not referenced
5049
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5050
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5051
0
                LOG_INFO("recycle rowset that has empty resource id");
5052
0
            } else {
5053
                // other situations, keep this key-value pair and it needs to be checked manually
5054
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5055
0
                return -1;
5056
0
            }
5057
0
        }
5058
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5059
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
5060
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5061
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5062
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
5063
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5064
3.75k
                  << " rowset_meta_size=" << v.size()
5065
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
5066
3.75k
                  << " task_type=" << metrics_context.operation_type;
5067
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5068
            // unable to calculate file path, can only be deleted by rowset id prefix
5069
652
            num_prepare += 1;
5070
652
            prepare_rowset_keys_to_delete.emplace_back(k);
5071
3.10k
        } else {
5072
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
5073
3.10k
            rowset_keys.emplace_back(k);
5074
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
5075
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
5076
3.10k
                ++num_empty_rowset;
5077
3.10k
            }
5078
3.10k
        }
5079
3.75k
        return 0;
5080
3.75k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4966
7
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4967
7
        ++num_scanned;
4968
7
        total_rowset_key_size += k.size();
4969
7
        total_rowset_value_size += v.size();
4970
7
        RecycleRowsetPB rowset;
4971
7
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4972
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4973
0
            return -1;
4974
0
        }
4975
4976
7
        int64_t current_time = ::time(nullptr);
4977
7
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4978
4979
7
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4980
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4981
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4982
7
        if (current_time < expiration) { // not expired
4983
0
            return 0;
4984
0
        }
4985
7
        ++num_expired;
4986
7
        expired_rowset_size += v.size();
4987
4988
7
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4989
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4990
                // in old version, keep this key-value pair and it needs to be checked manually
4991
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4992
0
                return -1;
4993
0
            }
4994
0
            if (rowset.resource_id().empty()) [[unlikely]] {
4995
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4996
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4997
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4998
0
                rowset_keys.emplace_back(k);
4999
0
                return -1;
5000
0
            }
5001
            // decode rowset_id
5002
0
            auto k1 = k;
5003
0
            k1.remove_prefix(1);
5004
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5005
0
            decode_key(&k1, &out);
5006
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5007
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5008
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5009
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
5010
0
                      << " task_type=" << metrics_context.operation_type;
5011
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5012
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5013
0
                return -1;
5014
0
            }
5015
0
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
5016
0
            metrics_context.total_recycled_num++;
5017
0
            segment_metrics_context_.total_recycled_data_size +=
5018
0
                    rowset.rowset_meta().total_disk_size();
5019
0
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
5020
0
            return 0;
5021
0
        }
5022
5023
7
        auto* rowset_meta = rowset.mutable_rowset_meta();
5024
7
        if (config::enable_mark_delete_rowset_before_recycle) {
5025
7
            if (need_mark_rowset_as_recycled(rowset)) {
5026
5
                rowset_keys_to_mark_recycled.emplace_back(k);
5027
5
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5028
5
                             "at next turn, instance_id="
5029
5
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5030
5
                          << " version=[" << rowset_meta->start_version() << '-'
5031
5
                          << rowset_meta->end_version() << "]";
5032
5
                return 0;
5033
5
            }
5034
7
        }
5035
5036
2
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle &&
5037
2
            rowset_meta->end_version() != 1) {
5038
2
            if (make_deferred_abort_task(rowset).has_value()) {
5039
2
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5040
2
                             "instance_id="
5041
2
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5042
2
                          << " version=[" << rowset_meta->start_version() << '-'
5043
2
                          << rowset_meta->end_version() << "]";
5044
2
                rowset_keys_to_abort.emplace_back(k);
5045
2
            }
5046
2
        }
5047
5048
        // TODO(plat1ko): check rowset not referenced
5049
2
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5050
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5051
0
                LOG_INFO("recycle rowset that has empty resource id");
5052
0
            } else {
5053
                // other situations, keep this key-value pair and it needs to be checked manually
5054
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5055
0
                return -1;
5056
0
            }
5057
0
        }
5058
2
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5059
2
                  << " tablet_id=" << rowset_meta->tablet_id()
5060
2
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5061
2
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5062
2
                  << "] txn_id=" << rowset_meta->txn_id()
5063
2
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5064
2
                  << " rowset_meta_size=" << v.size()
5065
2
                  << " creation_time=" << rowset_meta->creation_time()
5066
2
                  << " task_type=" << metrics_context.operation_type;
5067
2
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5068
            // unable to calculate file path, can only be deleted by rowset id prefix
5069
2
            num_prepare += 1;
5070
2
            prepare_rowset_keys_to_delete.emplace_back(k);
5071
2
        } else {
5072
0
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
5073
0
            rowset_keys.emplace_back(k);
5074
0
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
5075
0
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
5076
0
                ++num_empty_rowset;
5077
0
            }
5078
0
        }
5079
2
        return 0;
5080
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4966
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4967
7.75k
        ++num_scanned;
4968
7.75k
        total_rowset_key_size += k.size();
4969
7.75k
        total_rowset_value_size += v.size();
4970
7.75k
        RecycleRowsetPB rowset;
4971
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4972
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4973
0
            return -1;
4974
0
        }
4975
4976
7.75k
        int64_t current_time = ::time(nullptr);
4977
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4978
4979
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4980
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4981
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4982
7.75k
        if (current_time < expiration) { // not expired
4983
0
            return 0;
4984
0
        }
4985
7.75k
        ++num_expired;
4986
7.75k
        expired_rowset_size += v.size();
4987
4988
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4989
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4990
                // in old version, keep this key-value pair and it needs to be checked manually
4991
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4992
0
                return -1;
4993
0
            }
4994
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4995
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4996
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4997
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4998
0
                rowset_keys.emplace_back(k);
4999
0
                return -1;
5000
0
            }
5001
            // decode rowset_id
5002
250
            auto k1 = k;
5003
250
            k1.remove_prefix(1);
5004
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5005
250
            decode_key(&k1, &out);
5006
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5007
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5008
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5009
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
5010
250
                      << " task_type=" << metrics_context.operation_type;
5011
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5012
250
                                             rowset.tablet_id(), rowset_id) != 0) {
5013
0
                return -1;
5014
0
            }
5015
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
5016
250
            metrics_context.total_recycled_num++;
5017
250
            segment_metrics_context_.total_recycled_data_size +=
5018
250
                    rowset.rowset_meta().total_disk_size();
5019
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
5020
250
            return 0;
5021
250
        }
5022
5023
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
5024
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
5025
7.50k
            if (need_mark_rowset_as_recycled(rowset)) {
5026
3.75k
                rowset_keys_to_mark_recycled.emplace_back(k);
5027
3.75k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5028
3.75k
                             "at next turn, instance_id="
5029
3.75k
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5030
3.75k
                          << " version=[" << rowset_meta->start_version() << '-'
5031
3.75k
                          << rowset_meta->end_version() << "]";
5032
3.75k
                return 0;
5033
3.75k
            }
5034
7.50k
        }
5035
5036
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle &&
5037
3.75k
            rowset_meta->end_version() != 1) {
5038
3.75k
            if (make_deferred_abort_task(rowset).has_value()) {
5039
0
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5040
0
                             "instance_id="
5041
0
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5042
0
                          << " version=[" << rowset_meta->start_version() << '-'
5043
0
                          << rowset_meta->end_version() << "]";
5044
0
                rowset_keys_to_abort.emplace_back(k);
5045
0
            }
5046
3.75k
        }
5047
5048
        // TODO(plat1ko): check rowset not referenced
5049
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5050
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5051
0
                LOG_INFO("recycle rowset that has empty resource id");
5052
0
            } else {
5053
                // other situations, keep this key-value pair and it needs to be checked manually
5054
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5055
0
                return -1;
5056
0
            }
5057
0
        }
5058
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5059
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
5060
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5061
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5062
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
5063
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5064
3.75k
                  << " rowset_meta_size=" << v.size()
5065
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
5066
3.75k
                  << " task_type=" << metrics_context.operation_type;
5067
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5068
            // unable to calculate file path, can only be deleted by rowset id prefix
5069
650
            num_prepare += 1;
5070
650
            prepare_rowset_keys_to_delete.emplace_back(k);
5071
3.10k
        } else {
5072
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
5073
3.10k
            rowset_keys.emplace_back(k);
5074
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
5075
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
5076
3.10k
                ++num_empty_rowset;
5077
3.10k
            }
5078
3.10k
        }
5079
3.75k
        return 0;
5080
3.75k
    };
5081
5082
49
    auto loop_done = [&]() -> int {
5083
49
        std::vector<std::string> rowset_keys_to_delete;
5084
49
        std::vector<std::string> mark_keys_to_process;
5085
49
        std::vector<std::string> abort_keys_to_process;
5086
49
        std::vector<std::string> prepare_keys_to_process;
5087
        // rowset_id -> rowset_meta
5088
        // store rowset id and meta for statistics rs size when delete
5089
49
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
5090
49
        rowset_keys_to_delete.swap(rowset_keys);
5091
49
        mark_keys_to_process.swap(rowset_keys_to_mark_recycled);
5092
49
        abort_keys_to_process.swap(rowset_keys_to_abort);
5093
49
        prepare_keys_to_process.swap(prepare_rowset_keys_to_delete);
5094
49
        rowsets_to_delete.swap(rowsets);
5095
49
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
5096
49
                             rowsets_to_delete = std::move(rowsets_to_delete),
5097
49
                             prepare_keys_to_process = std::move(prepare_keys_to_process),
5098
49
                             mark_keys_to_process = std::move(mark_keys_to_process),
5099
49
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5100
49
            if (!mark_keys_to_process.empty() &&
5101
49
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5102
26
                                                                mark_keys_to_process) != 0) {
5103
0
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5104
0
                             << instance_id_;
5105
0
                return;
5106
0
            }
5107
49
            if (!abort_keys_to_process.empty() &&
5108
49
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5109
2
                        0) {
5110
0
                return;
5111
0
            }
5112
49
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5113
49
            if (!prepare_keys_to_process.empty() &&
5114
49
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5115
23
                                             &prepare_delete_tasks) != 0) {
5116
0
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5117
0
                             << instance_id_;
5118
0
                return;
5119
0
            }
5120
49
            if (!prepare_delete_tasks.empty()) {
5121
23
                std::vector<std::string> prepare_rowset_keys_to_delete;
5122
23
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5123
652
                for (const auto& task : prepare_delete_tasks) {
5124
652
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5125
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5126
0
                        return;
5127
0
                    }
5128
652
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5129
0
                        return;
5130
0
                    }
5131
652
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5132
652
                }
5133
23
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5134
0
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5135
0
                                 << instance_id_;
5136
0
                    return;
5137
0
                }
5138
23
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5139
23
                                       std::memory_order_relaxed);
5140
23
            }
5141
49
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5142
49
                                   metrics_context) != 0) {
5143
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5144
0
                return;
5145
0
            }
5146
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
5147
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5148
0
                    return;
5149
0
                }
5150
3.10k
            }
5151
49
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5152
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5153
0
                return;
5154
0
            }
5155
49
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5156
49
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
5099
7
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5100
7
            if (!mark_keys_to_process.empty() &&
5101
7
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5102
5
                                                                mark_keys_to_process) != 0) {
5103
0
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5104
0
                             << instance_id_;
5105
0
                return;
5106
0
            }
5107
7
            if (!abort_keys_to_process.empty() &&
5108
7
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5109
2
                        0) {
5110
0
                return;
5111
0
            }
5112
7
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5113
7
            if (!prepare_keys_to_process.empty() &&
5114
7
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5115
2
                                             &prepare_delete_tasks) != 0) {
5116
0
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5117
0
                             << instance_id_;
5118
0
                return;
5119
0
            }
5120
7
            if (!prepare_delete_tasks.empty()) {
5121
2
                std::vector<std::string> prepare_rowset_keys_to_delete;
5122
2
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5123
2
                for (const auto& task : prepare_delete_tasks) {
5124
2
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5125
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5126
0
                        return;
5127
0
                    }
5128
2
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5129
0
                        return;
5130
0
                    }
5131
2
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5132
2
                }
5133
2
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5134
0
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5135
0
                                 << instance_id_;
5136
0
                    return;
5137
0
                }
5138
2
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5139
2
                                       std::memory_order_relaxed);
5140
2
            }
5141
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5142
7
                                   metrics_context) != 0) {
5143
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5144
0
                return;
5145
0
            }
5146
7
            for (const auto& [_, rs] : rowsets_to_delete) {
5147
0
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5148
0
                    return;
5149
0
                }
5150
0
            }
5151
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5152
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5153
0
                return;
5154
0
            }
5155
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5156
7
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
5099
42
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5100
42
            if (!mark_keys_to_process.empty() &&
5101
42
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5102
21
                                                                mark_keys_to_process) != 0) {
5103
0
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5104
0
                             << instance_id_;
5105
0
                return;
5106
0
            }
5107
42
            if (!abort_keys_to_process.empty() &&
5108
42
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5109
0
                        0) {
5110
0
                return;
5111
0
            }
5112
42
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5113
42
            if (!prepare_keys_to_process.empty() &&
5114
42
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5115
21
                                             &prepare_delete_tasks) != 0) {
5116
0
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5117
0
                             << instance_id_;
5118
0
                return;
5119
0
            }
5120
42
            if (!prepare_delete_tasks.empty()) {
5121
21
                std::vector<std::string> prepare_rowset_keys_to_delete;
5122
21
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5123
650
                for (const auto& task : prepare_delete_tasks) {
5124
650
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5125
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5126
0
                        return;
5127
0
                    }
5128
650
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5129
0
                        return;
5130
0
                    }
5131
650
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5132
650
                }
5133
21
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5134
0
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5135
0
                                 << instance_id_;
5136
0
                    return;
5137
0
                }
5138
21
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5139
21
                                       std::memory_order_relaxed);
5140
21
            }
5141
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5142
42
                                   metrics_context) != 0) {
5143
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5144
0
                return;
5145
0
            }
5146
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
5147
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5148
0
                    return;
5149
0
                }
5150
3.10k
            }
5151
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5152
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5153
0
                return;
5154
0
            }
5155
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5156
42
        });
5157
49
        return 0;
5158
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
5082
7
    auto loop_done = [&]() -> int {
5083
7
        std::vector<std::string> rowset_keys_to_delete;
5084
7
        std::vector<std::string> mark_keys_to_process;
5085
7
        std::vector<std::string> abort_keys_to_process;
5086
7
        std::vector<std::string> prepare_keys_to_process;
5087
        // rowset_id -> rowset_meta
5088
        // store rowset id and meta for statistics rs size when delete
5089
7
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
5090
7
        rowset_keys_to_delete.swap(rowset_keys);
5091
7
        mark_keys_to_process.swap(rowset_keys_to_mark_recycled);
5092
7
        abort_keys_to_process.swap(rowset_keys_to_abort);
5093
7
        prepare_keys_to_process.swap(prepare_rowset_keys_to_delete);
5094
7
        rowsets_to_delete.swap(rowsets);
5095
7
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
5096
7
                             rowsets_to_delete = std::move(rowsets_to_delete),
5097
7
                             prepare_keys_to_process = std::move(prepare_keys_to_process),
5098
7
                             mark_keys_to_process = std::move(mark_keys_to_process),
5099
7
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5100
7
            if (!mark_keys_to_process.empty() &&
5101
7
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5102
7
                                                                mark_keys_to_process) != 0) {
5103
7
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5104
7
                             << instance_id_;
5105
7
                return;
5106
7
            }
5107
7
            if (!abort_keys_to_process.empty() &&
5108
7
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5109
7
                        0) {
5110
7
                return;
5111
7
            }
5112
7
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5113
7
            if (!prepare_keys_to_process.empty() &&
5114
7
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5115
7
                                             &prepare_delete_tasks) != 0) {
5116
7
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5117
7
                             << instance_id_;
5118
7
                return;
5119
7
            }
5120
7
            if (!prepare_delete_tasks.empty()) {
5121
7
                std::vector<std::string> prepare_rowset_keys_to_delete;
5122
7
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5123
7
                for (const auto& task : prepare_delete_tasks) {
5124
7
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5125
7
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5126
7
                        return;
5127
7
                    }
5128
7
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5129
7
                        return;
5130
7
                    }
5131
7
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5132
7
                }
5133
7
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5134
7
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5135
7
                                 << instance_id_;
5136
7
                    return;
5137
7
                }
5138
7
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5139
7
                                       std::memory_order_relaxed);
5140
7
            }
5141
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5142
7
                                   metrics_context) != 0) {
5143
7
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5144
7
                return;
5145
7
            }
5146
7
            for (const auto& [_, rs] : rowsets_to_delete) {
5147
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5148
7
                    return;
5149
7
                }
5150
7
            }
5151
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5152
7
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5153
7
                return;
5154
7
            }
5155
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5156
7
        });
5157
7
        return 0;
5158
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
5082
42
    auto loop_done = [&]() -> int {
5083
42
        std::vector<std::string> rowset_keys_to_delete;
5084
42
        std::vector<std::string> mark_keys_to_process;
5085
42
        std::vector<std::string> abort_keys_to_process;
5086
42
        std::vector<std::string> prepare_keys_to_process;
5087
        // rowset_id -> rowset_meta
5088
        // store rowset id and meta for statistics rs size when delete
5089
42
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
5090
42
        rowset_keys_to_delete.swap(rowset_keys);
5091
42
        mark_keys_to_process.swap(rowset_keys_to_mark_recycled);
5092
42
        abort_keys_to_process.swap(rowset_keys_to_abort);
5093
42
        prepare_keys_to_process.swap(prepare_rowset_keys_to_delete);
5094
42
        rowsets_to_delete.swap(rowsets);
5095
42
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
5096
42
                             rowsets_to_delete = std::move(rowsets_to_delete),
5097
42
                             prepare_keys_to_process = std::move(prepare_keys_to_process),
5098
42
                             mark_keys_to_process = std::move(mark_keys_to_process),
5099
42
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5100
42
            if (!mark_keys_to_process.empty() &&
5101
42
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5102
42
                                                                mark_keys_to_process) != 0) {
5103
42
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5104
42
                             << instance_id_;
5105
42
                return;
5106
42
            }
5107
42
            if (!abort_keys_to_process.empty() &&
5108
42
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5109
42
                        0) {
5110
42
                return;
5111
42
            }
5112
42
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5113
42
            if (!prepare_keys_to_process.empty() &&
5114
42
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5115
42
                                             &prepare_delete_tasks) != 0) {
5116
42
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5117
42
                             << instance_id_;
5118
42
                return;
5119
42
            }
5120
42
            if (!prepare_delete_tasks.empty()) {
5121
42
                std::vector<std::string> prepare_rowset_keys_to_delete;
5122
42
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5123
42
                for (const auto& task : prepare_delete_tasks) {
5124
42
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5125
42
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5126
42
                        return;
5127
42
                    }
5128
42
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5129
42
                        return;
5130
42
                    }
5131
42
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5132
42
                }
5133
42
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5134
42
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5135
42
                                 << instance_id_;
5136
42
                    return;
5137
42
                }
5138
42
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5139
42
                                       std::memory_order_relaxed);
5140
42
            }
5141
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5142
42
                                   metrics_context) != 0) {
5143
42
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5144
42
                return;
5145
42
            }
5146
42
            for (const auto& [_, rs] : rowsets_to_delete) {
5147
42
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5148
42
                    return;
5149
42
                }
5150
42
            }
5151
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5152
42
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5153
42
                return;
5154
42
            }
5155
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5156
42
        });
5157
42
        return 0;
5158
42
    };
5159
5160
22
    if (config::enable_recycler_stats_metrics) {
5161
0
        scan_and_statistics_rowsets();
5162
0
    }
5163
    // recycle_func and loop_done for scan and recycle
5164
22
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
5165
22
                               std::move(loop_done));
5166
5167
22
    worker_pool->stop();
5168
5169
22
    if (!async_recycled_rowset_keys.empty()) {
5170
1
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
5171
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5172
0
            return -1;
5173
1
        } else {
5174
1
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
5175
1
        }
5176
1
    }
5177
5178
    // Report final metrics after all concurrent tasks completed
5179
22
    segment_metrics_context_.report();
5180
22
    metrics_context.report();
5181
5182
22
    return ret;
5183
22
}
5184
5185
13
int InstanceRecycler::recycle_restore_jobs() {
5186
13
    const std::string task_name = "recycle_restore_jobs";
5187
13
    int64_t num_scanned = 0;
5188
13
    int64_t num_expired = 0;
5189
13
    int64_t num_recycled = 0;
5190
13
    int64_t num_aborted = 0;
5191
5192
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5193
5194
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
5195
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
5196
13
    std::string restore_job_key0;
5197
13
    std::string restore_job_key1;
5198
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
5199
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
5200
5201
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
5202
5203
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5204
13
    register_recycle_task(task_name, start_time);
5205
5206
13
    DORIS_CLOUD_DEFER {
5207
13
        unregister_recycle_task(task_name);
5208
13
        int64_t cost =
5209
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5210
13
        metrics_context.finish_report();
5211
5212
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
5213
13
                .tag("instance_id", instance_id_)
5214
13
                .tag("num_scanned", num_scanned)
5215
13
                .tag("num_expired", num_expired)
5216
13
                .tag("num_recycled", num_recycled)
5217
13
                .tag("num_aborted", num_aborted);
5218
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
5206
13
    DORIS_CLOUD_DEFER {
5207
13
        unregister_recycle_task(task_name);
5208
13
        int64_t cost =
5209
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5210
13
        metrics_context.finish_report();
5211
5212
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
5213
13
                .tag("instance_id", instance_id_)
5214
13
                .tag("num_scanned", num_scanned)
5215
13
                .tag("num_expired", num_expired)
5216
13
                .tag("num_recycled", num_recycled)
5217
13
                .tag("num_aborted", num_aborted);
5218
13
    };
5219
5220
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5221
5222
13
    std::vector<std::string_view> restore_job_keys;
5223
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
5224
41
        ++num_scanned;
5225
41
        RestoreJobCloudPB restore_job_pb;
5226
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5227
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5228
0
            return -1;
5229
0
        }
5230
41
        int64_t expiration =
5231
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5232
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
5233
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
5234
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5235
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5236
0
                   << " state=" << restore_job_pb.state();
5237
41
        int64_t current_time = ::time(nullptr);
5238
41
        if (current_time < expiration) { // not expired
5239
0
            return 0;
5240
0
        }
5241
41
        ++num_expired;
5242
5243
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5244
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5245
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5246
5247
41
        std::unique_ptr<Transaction> txn;
5248
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5249
41
        if (err != TxnErrorCode::TXN_OK) {
5250
0
            LOG_WARNING("failed to recycle restore job")
5251
0
                    .tag("err", err)
5252
0
                    .tag("tablet id", tablet_id)
5253
0
                    .tag("instance_id", instance_id_)
5254
0
                    .tag("reason", "failed to create txn");
5255
0
            return -1;
5256
0
        }
5257
5258
41
        std::string val;
5259
41
        err = txn->get(k, &val);
5260
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5261
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5262
0
            return 0;
5263
0
        }
5264
41
        if (err != TxnErrorCode::TXN_OK) {
5265
0
            LOG_WARNING("failed to get kv");
5266
0
            return -1;
5267
0
        }
5268
41
        restore_job_pb.Clear();
5269
41
        if (!restore_job_pb.ParseFromString(val)) {
5270
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5271
0
            return -1;
5272
0
        }
5273
5274
        // PREPARED or COMMITTED, change state to DROPPED and return
5275
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5276
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5277
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5278
0
            restore_job_pb.set_need_recycle_data(true);
5279
0
            txn->put(k, restore_job_pb.SerializeAsString());
5280
0
            err = txn->commit();
5281
0
            if (err != TxnErrorCode::TXN_OK) {
5282
0
                LOG_WARNING("failed to commit txn: {}", err);
5283
0
                return -1;
5284
0
            }
5285
0
            num_aborted++;
5286
0
            return 0;
5287
0
        }
5288
5289
        // Change state to RECYCLING
5290
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5291
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5292
21
            txn->put(k, restore_job_pb.SerializeAsString());
5293
21
            err = txn->commit();
5294
21
            if (err != TxnErrorCode::TXN_OK) {
5295
0
                LOG_WARNING("failed to commit txn: {}", err);
5296
0
                return -1;
5297
0
            }
5298
21
            return 0;
5299
21
        }
5300
5301
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5302
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5303
5304
        // Recycle all data associated with the restore job.
5305
        // This includes rowsets, segments, and related resources.
5306
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5307
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5308
0
            LOG_WARNING("failed to recycle tablet")
5309
0
                    .tag("tablet_id", tablet_id)
5310
0
                    .tag("instance_id", instance_id_);
5311
0
            return -1;
5312
0
        }
5313
5314
        // delete all restore job rowset kv
5315
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5316
5317
20
        err = txn->commit();
5318
20
        if (err != TxnErrorCode::TXN_OK) {
5319
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5320
0
                    .tag("err", err)
5321
0
                    .tag("tablet id", tablet_id)
5322
0
                    .tag("instance_id", instance_id_)
5323
0
                    .tag("reason", "failed to commit txn");
5324
0
            return -1;
5325
0
        }
5326
5327
20
        metrics_context.total_recycled_num = ++num_recycled;
5328
20
        metrics_context.report();
5329
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5330
20
        restore_job_keys.push_back(k);
5331
5332
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5333
20
                  << " tablet_id=" << tablet_id;
5334
20
        return 0;
5335
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5223
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
5224
41
        ++num_scanned;
5225
41
        RestoreJobCloudPB restore_job_pb;
5226
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5227
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5228
0
            return -1;
5229
0
        }
5230
41
        int64_t expiration =
5231
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5232
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
5233
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
5234
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5235
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5236
0
                   << " state=" << restore_job_pb.state();
5237
41
        int64_t current_time = ::time(nullptr);
5238
41
        if (current_time < expiration) { // not expired
5239
0
            return 0;
5240
0
        }
5241
41
        ++num_expired;
5242
5243
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5244
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5245
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5246
5247
41
        std::unique_ptr<Transaction> txn;
5248
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5249
41
        if (err != TxnErrorCode::TXN_OK) {
5250
0
            LOG_WARNING("failed to recycle restore job")
5251
0
                    .tag("err", err)
5252
0
                    .tag("tablet id", tablet_id)
5253
0
                    .tag("instance_id", instance_id_)
5254
0
                    .tag("reason", "failed to create txn");
5255
0
            return -1;
5256
0
        }
5257
5258
41
        std::string val;
5259
41
        err = txn->get(k, &val);
5260
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5261
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5262
0
            return 0;
5263
0
        }
5264
41
        if (err != TxnErrorCode::TXN_OK) {
5265
0
            LOG_WARNING("failed to get kv");
5266
0
            return -1;
5267
0
        }
5268
41
        restore_job_pb.Clear();
5269
41
        if (!restore_job_pb.ParseFromString(val)) {
5270
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5271
0
            return -1;
5272
0
        }
5273
5274
        // PREPARED or COMMITTED, change state to DROPPED and return
5275
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5276
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5277
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5278
0
            restore_job_pb.set_need_recycle_data(true);
5279
0
            txn->put(k, restore_job_pb.SerializeAsString());
5280
0
            err = txn->commit();
5281
0
            if (err != TxnErrorCode::TXN_OK) {
5282
0
                LOG_WARNING("failed to commit txn: {}", err);
5283
0
                return -1;
5284
0
            }
5285
0
            num_aborted++;
5286
0
            return 0;
5287
0
        }
5288
5289
        // Change state to RECYCLING
5290
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5291
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5292
21
            txn->put(k, restore_job_pb.SerializeAsString());
5293
21
            err = txn->commit();
5294
21
            if (err != TxnErrorCode::TXN_OK) {
5295
0
                LOG_WARNING("failed to commit txn: {}", err);
5296
0
                return -1;
5297
0
            }
5298
21
            return 0;
5299
21
        }
5300
5301
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5302
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5303
5304
        // Recycle all data associated with the restore job.
5305
        // This includes rowsets, segments, and related resources.
5306
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5307
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5308
0
            LOG_WARNING("failed to recycle tablet")
5309
0
                    .tag("tablet_id", tablet_id)
5310
0
                    .tag("instance_id", instance_id_);
5311
0
            return -1;
5312
0
        }
5313
5314
        // delete all restore job rowset kv
5315
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5316
5317
20
        err = txn->commit();
5318
20
        if (err != TxnErrorCode::TXN_OK) {
5319
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5320
0
                    .tag("err", err)
5321
0
                    .tag("tablet id", tablet_id)
5322
0
                    .tag("instance_id", instance_id_)
5323
0
                    .tag("reason", "failed to commit txn");
5324
0
            return -1;
5325
0
        }
5326
5327
20
        metrics_context.total_recycled_num = ++num_recycled;
5328
20
        metrics_context.report();
5329
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5330
20
        restore_job_keys.push_back(k);
5331
5332
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5333
20
                  << " tablet_id=" << tablet_id;
5334
20
        return 0;
5335
20
    };
5336
5337
13
    auto loop_done = [&restore_job_keys, this]() -> int {
5338
3
        if (restore_job_keys.empty()) return 0;
5339
1
        DORIS_CLOUD_DEFER {
5340
1
            restore_job_keys.clear();
5341
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5339
1
        DORIS_CLOUD_DEFER {
5340
1
            restore_job_keys.clear();
5341
1
        };
5342
5343
1
        std::unique_ptr<Transaction> txn;
5344
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5345
1
        if (err != TxnErrorCode::TXN_OK) {
5346
0
            LOG_WARNING("failed to recycle restore job")
5347
0
                    .tag("err", err)
5348
0
                    .tag("instance_id", instance_id_)
5349
0
                    .tag("reason", "failed to create txn");
5350
0
            return -1;
5351
0
        }
5352
20
        for (auto& k : restore_job_keys) {
5353
20
            txn->remove(k);
5354
20
        }
5355
1
        err = txn->commit();
5356
1
        if (err != TxnErrorCode::TXN_OK) {
5357
0
            LOG_WARNING("failed to recycle restore job")
5358
0
                    .tag("err", err)
5359
0
                    .tag("instance_id", instance_id_)
5360
0
                    .tag("reason", "failed to commit txn");
5361
0
            return -1;
5362
0
        }
5363
1
        return 0;
5364
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
5337
3
    auto loop_done = [&restore_job_keys, this]() -> int {
5338
3
        if (restore_job_keys.empty()) return 0;
5339
1
        DORIS_CLOUD_DEFER {
5340
1
            restore_job_keys.clear();
5341
1
        };
5342
5343
1
        std::unique_ptr<Transaction> txn;
5344
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5345
1
        if (err != TxnErrorCode::TXN_OK) {
5346
0
            LOG_WARNING("failed to recycle restore job")
5347
0
                    .tag("err", err)
5348
0
                    .tag("instance_id", instance_id_)
5349
0
                    .tag("reason", "failed to create txn");
5350
0
            return -1;
5351
0
        }
5352
20
        for (auto& k : restore_job_keys) {
5353
20
            txn->remove(k);
5354
20
        }
5355
1
        err = txn->commit();
5356
1
        if (err != TxnErrorCode::TXN_OK) {
5357
0
            LOG_WARNING("failed to recycle restore job")
5358
0
                    .tag("err", err)
5359
0
                    .tag("instance_id", instance_id_)
5360
0
                    .tag("reason", "failed to commit txn");
5361
0
            return -1;
5362
0
        }
5363
1
        return 0;
5364
1
    };
5365
5366
13
    if (config::enable_recycler_stats_metrics) {
5367
0
        scan_and_statistics_restore_jobs();
5368
0
    }
5369
5370
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
5371
13
                            std::move(loop_done));
5372
13
}
5373
5374
10
int InstanceRecycler::recycle_versioned_rowsets() {
5375
10
    const std::string task_name = "recycle_rowsets";
5376
10
    int64_t num_scanned = 0;
5377
10
    int64_t num_expired = 0;
5378
10
    int64_t num_prepare = 0;
5379
10
    int64_t num_compacted = 0;
5380
10
    int64_t num_empty_rowset = 0;
5381
10
    size_t total_rowset_key_size = 0;
5382
10
    size_t total_rowset_value_size = 0;
5383
10
    size_t expired_rowset_size = 0;
5384
10
    std::atomic_long num_recycled = 0;
5385
10
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5386
5387
10
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
5388
10
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
5389
10
    std::string recyc_rs_key0;
5390
10
    std::string recyc_rs_key1;
5391
10
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
5392
10
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
5393
5394
10
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
5395
5396
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5397
10
    register_recycle_task(task_name, start_time);
5398
5399
10
    DORIS_CLOUD_DEFER {
5400
10
        unregister_recycle_task(task_name);
5401
10
        int64_t cost =
5402
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5403
10
        metrics_context.finish_report();
5404
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5405
10
                .tag("instance_id", instance_id_)
5406
10
                .tag("num_scanned", num_scanned)
5407
10
                .tag("num_expired", num_expired)
5408
10
                .tag("num_recycled", num_recycled)
5409
10
                .tag("num_recycled.prepare", num_prepare)
5410
10
                .tag("num_recycled.compacted", num_compacted)
5411
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5412
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5413
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5414
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5415
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
5399
10
    DORIS_CLOUD_DEFER {
5400
10
        unregister_recycle_task(task_name);
5401
10
        int64_t cost =
5402
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5403
10
        metrics_context.finish_report();
5404
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5405
10
                .tag("instance_id", instance_id_)
5406
10
                .tag("num_scanned", num_scanned)
5407
10
                .tag("num_expired", num_expired)
5408
10
                .tag("num_recycled", num_recycled)
5409
10
                .tag("num_recycled.prepare", num_prepare)
5410
10
                .tag("num_recycled.compacted", num_compacted)
5411
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5412
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5413
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5414
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5415
10
    };
5416
5417
10
    std::vector<std::string> orphan_rowset_keys;
5418
5419
    // Store keys of rowset recycled by background workers
5420
10
    std::mutex async_recycled_rowset_keys_mutex;
5421
10
    std::vector<std::string> async_recycled_rowset_keys;
5422
10
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5423
10
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
5424
10
    worker_pool->start();
5425
10
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
5426
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5427
        // Try to delete rowset data in background thread
5428
400
        int ret = worker_pool->submit_with_timeout(
5429
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5430
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5431
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5432
400
                        return;
5433
400
                    }
5434
                    // The async recycled rowsets are staled format or has not been used,
5435
                    // so we don't need to check the rowset ref count key.
5436
0
                    std::vector<std::string> keys;
5437
0
                    {
5438
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5439
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5440
0
                        if (async_recycled_rowset_keys.size() > 100) {
5441
0
                            keys.swap(async_recycled_rowset_keys);
5442
0
                        }
5443
0
                    }
5444
0
                    if (keys.empty()) return;
5445
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5446
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5447
0
                                     << instance_id_;
5448
0
                    } else {
5449
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5450
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5451
0
                                           num_recycled, start_time);
5452
0
                    }
5453
0
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
5429
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5430
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5431
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5432
400
                        return;
5433
400
                    }
5434
                    // The async recycled rowsets are staled format or has not been used,
5435
                    // so we don't need to check the rowset ref count key.
5436
0
                    std::vector<std::string> keys;
5437
0
                    {
5438
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5439
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5440
0
                        if (async_recycled_rowset_keys.size() > 100) {
5441
0
                            keys.swap(async_recycled_rowset_keys);
5442
0
                        }
5443
0
                    }
5444
0
                    if (keys.empty()) return;
5445
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5446
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5447
0
                                     << instance_id_;
5448
0
                    } else {
5449
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5450
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5451
0
                                           num_recycled, start_time);
5452
0
                    }
5453
0
                },
5454
400
                0);
5455
400
        if (ret == 0) return 0;
5456
        // Submit task failed, delete rowset data in current thread
5457
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5458
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5459
0
            return -1;
5460
0
        }
5461
0
        orphan_rowset_keys.push_back(std::move(key));
5462
0
        return 0;
5463
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
5426
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5427
        // Try to delete rowset data in background thread
5428
400
        int ret = worker_pool->submit_with_timeout(
5429
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5430
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5431
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5432
400
                        return;
5433
400
                    }
5434
                    // The async recycled rowsets are staled format or has not been used,
5435
                    // so we don't need to check the rowset ref count key.
5436
400
                    std::vector<std::string> keys;
5437
400
                    {
5438
400
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5439
400
                        async_recycled_rowset_keys.push_back(std::move(key));
5440
400
                        if (async_recycled_rowset_keys.size() > 100) {
5441
400
                            keys.swap(async_recycled_rowset_keys);
5442
400
                        }
5443
400
                    }
5444
400
                    if (keys.empty()) return;
5445
400
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5446
400
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5447
400
                                     << instance_id_;
5448
400
                    } else {
5449
400
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5450
400
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5451
400
                                           num_recycled, start_time);
5452
400
                    }
5453
400
                },
5454
400
                0);
5455
400
        if (ret == 0) return 0;
5456
        // Submit task failed, delete rowset data in current thread
5457
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5458
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5459
0
            return -1;
5460
0
        }
5461
0
        orphan_rowset_keys.push_back(std::move(key));
5462
0
        return 0;
5463
0
    };
5464
5465
10
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5466
5467
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5468
2.01k
        ++num_scanned;
5469
2.01k
        total_rowset_key_size += k.size();
5470
2.01k
        total_rowset_value_size += v.size();
5471
2.01k
        RecycleRowsetPB rowset;
5472
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5473
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5474
0
            return -1;
5475
0
        }
5476
5477
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5478
5479
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5480
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5481
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5482
2.01k
        int64_t current_time = ::time(nullptr);
5483
2.01k
        if (current_time < final_expiration) { // not expired
5484
0
            return 0;
5485
0
        }
5486
2.01k
        ++num_expired;
5487
2.01k
        expired_rowset_size += v.size();
5488
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5489
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5490
                // in old version, keep this key-value pair and it needs to be checked manually
5491
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5492
0
                return -1;
5493
0
            }
5494
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5495
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5496
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5497
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5498
0
                orphan_rowset_keys.emplace_back(k);
5499
0
                return -1;
5500
0
            }
5501
            // decode rowset_id
5502
0
            auto k1 = k;
5503
0
            k1.remove_prefix(1);
5504
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5505
0
            decode_key(&k1, &out);
5506
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5507
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5508
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5509
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5510
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5511
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5512
0
                return -1;
5513
0
            }
5514
0
            return 0;
5515
0
        }
5516
        // TODO(plat1ko): check rowset not referenced
5517
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5518
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5519
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5520
0
                LOG_INFO("recycle rowset that has empty resource id");
5521
0
            } else {
5522
                // other situations, keep this key-value pair and it needs to be checked manually
5523
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5524
0
                return -1;
5525
0
            }
5526
0
        }
5527
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5528
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5529
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5530
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5531
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5532
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5533
2.01k
                  << " rowset_meta_size=" << v.size()
5534
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5535
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5536
            // unable to calculate file path, can only be deleted by rowset id prefix
5537
400
            num_prepare += 1;
5538
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5539
400
                                             rowset_meta->tablet_id(),
5540
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5541
0
                return -1;
5542
0
            }
5543
1.61k
        } else {
5544
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5545
1.61k
            worker_pool->submit(
5546
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5547
                        // The load & compact rowset keys are recycled during recycling operation logs.
5548
1.61k
                        RowsetDeleteTask task;
5549
1.61k
                        task.rowset_meta = rowset_meta;
5550
1.61k
                        task.recycle_rowset_key = k;
5551
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5552
1.59k
                            return;
5553
1.59k
                        }
5554
14
                        num_compacted += is_compacted;
5555
14
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5556
14
                        if (rowset_meta.num_segments() == 0) {
5557
0
                            ++num_empty_rowset;
5558
0
                        }
5559
14
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
5546
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5547
                        // The load & compact rowset keys are recycled during recycling operation logs.
5548
1.61k
                        RowsetDeleteTask task;
5549
1.61k
                        task.rowset_meta = rowset_meta;
5550
1.61k
                        task.recycle_rowset_key = k;
5551
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5552
1.59k
                            return;
5553
1.59k
                        }
5554
14
                        num_compacted += is_compacted;
5555
14
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5556
14
                        if (rowset_meta.num_segments() == 0) {
5557
0
                            ++num_empty_rowset;
5558
0
                        }
5559
14
                    });
5560
1.61k
        }
5561
2.01k
        return 0;
5562
2.01k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5467
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5468
2.01k
        ++num_scanned;
5469
2.01k
        total_rowset_key_size += k.size();
5470
2.01k
        total_rowset_value_size += v.size();
5471
2.01k
        RecycleRowsetPB rowset;
5472
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5473
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5474
0
            return -1;
5475
0
        }
5476
5477
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5478
5479
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5480
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5481
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5482
2.01k
        int64_t current_time = ::time(nullptr);
5483
2.01k
        if (current_time < final_expiration) { // not expired
5484
0
            return 0;
5485
0
        }
5486
2.01k
        ++num_expired;
5487
2.01k
        expired_rowset_size += v.size();
5488
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5489
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5490
                // in old version, keep this key-value pair and it needs to be checked manually
5491
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5492
0
                return -1;
5493
0
            }
5494
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5495
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5496
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5497
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5498
0
                orphan_rowset_keys.emplace_back(k);
5499
0
                return -1;
5500
0
            }
5501
            // decode rowset_id
5502
0
            auto k1 = k;
5503
0
            k1.remove_prefix(1);
5504
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5505
0
            decode_key(&k1, &out);
5506
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5507
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5508
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5509
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5510
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5511
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5512
0
                return -1;
5513
0
            }
5514
0
            return 0;
5515
0
        }
5516
        // TODO(plat1ko): check rowset not referenced
5517
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5518
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5519
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5520
0
                LOG_INFO("recycle rowset that has empty resource id");
5521
0
            } else {
5522
                // other situations, keep this key-value pair and it needs to be checked manually
5523
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5524
0
                return -1;
5525
0
            }
5526
0
        }
5527
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5528
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5529
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5530
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5531
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5532
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5533
2.01k
                  << " rowset_meta_size=" << v.size()
5534
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5535
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5536
            // unable to calculate file path, can only be deleted by rowset id prefix
5537
400
            num_prepare += 1;
5538
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5539
400
                                             rowset_meta->tablet_id(),
5540
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5541
0
                return -1;
5542
0
            }
5543
1.61k
        } else {
5544
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5545
1.61k
            worker_pool->submit(
5546
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5547
                        // The load & compact rowset keys are recycled during recycling operation logs.
5548
1.61k
                        RowsetDeleteTask task;
5549
1.61k
                        task.rowset_meta = rowset_meta;
5550
1.61k
                        task.recycle_rowset_key = k;
5551
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5552
1.61k
                            return;
5553
1.61k
                        }
5554
1.61k
                        num_compacted += is_compacted;
5555
1.61k
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5556
1.61k
                        if (rowset_meta.num_segments() == 0) {
5557
1.61k
                            ++num_empty_rowset;
5558
1.61k
                        }
5559
1.61k
                    });
5560
1.61k
        }
5561
2.01k
        return 0;
5562
2.01k
    };
5563
5564
10
    if (config::enable_recycler_stats_metrics) {
5565
0
        scan_and_statistics_rowsets();
5566
0
    }
5567
5568
10
    auto loop_done = [&]() -> int {
5569
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5570
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5571
0
        }
5572
6
        orphan_rowset_keys.clear();
5573
6
        return 0;
5574
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
5568
6
    auto loop_done = [&]() -> int {
5569
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5570
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5571
0
        }
5572
6
        orphan_rowset_keys.clear();
5573
6
        return 0;
5574
6
    };
5575
5576
    // recycle_func and loop_done for scan and recycle
5577
10
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
5578
10
                               std::move(loop_done));
5579
5580
10
    worker_pool->stop();
5581
5582
10
    if (!async_recycled_rowset_keys.empty()) {
5583
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
5584
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5585
0
            return -1;
5586
0
        } else {
5587
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
5588
0
        }
5589
0
    }
5590
5591
    // Report final metrics after all concurrent tasks completed
5592
10
    segment_metrics_context_.report();
5593
10
    metrics_context.report();
5594
5595
10
    return ret;
5596
10
}
5597
5598
1.61k
int InstanceRecycler::recycle_rowset_meta_and_data(const RowsetDeleteTask& task) {
5599
1.61k
    constexpr int MAX_RETRY = 10;
5600
1.61k
    const RowsetMetaCloudPB& rowset_meta = task.rowset_meta;
5601
1.61k
    int64_t tablet_id = rowset_meta.tablet_id();
5602
1.61k
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
5603
1.61k
    std::string_view reference_instance_id = instance_id_;
5604
1.61k
    if (rowset_meta.has_reference_instance_id()) {
5605
8
        reference_instance_id = rowset_meta.reference_instance_id();
5606
8
    }
5607
5608
1.61k
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
5609
1.61k
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
5610
1.61k
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(task.recycle_rowset_key));
5611
1.61k
    AnnotateTag instance_id_tag("instance_id", instance_id_);
5612
1.61k
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
5613
1.61k
    for (int i = 0; i < MAX_RETRY; ++i) {
5614
1.61k
        std::unique_ptr<Transaction> txn;
5615
1.61k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5616
1.61k
        if (err != TxnErrorCode::TXN_OK) {
5617
0
            LOG_WARNING("failed to create txn").tag("err", err);
5618
0
            return -1;
5619
0
        }
5620
5621
1.61k
        std::string rowset_ref_count_key =
5622
1.61k
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
5623
1.61k
        int64_t ref_count = 0;
5624
1.61k
        {
5625
1.61k
            std::string value;
5626
1.61k
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
5627
1.61k
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5628
                // This is the old version rowset, we could recycle it directly.
5629
1.60k
                ref_count = 1;
5630
1.60k
            } else if (err != TxnErrorCode::TXN_OK) {
5631
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
5632
0
                return -1;
5633
10
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
5634
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
5635
0
                return -1;
5636
0
            }
5637
1.61k
        }
5638
5639
1.61k
        if (ref_count == 1) {
5640
            // It would not be added since it is recycling.
5641
1.61k
            if (delete_rowset_data(rowset_meta) != 0) {
5642
1.60k
                LOG_WARNING("failed to delete rowset data");
5643
1.60k
                return -1;
5644
1.60k
            }
5645
5646
            // Reset the transaction to avoid timeout.
5647
10
            err = txn_kv_->create_txn(&txn);
5648
10
            if (err != TxnErrorCode::TXN_OK) {
5649
0
                LOG_WARNING("failed to create txn").tag("err", err);
5650
0
                return -1;
5651
0
            }
5652
10
            txn->remove(rowset_ref_count_key);
5653
10
            LOG_INFO("delete rowset data ref count key")
5654
10
                    .tag("txn_id", rowset_meta.txn_id())
5655
10
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5656
5657
10
            std::string dbm_start_key =
5658
10
                    meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
5659
10
            std::string dbm_end_key = meta_delete_bitmap_key(
5660
10
                    {reference_instance_id, tablet_id, rowset_id,
5661
10
                     std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
5662
10
            txn->remove(dbm_start_key, dbm_end_key);
5663
10
            LOG_INFO("remove delete bitmap kv")
5664
10
                    .tag("begin", hex(dbm_start_key))
5665
10
                    .tag("end", hex(dbm_end_key));
5666
5667
10
            std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key(
5668
10
                    {reference_instance_id, tablet_id, rowset_id});
5669
10
            std::string versioned_dbm_end_key = versioned_dbm_start_key;
5670
10
            encode_int64(INT64_MAX, &versioned_dbm_end_key);
5671
10
            txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
5672
10
            LOG_INFO("remove versioned delete bitmap kv")
5673
10
                    .tag("begin", hex(versioned_dbm_start_key))
5674
10
                    .tag("end", hex(versioned_dbm_end_key));
5675
10
        } else {
5676
            // Decrease the rowset ref count.
5677
            //
5678
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
5679
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
5680
2
            txn->atomic_add(rowset_ref_count_key, -1);
5681
2
            LOG_INFO("decrease rowset data ref count")
5682
2
                    .tag("txn_id", rowset_meta.txn_id())
5683
2
                    .tag("ref_count", ref_count - 1)
5684
2
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5685
2
        }
5686
5687
12
        if (!task.versioned_rowset_key.empty()) {
5688
0
            versioned::document_remove<RowsetMetaCloudPB>(txn.get(), task.versioned_rowset_key,
5689
0
                                                          task.versionstamp);
5690
0
            LOG_INFO("remove versioned meta rowset key").tag("key", hex(task.versioned_rowset_key));
5691
0
        }
5692
5693
12
        if (!task.non_versioned_rowset_key.empty()) {
5694
0
            txn->remove(task.non_versioned_rowset_key);
5695
0
            LOG_INFO("remove non versioned rowset key")
5696
0
                    .tag("key", hex(task.non_versioned_rowset_key));
5697
0
        }
5698
5699
        // empty when recycle ref rowsets for deleted instance
5700
13
        if (!task.recycle_rowset_key.empty()) {
5701
13
            txn->remove(task.recycle_rowset_key);
5702
13
            LOG_INFO("remove recycle rowset key").tag("key", hex(task.recycle_rowset_key));
5703
13
        }
5704
5705
12
        err = txn->commit();
5706
12
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
5707
            // The rowset ref count key has been changed, we need to retry.
5708
0
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
5709
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
5710
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
5711
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5712
0
            continue;
5713
12
        } else if (err != TxnErrorCode::TXN_OK) {
5714
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
5715
0
            return -1;
5716
0
        }
5717
12
        LOG_INFO("recycle rowset meta and data success");
5718
12
        return 0;
5719
12
    }
5720
1
    LOG_WARNING("failed to recycle rowset meta and data after retry")
5721
1
            .tag("tablet_id", tablet_id)
5722
1
            .tag("rowset_id", rowset_id)
5723
1
            .tag("retry", MAX_RETRY);
5724
1
    return -1;
5725
1.61k
}
5726
5727
39
int InstanceRecycler::recycle_tmp_rowsets() {
5728
39
    const std::string task_name = "recycle_tmp_rowsets";
5729
39
    int64_t num_scanned = 0;
5730
39
    int64_t num_expired = 0;
5731
39
    std::atomic_long num_recycled = 0;
5732
39
    size_t expired_rowset_size = 0;
5733
39
    size_t total_rowset_key_size = 0;
5734
39
    size_t total_rowset_value_size = 0;
5735
39
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5736
5737
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
5738
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
5739
39
    std::string tmp_rs_key0;
5740
39
    std::string tmp_rs_key1;
5741
39
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
5742
39
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
5743
5744
39
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
5745
5746
39
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5747
39
    register_recycle_task(task_name, start_time);
5748
5749
39
    DORIS_CLOUD_DEFER {
5750
39
        unregister_recycle_task(task_name);
5751
39
        int64_t cost =
5752
39
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5753
39
        metrics_context.finish_report();
5754
39
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5755
39
                .tag("instance_id", instance_id_)
5756
39
                .tag("num_scanned", num_scanned)
5757
39
                .tag("num_expired", num_expired)
5758
39
                .tag("num_recycled", num_recycled)
5759
39
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5760
39
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5761
39
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5762
39
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5749
12
    DORIS_CLOUD_DEFER {
5750
12
        unregister_recycle_task(task_name);
5751
12
        int64_t cost =
5752
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5753
12
        metrics_context.finish_report();
5754
12
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5755
12
                .tag("instance_id", instance_id_)
5756
12
                .tag("num_scanned", num_scanned)
5757
12
                .tag("num_expired", num_expired)
5758
12
                .tag("num_recycled", num_recycled)
5759
12
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5760
12
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5761
12
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5762
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5749
27
    DORIS_CLOUD_DEFER {
5750
27
        unregister_recycle_task(task_name);
5751
27
        int64_t cost =
5752
27
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5753
27
        metrics_context.finish_report();
5754
27
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5755
27
                .tag("instance_id", instance_id_)
5756
27
                .tag("num_scanned", num_scanned)
5757
27
                .tag("num_expired", num_expired)
5758
27
                .tag("num_recycled", num_recycled)
5759
27
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5760
27
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5761
27
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5762
27
    };
5763
5764
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
5765
5766
39
    std::vector<std::string> tmp_rowset_keys;
5767
39
    std::vector<std::string> tmp_rowset_ref_count_keys;
5768
39
    std::vector<std::string> tmp_rowset_keys_to_mark_recycled;
5769
39
    std::vector<std::string> tmp_rowset_keys_to_abort;
5770
5771
    // rowset_id -> rowset_meta
5772
    // store tmp_rowset id and meta for statistics rs size when delete
5773
39
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
5774
39
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5775
39
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
5776
39
    worker_pool->start();
5777
5778
39
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5779
5780
39
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
5781
39
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
5782
39
                             &earlest_ts, &tmp_rowset_ref_count_keys,
5783
39
                             &tmp_rowset_keys_to_mark_recycled, &tmp_rowset_keys_to_abort, this,
5784
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5785
106k
        ++num_scanned;
5786
106k
        total_rowset_key_size += k.size();
5787
106k
        total_rowset_value_size += v.size();
5788
106k
        doris::RowsetMetaCloudPB rowset;
5789
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5790
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5791
0
            return -1;
5792
0
        }
5793
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5794
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5795
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5796
0
                   << " txn_expiration=" << rowset.txn_expiration()
5797
0
                   << " rowset_creation_time=" << rowset.creation_time();
5798
106k
        int64_t current_time = ::time(nullptr);
5799
106k
        if (current_time < expiration) { // not expired
5800
0
            return 0;
5801
0
        }
5802
5803
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5804
106k
            if (need_mark_rowset_as_recycled(rowset)) {
5805
52.0k
                tmp_rowset_keys_to_mark_recycled.emplace_back(k);
5806
52.0k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5807
52.0k
                             "at next turn, instance_id="
5808
52.0k
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5809
52.0k
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5810
52.0k
                return 0;
5811
52.0k
            }
5812
106k
        }
5813
5814
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5815
54.0k
            if (make_deferred_abort_task(rowset).has_value()) {
5816
3
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5817
3
                             "instance_id="
5818
3
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5819
3
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5820
3
                tmp_rowset_keys_to_abort.emplace_back(k);
5821
3
            }
5822
54.0k
        }
5823
5824
54.0k
        ++num_expired;
5825
54.0k
        expired_rowset_size += v.size();
5826
54.0k
        if (!rowset.has_resource_id()) {
5827
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5828
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5829
0
                return -1;
5830
0
            }
5831
            // might be a delete pred rowset
5832
0
            tmp_rowset_keys.emplace_back(k);
5833
0
            return 0;
5834
0
        }
5835
        // TODO(plat1ko): check rowset not referenced
5836
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5837
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5838
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5839
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5840
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5841
54.0k
                  << " num_expired=" << num_expired
5842
54.0k
                  << " task_type=" << metrics_context.operation_type;
5843
5844
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5845
        // Remove the rowset ref count key directly since it has not been used.
5846
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5847
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5848
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5849
54.0k
                  << "key=" << hex(rowset_ref_count_key);
5850
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5851
5852
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5853
54.0k
        return 0;
5854
54.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5784
16
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5785
16
        ++num_scanned;
5786
16
        total_rowset_key_size += k.size();
5787
16
        total_rowset_value_size += v.size();
5788
16
        doris::RowsetMetaCloudPB rowset;
5789
16
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5790
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5791
0
            return -1;
5792
0
        }
5793
16
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5794
16
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5795
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5796
0
                   << " txn_expiration=" << rowset.txn_expiration()
5797
0
                   << " rowset_creation_time=" << rowset.creation_time();
5798
16
        int64_t current_time = ::time(nullptr);
5799
16
        if (current_time < expiration) { // not expired
5800
0
            return 0;
5801
0
        }
5802
5803
16
        if (config::enable_mark_delete_rowset_before_recycle) {
5804
16
            if (need_mark_rowset_as_recycled(rowset)) {
5805
9
                tmp_rowset_keys_to_mark_recycled.emplace_back(k);
5806
9
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5807
9
                             "at next turn, instance_id="
5808
9
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5809
9
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5810
9
                return 0;
5811
9
            }
5812
16
        }
5813
5814
7
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5815
7
            if (make_deferred_abort_task(rowset).has_value()) {
5816
3
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5817
3
                             "instance_id="
5818
3
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5819
3
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5820
3
                tmp_rowset_keys_to_abort.emplace_back(k);
5821
3
            }
5822
7
        }
5823
5824
7
        ++num_expired;
5825
7
        expired_rowset_size += v.size();
5826
7
        if (!rowset.has_resource_id()) {
5827
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5828
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5829
0
                return -1;
5830
0
            }
5831
            // might be a delete pred rowset
5832
0
            tmp_rowset_keys.emplace_back(k);
5833
0
            return 0;
5834
0
        }
5835
        // TODO(plat1ko): check rowset not referenced
5836
7
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5837
7
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5838
7
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5839
7
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5840
7
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5841
7
                  << " num_expired=" << num_expired
5842
7
                  << " task_type=" << metrics_context.operation_type;
5843
5844
7
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5845
        // Remove the rowset ref count key directly since it has not been used.
5846
7
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5847
7
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5848
7
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5849
7
                  << "key=" << hex(rowset_ref_count_key);
5850
7
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5851
5852
7
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5853
7
        return 0;
5854
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5784
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5785
106k
        ++num_scanned;
5786
106k
        total_rowset_key_size += k.size();
5787
106k
        total_rowset_value_size += v.size();
5788
106k
        doris::RowsetMetaCloudPB rowset;
5789
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5790
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5791
0
            return -1;
5792
0
        }
5793
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5794
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5795
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5796
0
                   << " txn_expiration=" << rowset.txn_expiration()
5797
0
                   << " rowset_creation_time=" << rowset.creation_time();
5798
106k
        int64_t current_time = ::time(nullptr);
5799
106k
        if (current_time < expiration) { // not expired
5800
0
            return 0;
5801
0
        }
5802
5803
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5804
106k
            if (need_mark_rowset_as_recycled(rowset)) {
5805
52.0k
                tmp_rowset_keys_to_mark_recycled.emplace_back(k);
5806
52.0k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5807
52.0k
                             "at next turn, instance_id="
5808
52.0k
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5809
52.0k
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5810
52.0k
                return 0;
5811
52.0k
            }
5812
106k
        }
5813
5814
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5815
54.0k
            if (make_deferred_abort_task(rowset).has_value()) {
5816
0
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5817
0
                             "instance_id="
5818
0
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5819
0
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5820
0
                tmp_rowset_keys_to_abort.emplace_back(k);
5821
0
            }
5822
54.0k
        }
5823
5824
54.0k
        ++num_expired;
5825
54.0k
        expired_rowset_size += v.size();
5826
54.0k
        if (!rowset.has_resource_id()) {
5827
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5828
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5829
0
                return -1;
5830
0
            }
5831
            // might be a delete pred rowset
5832
0
            tmp_rowset_keys.emplace_back(k);
5833
0
            return 0;
5834
0
        }
5835
        // TODO(plat1ko): check rowset not referenced
5836
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5837
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5838
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5839
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5840
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5841
54.0k
                  << " num_expired=" << num_expired
5842
54.0k
                  << " task_type=" << metrics_context.operation_type;
5843
5844
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5845
        // Remove the rowset ref count key directly since it has not been used.
5846
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5847
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5848
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5849
54.0k
                  << "key=" << hex(rowset_ref_count_key);
5850
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5851
5852
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5853
54.0k
        return 0;
5854
54.0k
    };
5855
5856
    // TODO bacth delete
5857
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5858
51.0k
        std::string dbm_start_key =
5859
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5860
51.0k
        std::string dbm_end_key = dbm_start_key;
5861
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5862
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5863
51.0k
        if (ret != 0) {
5864
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5865
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5866
0
                         << ", rowset_id=" << rowset_id;
5867
0
        }
5868
51.0k
        return ret;
5869
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5857
7
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5858
7
        std::string dbm_start_key =
5859
7
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5860
7
        std::string dbm_end_key = dbm_start_key;
5861
7
        encode_int64(INT64_MAX, &dbm_end_key);
5862
7
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5863
7
        if (ret != 0) {
5864
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5865
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5866
0
                         << ", rowset_id=" << rowset_id;
5867
0
        }
5868
7
        return ret;
5869
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5857
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5858
51.0k
        std::string dbm_start_key =
5859
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5860
51.0k
        std::string dbm_end_key = dbm_start_key;
5861
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5862
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5863
51.0k
        if (ret != 0) {
5864
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5865
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5866
0
                         << ", rowset_id=" << rowset_id;
5867
0
        }
5868
51.0k
        return ret;
5869
51.0k
    };
5870
5871
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5872
51.0k
        auto delete_bitmap_start =
5873
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5874
51.0k
        auto delete_bitmap_end =
5875
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5876
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5877
51.0k
        if (ret != 0) {
5878
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5879
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5880
0
        }
5881
51.0k
        return ret;
5882
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5871
7
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5872
7
        auto delete_bitmap_start =
5873
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5874
7
        auto delete_bitmap_end =
5875
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5876
7
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5877
7
        if (ret != 0) {
5878
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5879
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5880
0
        }
5881
7
        return ret;
5882
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5871
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5872
51.0k
        auto delete_bitmap_start =
5873
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5874
51.0k
        auto delete_bitmap_end =
5875
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5876
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5877
51.0k
        if (ret != 0) {
5878
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5879
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5880
0
        }
5881
51.0k
        return ret;
5882
51.0k
    };
5883
5884
39
    auto loop_done = [&]() -> int {
5885
32
        std::vector<std::string> tmp_rowset_keys_to_delete;
5886
32
        std::vector<std::string> tmp_rowset_ref_count_keys_to_delete;
5887
32
        std::vector<std::string> mark_keys_to_process;
5888
32
        std::vector<std::string> abort_keys_to_process;
5889
32
        std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete;
5890
32
        tmp_rowset_keys_to_delete.swap(tmp_rowset_keys);
5891
32
        tmp_rowsets_to_delete.swap(tmp_rowsets);
5892
32
        tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys);
5893
32
        mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled);
5894
32
        abort_keys_to_process.swap(tmp_rowset_keys_to_abort);
5895
32
        worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete),
5896
32
                             tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete),
5897
32
                             tmp_rowset_ref_count_keys_to_delete =
5898
32
                                     std::move(tmp_rowset_ref_count_keys_to_delete),
5899
32
                             mark_keys_to_process = std::move(mark_keys_to_process),
5900
32
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5901
32
            if (!mark_keys_to_process.empty() &&
5902
32
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5903
16
                                                                  mark_keys_to_process) != 0) {
5904
0
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5905
0
                             << instance_id_;
5906
0
                return;
5907
0
            }
5908
32
            if (!abort_keys_to_process.empty() &&
5909
32
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5910
3
                                                                      false) != 0) {
5911
0
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5912
0
                             << instance_id_;
5913
0
                return;
5914
0
            }
5915
32
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5916
32
                                   metrics_context) != 0) {
5917
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5918
3
                return;
5919
3
            }
5920
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5921
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5922
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5923
0
                                 << rs.ShortDebugString();
5924
0
                    return;
5925
0
                }
5926
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5927
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5928
0
                                 << rs.ShortDebugString();
5929
0
                    return;
5930
0
                }
5931
51.0k
            }
5932
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5933
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5934
0
                return;
5935
0
            }
5936
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5937
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5938
0
                return;
5939
0
            }
5940
29
            num_recycled += tmp_rowset_keys_to_delete.size();
5941
29
            return;
5942
29
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
5900
12
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5901
12
            if (!mark_keys_to_process.empty() &&
5902
12
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5903
7
                                                                  mark_keys_to_process) != 0) {
5904
0
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5905
0
                             << instance_id_;
5906
0
                return;
5907
0
            }
5908
12
            if (!abort_keys_to_process.empty() &&
5909
12
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5910
3
                                                                      false) != 0) {
5911
0
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5912
0
                             << instance_id_;
5913
0
                return;
5914
0
            }
5915
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5916
12
                                   metrics_context) != 0) {
5917
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5918
0
                return;
5919
0
            }
5920
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5921
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5922
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5923
0
                                 << rs.ShortDebugString();
5924
0
                    return;
5925
0
                }
5926
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5927
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5928
0
                                 << rs.ShortDebugString();
5929
0
                    return;
5930
0
                }
5931
7
            }
5932
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5933
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5934
0
                return;
5935
0
            }
5936
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5937
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5938
0
                return;
5939
0
            }
5940
12
            num_recycled += tmp_rowset_keys_to_delete.size();
5941
12
            return;
5942
12
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
5900
20
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5901
20
            if (!mark_keys_to_process.empty() &&
5902
20
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5903
9
                                                                  mark_keys_to_process) != 0) {
5904
0
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5905
0
                             << instance_id_;
5906
0
                return;
5907
0
            }
5908
20
            if (!abort_keys_to_process.empty() &&
5909
20
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5910
0
                                                                      false) != 0) {
5911
0
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5912
0
                             << instance_id_;
5913
0
                return;
5914
0
            }
5915
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5916
20
                                   metrics_context) != 0) {
5917
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5918
3
                return;
5919
3
            }
5920
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5921
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5922
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5923
0
                                 << rs.ShortDebugString();
5924
0
                    return;
5925
0
                }
5926
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5927
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5928
0
                                 << rs.ShortDebugString();
5929
0
                    return;
5930
0
                }
5931
51.0k
            }
5932
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5933
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5934
0
                return;
5935
0
            }
5936
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5937
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5938
0
                return;
5939
0
            }
5940
17
            num_recycled += tmp_rowset_keys_to_delete.size();
5941
17
            return;
5942
17
        });
5943
32
        return 0;
5944
32
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
Line
Count
Source
5884
12
    auto loop_done = [&]() -> int {
5885
12
        std::vector<std::string> tmp_rowset_keys_to_delete;
5886
12
        std::vector<std::string> tmp_rowset_ref_count_keys_to_delete;
5887
12
        std::vector<std::string> mark_keys_to_process;
5888
12
        std::vector<std::string> abort_keys_to_process;
5889
12
        std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete;
5890
12
        tmp_rowset_keys_to_delete.swap(tmp_rowset_keys);
5891
12
        tmp_rowsets_to_delete.swap(tmp_rowsets);
5892
12
        tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys);
5893
12
        mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled);
5894
12
        abort_keys_to_process.swap(tmp_rowset_keys_to_abort);
5895
12
        worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete),
5896
12
                             tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete),
5897
12
                             tmp_rowset_ref_count_keys_to_delete =
5898
12
                                     std::move(tmp_rowset_ref_count_keys_to_delete),
5899
12
                             mark_keys_to_process = std::move(mark_keys_to_process),
5900
12
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5901
12
            if (!mark_keys_to_process.empty() &&
5902
12
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5903
12
                                                                  mark_keys_to_process) != 0) {
5904
12
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5905
12
                             << instance_id_;
5906
12
                return;
5907
12
            }
5908
12
            if (!abort_keys_to_process.empty() &&
5909
12
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5910
12
                                                                      false) != 0) {
5911
12
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5912
12
                             << instance_id_;
5913
12
                return;
5914
12
            }
5915
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5916
12
                                   metrics_context) != 0) {
5917
12
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5918
12
                return;
5919
12
            }
5920
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5921
12
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5922
12
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5923
12
                                 << rs.ShortDebugString();
5924
12
                    return;
5925
12
                }
5926
12
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5927
12
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5928
12
                                 << rs.ShortDebugString();
5929
12
                    return;
5930
12
                }
5931
12
            }
5932
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5933
12
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5934
12
                return;
5935
12
            }
5936
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5937
12
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5938
12
                return;
5939
12
            }
5940
12
            num_recycled += tmp_rowset_keys_to_delete.size();
5941
12
            return;
5942
12
        });
5943
12
        return 0;
5944
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
Line
Count
Source
5884
20
    auto loop_done = [&]() -> int {
5885
20
        std::vector<std::string> tmp_rowset_keys_to_delete;
5886
20
        std::vector<std::string> tmp_rowset_ref_count_keys_to_delete;
5887
20
        std::vector<std::string> mark_keys_to_process;
5888
20
        std::vector<std::string> abort_keys_to_process;
5889
20
        std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete;
5890
20
        tmp_rowset_keys_to_delete.swap(tmp_rowset_keys);
5891
20
        tmp_rowsets_to_delete.swap(tmp_rowsets);
5892
20
        tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys);
5893
20
        mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled);
5894
20
        abort_keys_to_process.swap(tmp_rowset_keys_to_abort);
5895
20
        worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete),
5896
20
                             tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete),
5897
20
                             tmp_rowset_ref_count_keys_to_delete =
5898
20
                                     std::move(tmp_rowset_ref_count_keys_to_delete),
5899
20
                             mark_keys_to_process = std::move(mark_keys_to_process),
5900
20
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5901
20
            if (!mark_keys_to_process.empty() &&
5902
20
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5903
20
                                                                  mark_keys_to_process) != 0) {
5904
20
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5905
20
                             << instance_id_;
5906
20
                return;
5907
20
            }
5908
20
            if (!abort_keys_to_process.empty() &&
5909
20
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5910
20
                                                                      false) != 0) {
5911
20
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5912
20
                             << instance_id_;
5913
20
                return;
5914
20
            }
5915
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5916
20
                                   metrics_context) != 0) {
5917
20
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5918
20
                return;
5919
20
            }
5920
20
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5921
20
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5922
20
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5923
20
                                 << rs.ShortDebugString();
5924
20
                    return;
5925
20
                }
5926
20
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5927
20
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5928
20
                                 << rs.ShortDebugString();
5929
20
                    return;
5930
20
                }
5931
20
            }
5932
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5933
20
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5934
20
                return;
5935
20
            }
5936
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5937
20
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5938
20
                return;
5939
20
            }
5940
20
            num_recycled += tmp_rowset_keys_to_delete.size();
5941
20
            return;
5942
20
        });
5943
20
        return 0;
5944
20
    };
5945
5946
39
    if (config::enable_recycler_stats_metrics) {
5947
0
        scan_and_statistics_tmp_rowsets();
5948
0
    }
5949
    // recycle_func and loop_done for scan and recycle
5950
39
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
5951
39
                               std::move(loop_done));
5952
5953
39
    worker_pool->stop();
5954
5955
    // Report final metrics after all concurrent tasks completed
5956
39
    segment_metrics_context_.report();
5957
39
    metrics_context.report();
5958
5959
39
    return ret;
5960
39
}
5961
5962
int InstanceRecycler::scan_and_recycle(
5963
        std::string begin, std::string_view end,
5964
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
5965
268
        std::function<int()> loop_done) {
5966
268
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
5967
268
    int ret = 0;
5968
268
    int64_t cnt = 0;
5969
268
    int get_range_retried = 0;
5970
268
    std::string err;
5971
268
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5972
268
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5973
268
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5974
268
                  << " ret=" << ret << " err=" << err;
5975
268
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5971
31
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5972
31
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5973
31
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5974
31
                  << " ret=" << ret << " err=" << err;
5975
31
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5971
237
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5972
237
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5973
237
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5974
237
                  << " ret=" << ret << " err=" << err;
5975
237
    };
5976
5977
268
    std::unique_ptr<RangeGetIterator> it;
5978
449
    while (it == nullptr /* may be not init */ || (it->more() && !stopped())) {
5979
321
        if (get_range_retried > 1000) {
5980
0
            err = "txn_get exceeds max retry(1000), may not scan all keys";
5981
0
            ret = -3;
5982
0
            return ret;
5983
0
        }
5984
321
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
5985
321
        if (get_ret != 0) { // txn kv may complain "Request for future version"
5986
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
5987
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
5988
0
                         << " get_range_retried=" << get_range_retried;
5989
0
            ++get_range_retried;
5990
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5991
0
            continue; // try again
5992
0
        }
5993
321
        if (!it->has_next()) {
5994
140
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
5995
140
            break; // scan finished
5996
140
        }
5997
154k
        while (it->has_next()) {
5998
154k
            ++cnt;
5999
            // recycle corresponding resources
6000
154k
            auto [k, v] = it->next();
6001
154k
            if (!it->has_next()) {
6002
181
                begin = k;
6003
181
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
6004
181
            }
6005
            // FIXME(gavin): if we want to continue scanning, the recycle_func should not return non-zero
6006
154k
            if (recycle_func(k, v) != 0) {
6007
4.00k
                err = "recycle_func error";
6008
4.00k
                ret = -1;
6009
4.00k
            }
6010
154k
        }
6011
181
        begin.push_back('\x00'); // Update to next smallest key for iteration
6012
        // FIXME(gavin): if we want to continue scanning, the loop_done should not return non-zero
6013
181
        if (loop_done && loop_done() != 0) {
6014
4
            err = "loop_done error";
6015
4
            ret = -1;
6016
4
        }
6017
181
    }
6018
268
    return ret;
6019
268
}
6020
6021
19
int InstanceRecycler::abort_timeout_txn() {
6022
19
    const std::string task_name = "abort_timeout_txn";
6023
19
    int64_t num_scanned = 0;
6024
19
    int64_t num_timeout = 0;
6025
19
    int64_t num_abort = 0;
6026
19
    int64_t num_advance = 0;
6027
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6028
6029
19
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
6030
19
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6031
19
    std::string begin_txn_running_key;
6032
19
    std::string end_txn_running_key;
6033
19
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
6034
19
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
6035
6036
19
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
6037
6038
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6039
19
    register_recycle_task(task_name, start_time);
6040
6041
19
    DORIS_CLOUD_DEFER {
6042
19
        unregister_recycle_task(task_name);
6043
19
        int64_t cost =
6044
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6045
19
        metrics_context.finish_report();
6046
19
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
6047
19
                .tag("instance_id", instance_id_)
6048
19
                .tag("num_scanned", num_scanned)
6049
19
                .tag("num_timeout", num_timeout)
6050
19
                .tag("num_abort", num_abort)
6051
19
                .tag("num_advance", num_advance);
6052
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
6041
3
    DORIS_CLOUD_DEFER {
6042
3
        unregister_recycle_task(task_name);
6043
3
        int64_t cost =
6044
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6045
3
        metrics_context.finish_report();
6046
3
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
6047
3
                .tag("instance_id", instance_id_)
6048
3
                .tag("num_scanned", num_scanned)
6049
3
                .tag("num_timeout", num_timeout)
6050
3
                .tag("num_abort", num_abort)
6051
3
                .tag("num_advance", num_advance);
6052
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
6041
16
    DORIS_CLOUD_DEFER {
6042
16
        unregister_recycle_task(task_name);
6043
16
        int64_t cost =
6044
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6045
16
        metrics_context.finish_report();
6046
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
6047
16
                .tag("instance_id", instance_id_)
6048
16
                .tag("num_scanned", num_scanned)
6049
16
                .tag("num_timeout", num_timeout)
6050
16
                .tag("num_abort", num_abort)
6051
16
                .tag("num_advance", num_advance);
6052
16
    };
6053
6054
19
    int64_t current_time =
6055
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6056
6057
19
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
6058
19
                                  &current_time, &metrics_context,
6059
19
                                  this](std::string_view k, std::string_view v) -> int {
6060
9
        ++num_scanned;
6061
6062
9
        std::unique_ptr<Transaction> txn;
6063
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6064
9
        if (err != TxnErrorCode::TXN_OK) {
6065
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6066
0
            return -1;
6067
0
        }
6068
9
        std::string_view k1 = k;
6069
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
6070
9
        k1.remove_prefix(1); // Remove key space
6071
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6072
9
        if (decode_key(&k1, &out) != 0) {
6073
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
6074
0
            return -1;
6075
0
        }
6076
9
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6077
9
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6078
9
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6079
        // Update txn_info
6080
9
        std::string txn_inf_key, txn_inf_val;
6081
9
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6082
9
        err = txn->get(txn_inf_key, &txn_inf_val);
6083
9
        if (err != TxnErrorCode::TXN_OK) {
6084
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
6085
0
            return -1;
6086
0
        }
6087
9
        TxnInfoPB txn_info;
6088
9
        if (!txn_info.ParseFromString(txn_inf_val)) {
6089
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
6090
0
            return -1;
6091
0
        }
6092
6093
9
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
6094
3
            txn.reset();
6095
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
6096
3
            std::shared_ptr<TxnLazyCommitTask> task =
6097
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
6098
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
6099
3
            if (ret.first != MetaServiceCode::OK) {
6100
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
6101
0
                             << "msg=" << ret.second;
6102
0
                return -1;
6103
0
            }
6104
3
            ++num_advance;
6105
3
            return 0;
6106
6
        } else {
6107
6
            TxnRunningPB txn_running_pb;
6108
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6109
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6110
0
                return -1;
6111
0
            }
6112
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6113
4
                return 0;
6114
4
            }
6115
2
            ++num_timeout;
6116
6117
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
6118
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
6119
2
            txn_info.set_finish_time(current_time);
6120
2
            txn_info.set_reason("timeout");
6121
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
6122
2
            txn_inf_val.clear();
6123
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
6124
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
6125
0
                return -1;
6126
0
            }
6127
2
            txn->put(txn_inf_key, txn_inf_val);
6128
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
6129
            // Put recycle txn key
6130
2
            std::string recyc_txn_key, recyc_txn_val;
6131
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
6132
2
            RecycleTxnPB recycle_txn_pb;
6133
2
            recycle_txn_pb.set_creation_time(current_time);
6134
2
            recycle_txn_pb.set_label(txn_info.label());
6135
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
6136
0
                LOG_WARNING("failed to serialize txn recycle info")
6137
0
                        .tag("key", hex(k))
6138
0
                        .tag("db_id", db_id)
6139
0
                        .tag("txn_id", txn_id);
6140
0
                return -1;
6141
0
            }
6142
2
            txn->put(recyc_txn_key, recyc_txn_val);
6143
            // Remove txn running key
6144
2
            txn->remove(k);
6145
2
            err = txn->commit();
6146
2
            if (err != TxnErrorCode::TXN_OK) {
6147
0
                LOG_WARNING("failed to commit txn err={}", err)
6148
0
                        .tag("key", hex(k))
6149
0
                        .tag("db_id", db_id)
6150
0
                        .tag("txn_id", txn_id);
6151
0
                return -1;
6152
0
            }
6153
2
            metrics_context.total_recycled_num = ++num_abort;
6154
2
            metrics_context.report();
6155
2
        }
6156
6157
2
        return 0;
6158
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6059
3
                                  this](std::string_view k, std::string_view v) -> int {
6060
3
        ++num_scanned;
6061
6062
3
        std::unique_ptr<Transaction> txn;
6063
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6064
3
        if (err != TxnErrorCode::TXN_OK) {
6065
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6066
0
            return -1;
6067
0
        }
6068
3
        std::string_view k1 = k;
6069
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
6070
3
        k1.remove_prefix(1); // Remove key space
6071
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6072
3
        if (decode_key(&k1, &out) != 0) {
6073
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
6074
0
            return -1;
6075
0
        }
6076
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6077
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6078
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6079
        // Update txn_info
6080
3
        std::string txn_inf_key, txn_inf_val;
6081
3
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6082
3
        err = txn->get(txn_inf_key, &txn_inf_val);
6083
3
        if (err != TxnErrorCode::TXN_OK) {
6084
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
6085
0
            return -1;
6086
0
        }
6087
3
        TxnInfoPB txn_info;
6088
3
        if (!txn_info.ParseFromString(txn_inf_val)) {
6089
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
6090
0
            return -1;
6091
0
        }
6092
6093
3
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
6094
3
            txn.reset();
6095
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
6096
3
            std::shared_ptr<TxnLazyCommitTask> task =
6097
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
6098
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
6099
3
            if (ret.first != MetaServiceCode::OK) {
6100
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
6101
0
                             << "msg=" << ret.second;
6102
0
                return -1;
6103
0
            }
6104
3
            ++num_advance;
6105
3
            return 0;
6106
3
        } else {
6107
0
            TxnRunningPB txn_running_pb;
6108
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6109
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6110
0
                return -1;
6111
0
            }
6112
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6113
0
                return 0;
6114
0
            }
6115
0
            ++num_timeout;
6116
6117
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
6118
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
6119
0
            txn_info.set_finish_time(current_time);
6120
0
            txn_info.set_reason("timeout");
6121
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
6122
0
            txn_inf_val.clear();
6123
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
6124
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
6125
0
                return -1;
6126
0
            }
6127
0
            txn->put(txn_inf_key, txn_inf_val);
6128
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
6129
            // Put recycle txn key
6130
0
            std::string recyc_txn_key, recyc_txn_val;
6131
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
6132
0
            RecycleTxnPB recycle_txn_pb;
6133
0
            recycle_txn_pb.set_creation_time(current_time);
6134
0
            recycle_txn_pb.set_label(txn_info.label());
6135
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
6136
0
                LOG_WARNING("failed to serialize txn recycle info")
6137
0
                        .tag("key", hex(k))
6138
0
                        .tag("db_id", db_id)
6139
0
                        .tag("txn_id", txn_id);
6140
0
                return -1;
6141
0
            }
6142
0
            txn->put(recyc_txn_key, recyc_txn_val);
6143
            // Remove txn running key
6144
0
            txn->remove(k);
6145
0
            err = txn->commit();
6146
0
            if (err != TxnErrorCode::TXN_OK) {
6147
0
                LOG_WARNING("failed to commit txn err={}", err)
6148
0
                        .tag("key", hex(k))
6149
0
                        .tag("db_id", db_id)
6150
0
                        .tag("txn_id", txn_id);
6151
0
                return -1;
6152
0
            }
6153
0
            metrics_context.total_recycled_num = ++num_abort;
6154
0
            metrics_context.report();
6155
0
        }
6156
6157
0
        return 0;
6158
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6059
6
                                  this](std::string_view k, std::string_view v) -> int {
6060
6
        ++num_scanned;
6061
6062
6
        std::unique_ptr<Transaction> txn;
6063
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6064
6
        if (err != TxnErrorCode::TXN_OK) {
6065
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6066
0
            return -1;
6067
0
        }
6068
6
        std::string_view k1 = k;
6069
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
6070
6
        k1.remove_prefix(1); // Remove key space
6071
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6072
6
        if (decode_key(&k1, &out) != 0) {
6073
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
6074
0
            return -1;
6075
0
        }
6076
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6077
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6078
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6079
        // Update txn_info
6080
6
        std::string txn_inf_key, txn_inf_val;
6081
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6082
6
        err = txn->get(txn_inf_key, &txn_inf_val);
6083
6
        if (err != TxnErrorCode::TXN_OK) {
6084
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
6085
0
            return -1;
6086
0
        }
6087
6
        TxnInfoPB txn_info;
6088
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
6089
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
6090
0
            return -1;
6091
0
        }
6092
6093
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
6094
0
            txn.reset();
6095
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
6096
0
            std::shared_ptr<TxnLazyCommitTask> task =
6097
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
6098
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
6099
0
            if (ret.first != MetaServiceCode::OK) {
6100
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
6101
0
                             << "msg=" << ret.second;
6102
0
                return -1;
6103
0
            }
6104
0
            ++num_advance;
6105
0
            return 0;
6106
6
        } else {
6107
6
            TxnRunningPB txn_running_pb;
6108
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6109
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6110
0
                return -1;
6111
0
            }
6112
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6113
4
                return 0;
6114
4
            }
6115
2
            ++num_timeout;
6116
6117
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
6118
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
6119
2
            txn_info.set_finish_time(current_time);
6120
2
            txn_info.set_reason("timeout");
6121
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
6122
2
            txn_inf_val.clear();
6123
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
6124
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
6125
0
                return -1;
6126
0
            }
6127
2
            txn->put(txn_inf_key, txn_inf_val);
6128
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
6129
            // Put recycle txn key
6130
2
            std::string recyc_txn_key, recyc_txn_val;
6131
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
6132
2
            RecycleTxnPB recycle_txn_pb;
6133
2
            recycle_txn_pb.set_creation_time(current_time);
6134
2
            recycle_txn_pb.set_label(txn_info.label());
6135
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
6136
0
                LOG_WARNING("failed to serialize txn recycle info")
6137
0
                        .tag("key", hex(k))
6138
0
                        .tag("db_id", db_id)
6139
0
                        .tag("txn_id", txn_id);
6140
0
                return -1;
6141
0
            }
6142
2
            txn->put(recyc_txn_key, recyc_txn_val);
6143
            // Remove txn running key
6144
2
            txn->remove(k);
6145
2
            err = txn->commit();
6146
2
            if (err != TxnErrorCode::TXN_OK) {
6147
0
                LOG_WARNING("failed to commit txn err={}", err)
6148
0
                        .tag("key", hex(k))
6149
0
                        .tag("db_id", db_id)
6150
0
                        .tag("txn_id", txn_id);
6151
0
                return -1;
6152
0
            }
6153
2
            metrics_context.total_recycled_num = ++num_abort;
6154
2
            metrics_context.report();
6155
2
        }
6156
6157
2
        return 0;
6158
6
    };
6159
6160
19
    if (config::enable_recycler_stats_metrics) {
6161
0
        scan_and_statistics_abort_timeout_txn();
6162
0
    }
6163
    // recycle_func and loop_done for scan and recycle
6164
19
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
6165
19
                            std::move(handle_txn_running_kv));
6166
19
}
6167
6168
19
int InstanceRecycler::recycle_expired_txn_label() {
6169
19
    const std::string task_name = "recycle_expired_txn_label";
6170
19
    int64_t num_scanned = 0;
6171
19
    int64_t num_expired = 0;
6172
19
    std::atomic_long num_recycled = 0;
6173
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6174
19
    int ret = 0;
6175
6176
19
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
6177
19
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6178
19
    std::string begin_recycle_txn_key;
6179
19
    std::string end_recycle_txn_key;
6180
19
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
6181
19
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
6182
19
    std::vector<std::string> recycle_txn_info_keys;
6183
6184
19
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
6185
6186
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6187
19
    register_recycle_task(task_name, start_time);
6188
19
    DORIS_CLOUD_DEFER {
6189
19
        unregister_recycle_task(task_name);
6190
19
        int64_t cost =
6191
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6192
19
        metrics_context.finish_report();
6193
19
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
6194
19
                .tag("instance_id", instance_id_)
6195
19
                .tag("num_scanned", num_scanned)
6196
19
                .tag("num_expired", num_expired)
6197
19
                .tag("num_recycled", num_recycled);
6198
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
6188
1
    DORIS_CLOUD_DEFER {
6189
1
        unregister_recycle_task(task_name);
6190
1
        int64_t cost =
6191
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6192
1
        metrics_context.finish_report();
6193
1
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
6194
1
                .tag("instance_id", instance_id_)
6195
1
                .tag("num_scanned", num_scanned)
6196
1
                .tag("num_expired", num_expired)
6197
1
                .tag("num_recycled", num_recycled);
6198
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
6188
18
    DORIS_CLOUD_DEFER {
6189
18
        unregister_recycle_task(task_name);
6190
18
        int64_t cost =
6191
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6192
18
        metrics_context.finish_report();
6193
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
6194
18
                .tag("instance_id", instance_id_)
6195
18
                .tag("num_scanned", num_scanned)
6196
18
                .tag("num_expired", num_expired)
6197
18
                .tag("num_recycled", num_recycled);
6198
18
    };
6199
6200
19
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6201
6202
19
    SyncExecutor<int> concurrent_delete_executor(
6203
19
            _thread_pool_group.s3_producer_pool,
6204
19
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
6205
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
6205
1
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
6205
23.0k
            [](const int& ret) { return ret != 0; });
6206
6207
19
    int64_t current_time_ms =
6208
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6209
6210
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6211
30.0k
        ++num_scanned;
6212
30.0k
        RecycleTxnPB recycle_txn_pb;
6213
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6214
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6215
0
            return -1;
6216
0
        }
6217
30.0k
        if ((config::force_immediate_recycle) ||
6218
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6219
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6220
30.0k
             current_time_ms)) {
6221
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6222
23.0k
            num_expired++;
6223
23.0k
            recycle_txn_info_keys.emplace_back(k);
6224
23.0k
        }
6225
30.0k
        return 0;
6226
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6210
1
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6211
1
        ++num_scanned;
6212
1
        RecycleTxnPB recycle_txn_pb;
6213
1
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6214
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6215
0
            return -1;
6216
0
        }
6217
1
        if ((config::force_immediate_recycle) ||
6218
1
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6219
1
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6220
1
             current_time_ms)) {
6221
1
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6222
1
            num_expired++;
6223
1
            recycle_txn_info_keys.emplace_back(k);
6224
1
        }
6225
1
        return 0;
6226
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6210
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6211
30.0k
        ++num_scanned;
6212
30.0k
        RecycleTxnPB recycle_txn_pb;
6213
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6214
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6215
0
            return -1;
6216
0
        }
6217
30.0k
        if ((config::force_immediate_recycle) ||
6218
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6219
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6220
30.0k
             current_time_ms)) {
6221
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6222
23.0k
            num_expired++;
6223
23.0k
            recycle_txn_info_keys.emplace_back(k);
6224
23.0k
        }
6225
30.0k
        return 0;
6226
30.0k
    };
6227
6228
    // int 0 for success, 1 for conflict, -1 for error
6229
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6230
23.0k
        std::string_view k1 = k;
6231
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6232
23.0k
        k1.remove_prefix(1); // Remove key space
6233
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6234
23.0k
        int ret = decode_key(&k1, &out);
6235
23.0k
        if (ret != 0) {
6236
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6237
0
            return -1;
6238
0
        }
6239
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6240
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6241
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6242
23.0k
        std::unique_ptr<Transaction> txn;
6243
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6244
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6245
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6246
0
            return -1;
6247
0
        }
6248
        // Remove txn index kv
6249
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6250
23.0k
        txn->remove(index_key);
6251
        // Remove txn info kv
6252
23.0k
        std::string info_key, info_val;
6253
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6254
23.0k
        err = txn->get(info_key, &info_val);
6255
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6256
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6257
0
            return -1;
6258
0
        }
6259
23.0k
        TxnInfoPB txn_info;
6260
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6261
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6262
0
            return -1;
6263
0
        }
6264
23.0k
        txn->remove(info_key);
6265
        // Remove sub txn index kvs
6266
23.0k
        std::vector<std::string> sub_txn_index_keys;
6267
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6268
23.0k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6269
23.0k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6270
23.0k
        }
6271
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6272
22.9k
            txn->remove(sub_txn_index_key);
6273
22.9k
        }
6274
        // Update txn label
6275
23.0k
        std::string label_key, label_val;
6276
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6277
23.0k
        err = txn->get(label_key, &label_val);
6278
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6279
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6280
0
                         << " err=" << err;
6281
0
            return -1;
6282
0
        }
6283
23.0k
        TxnLabelPB txn_label;
6284
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6285
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6286
0
            return -1;
6287
0
        }
6288
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6289
23.0k
        if (it != txn_label.txn_ids().end()) {
6290
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6291
23.0k
        }
6292
23.0k
        if (txn_label.txn_ids().empty()) {
6293
23.0k
            txn->remove(label_key);
6294
23.0k
            TEST_SYNC_POINT_CALLBACK(
6295
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6296
23.0k
        } else {
6297
73
            if (!txn_label.SerializeToString(&label_val)) {
6298
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6299
0
                return -1;
6300
0
            }
6301
73
            TEST_SYNC_POINT_CALLBACK(
6302
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6303
73
            txn->atomic_set_ver_value(label_key, label_val);
6304
73
            TEST_SYNC_POINT_CALLBACK(
6305
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6306
73
        }
6307
        // Remove recycle txn kv
6308
23.0k
        txn->remove(k);
6309
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6310
23.0k
        err = txn->commit();
6311
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6312
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6313
62
                TEST_SYNC_POINT_CALLBACK(
6314
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6315
                // log the txn_id and label
6316
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6317
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6318
62
                             << " txn_label=" << txn_info.label();
6319
62
                return 1;
6320
62
            }
6321
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6322
0
            return -1;
6323
62
        }
6324
23.0k
        ++num_recycled;
6325
6326
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6327
23.0k
        return 0;
6328
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6229
1
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6230
1
        std::string_view k1 = k;
6231
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6232
1
        k1.remove_prefix(1); // Remove key space
6233
1
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6234
1
        int ret = decode_key(&k1, &out);
6235
1
        if (ret != 0) {
6236
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6237
0
            return -1;
6238
0
        }
6239
1
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6240
1
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6241
1
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6242
1
        std::unique_ptr<Transaction> txn;
6243
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6244
1
        if (err != TxnErrorCode::TXN_OK) {
6245
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6246
0
            return -1;
6247
0
        }
6248
        // Remove txn index kv
6249
1
        auto index_key = txn_index_key({instance_id_, txn_id});
6250
1
        txn->remove(index_key);
6251
        // Remove txn info kv
6252
1
        std::string info_key, info_val;
6253
1
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6254
1
        err = txn->get(info_key, &info_val);
6255
1
        if (err != TxnErrorCode::TXN_OK) {
6256
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6257
0
            return -1;
6258
0
        }
6259
1
        TxnInfoPB txn_info;
6260
1
        if (!txn_info.ParseFromString(info_val)) {
6261
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6262
0
            return -1;
6263
0
        }
6264
1
        txn->remove(info_key);
6265
        // Remove sub txn index kvs
6266
1
        std::vector<std::string> sub_txn_index_keys;
6267
1
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6268
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6269
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
6270
0
        }
6271
1
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6272
0
            txn->remove(sub_txn_index_key);
6273
0
        }
6274
        // Update txn label
6275
1
        std::string label_key, label_val;
6276
1
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6277
1
        err = txn->get(label_key, &label_val);
6278
1
        if (err != TxnErrorCode::TXN_OK) {
6279
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6280
0
                         << " err=" << err;
6281
0
            return -1;
6282
0
        }
6283
1
        TxnLabelPB txn_label;
6284
1
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6285
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6286
0
            return -1;
6287
0
        }
6288
1
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6289
1
        if (it != txn_label.txn_ids().end()) {
6290
1
            txn_label.mutable_txn_ids()->erase(it);
6291
1
        }
6292
1
        if (txn_label.txn_ids().empty()) {
6293
1
            txn->remove(label_key);
6294
1
            TEST_SYNC_POINT_CALLBACK(
6295
1
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6296
1
        } else {
6297
0
            if (!txn_label.SerializeToString(&label_val)) {
6298
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6299
0
                return -1;
6300
0
            }
6301
0
            TEST_SYNC_POINT_CALLBACK(
6302
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6303
0
            txn->atomic_set_ver_value(label_key, label_val);
6304
0
            TEST_SYNC_POINT_CALLBACK(
6305
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6306
0
        }
6307
        // Remove recycle txn kv
6308
1
        txn->remove(k);
6309
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6310
1
        err = txn->commit();
6311
1
        if (err != TxnErrorCode::TXN_OK) {
6312
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
6313
0
                TEST_SYNC_POINT_CALLBACK(
6314
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6315
                // log the txn_id and label
6316
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6317
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6318
0
                             << " txn_label=" << txn_info.label();
6319
0
                return 1;
6320
0
            }
6321
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6322
0
            return -1;
6323
0
        }
6324
1
        ++num_recycled;
6325
6326
1
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6327
1
        return 0;
6328
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6229
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6230
23.0k
        std::string_view k1 = k;
6231
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6232
23.0k
        k1.remove_prefix(1); // Remove key space
6233
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6234
23.0k
        int ret = decode_key(&k1, &out);
6235
23.0k
        if (ret != 0) {
6236
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6237
0
            return -1;
6238
0
        }
6239
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6240
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6241
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6242
23.0k
        std::unique_ptr<Transaction> txn;
6243
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6244
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6245
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6246
0
            return -1;
6247
0
        }
6248
        // Remove txn index kv
6249
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6250
23.0k
        txn->remove(index_key);
6251
        // Remove txn info kv
6252
23.0k
        std::string info_key, info_val;
6253
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6254
23.0k
        err = txn->get(info_key, &info_val);
6255
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6256
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6257
0
            return -1;
6258
0
        }
6259
23.0k
        TxnInfoPB txn_info;
6260
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6261
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6262
0
            return -1;
6263
0
        }
6264
23.0k
        txn->remove(info_key);
6265
        // Remove sub txn index kvs
6266
23.0k
        std::vector<std::string> sub_txn_index_keys;
6267
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6268
23.0k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6269
23.0k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6270
23.0k
        }
6271
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6272
22.9k
            txn->remove(sub_txn_index_key);
6273
22.9k
        }
6274
        // Update txn label
6275
23.0k
        std::string label_key, label_val;
6276
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6277
23.0k
        err = txn->get(label_key, &label_val);
6278
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6279
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6280
0
                         << " err=" << err;
6281
0
            return -1;
6282
0
        }
6283
23.0k
        TxnLabelPB txn_label;
6284
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6285
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6286
0
            return -1;
6287
0
        }
6288
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6289
23.0k
        if (it != txn_label.txn_ids().end()) {
6290
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6291
23.0k
        }
6292
23.0k
        if (txn_label.txn_ids().empty()) {
6293
23.0k
            txn->remove(label_key);
6294
23.0k
            TEST_SYNC_POINT_CALLBACK(
6295
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6296
23.0k
        } else {
6297
73
            if (!txn_label.SerializeToString(&label_val)) {
6298
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6299
0
                return -1;
6300
0
            }
6301
73
            TEST_SYNC_POINT_CALLBACK(
6302
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6303
73
            txn->atomic_set_ver_value(label_key, label_val);
6304
73
            TEST_SYNC_POINT_CALLBACK(
6305
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6306
73
        }
6307
        // Remove recycle txn kv
6308
23.0k
        txn->remove(k);
6309
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6310
23.0k
        err = txn->commit();
6311
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6312
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6313
62
                TEST_SYNC_POINT_CALLBACK(
6314
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6315
                // log the txn_id and label
6316
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6317
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6318
62
                             << " txn_label=" << txn_info.label();
6319
62
                return 1;
6320
62
            }
6321
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6322
0
            return -1;
6323
62
        }
6324
23.0k
        ++num_recycled;
6325
6326
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6327
23.0k
        return 0;
6328
23.0k
    };
6329
6330
19
    auto loop_done = [&]() -> int {
6331
10
        DORIS_CLOUD_DEFER {
6332
10
            recycle_txn_info_keys.clear();
6333
10
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6331
1
        DORIS_CLOUD_DEFER {
6332
1
            recycle_txn_info_keys.clear();
6333
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6331
9
        DORIS_CLOUD_DEFER {
6332
9
            recycle_txn_info_keys.clear();
6333
9
        };
6334
10
        TEST_SYNC_POINT_CALLBACK(
6335
10
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6336
10
                &recycle_txn_info_keys);
6337
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6338
23.0k
            concurrent_delete_executor.add([&]() {
6339
23.0k
                int ret = delete_recycle_txn_kv(k);
6340
23.0k
                if (ret == 1) {
6341
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6342
54
                    for (int i = 1; i <= max_retry; ++i) {
6343
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6344
54
                        ret = delete_recycle_txn_kv(k);
6345
                        // clang-format off
6346
54
                        TEST_SYNC_POINT_CALLBACK(
6347
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6348
                        // clang-format off
6349
54
                        if (ret != 1) {
6350
18
                            break;
6351
18
                        }
6352
                        // random sleep 0-100 ms to retry
6353
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6354
36
                    }
6355
18
                }
6356
23.0k
                if (ret != 0) {
6357
9
                    LOG_WARNING("failed to delete recycle txn kv")
6358
9
                            .tag("instance id", instance_id_)
6359
9
                            .tag("key", hex(k));
6360
9
                    return -1;
6361
9
                }
6362
23.0k
                return 0;
6363
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6338
1
            concurrent_delete_executor.add([&]() {
6339
1
                int ret = delete_recycle_txn_kv(k);
6340
1
                if (ret == 1) {
6341
0
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6342
0
                    for (int i = 1; i <= max_retry; ++i) {
6343
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6344
0
                        ret = delete_recycle_txn_kv(k);
6345
                        // clang-format off
6346
0
                        TEST_SYNC_POINT_CALLBACK(
6347
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6348
                        // clang-format off
6349
0
                        if (ret != 1) {
6350
0
                            break;
6351
0
                        }
6352
                        // random sleep 0-100 ms to retry
6353
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6354
0
                    }
6355
0
                }
6356
1
                if (ret != 0) {
6357
0
                    LOG_WARNING("failed to delete recycle txn kv")
6358
0
                            .tag("instance id", instance_id_)
6359
0
                            .tag("key", hex(k));
6360
0
                    return -1;
6361
0
                }
6362
1
                return 0;
6363
1
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6338
23.0k
            concurrent_delete_executor.add([&]() {
6339
23.0k
                int ret = delete_recycle_txn_kv(k);
6340
23.0k
                if (ret == 1) {
6341
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6342
54
                    for (int i = 1; i <= max_retry; ++i) {
6343
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6344
54
                        ret = delete_recycle_txn_kv(k);
6345
                        // clang-format off
6346
54
                        TEST_SYNC_POINT_CALLBACK(
6347
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6348
                        // clang-format off
6349
54
                        if (ret != 1) {
6350
18
                            break;
6351
18
                        }
6352
                        // random sleep 0-100 ms to retry
6353
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6354
36
                    }
6355
18
                }
6356
23.0k
                if (ret != 0) {
6357
9
                    LOG_WARNING("failed to delete recycle txn kv")
6358
9
                            .tag("instance id", instance_id_)
6359
9
                            .tag("key", hex(k));
6360
9
                    return -1;
6361
9
                }
6362
23.0k
                return 0;
6363
23.0k
            });
6364
23.0k
        }
6365
10
        bool finished = true;
6366
10
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6367
23.0k
        for (int r : rets) {
6368
23.0k
            if (r != 0) {
6369
9
                ret = -1;
6370
9
            }
6371
23.0k
        }
6372
6373
10
        ret = finished ? ret : -1;
6374
6375
        // Update metrics after all concurrent tasks completed
6376
10
        metrics_context.total_recycled_num = num_recycled.load();
6377
10
        metrics_context.report();
6378
6379
10
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6380
6381
10
        if (ret != 0) {
6382
3
            LOG_WARNING("recycle txn kv ret!=0")
6383
3
                    .tag("finished", finished)
6384
3
                    .tag("ret", ret)
6385
3
                    .tag("instance_id", instance_id_);
6386
3
            return ret;
6387
3
        }
6388
7
        return ret;
6389
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6330
1
    auto loop_done = [&]() -> int {
6331
1
        DORIS_CLOUD_DEFER {
6332
1
            recycle_txn_info_keys.clear();
6333
1
        };
6334
1
        TEST_SYNC_POINT_CALLBACK(
6335
1
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6336
1
                &recycle_txn_info_keys);
6337
1
        for (const auto& k : recycle_txn_info_keys) {
6338
1
            concurrent_delete_executor.add([&]() {
6339
1
                int ret = delete_recycle_txn_kv(k);
6340
1
                if (ret == 1) {
6341
1
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6342
1
                    for (int i = 1; i <= max_retry; ++i) {
6343
1
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6344
1
                        ret = delete_recycle_txn_kv(k);
6345
                        // clang-format off
6346
1
                        TEST_SYNC_POINT_CALLBACK(
6347
1
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6348
                        // clang-format off
6349
1
                        if (ret != 1) {
6350
1
                            break;
6351
1
                        }
6352
                        // random sleep 0-100 ms to retry
6353
1
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6354
1
                    }
6355
1
                }
6356
1
                if (ret != 0) {
6357
1
                    LOG_WARNING("failed to delete recycle txn kv")
6358
1
                            .tag("instance id", instance_id_)
6359
1
                            .tag("key", hex(k));
6360
1
                    return -1;
6361
1
                }
6362
1
                return 0;
6363
1
            });
6364
1
        }
6365
1
        bool finished = true;
6366
1
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6367
1
        for (int r : rets) {
6368
1
            if (r != 0) {
6369
0
                ret = -1;
6370
0
            }
6371
1
        }
6372
6373
1
        ret = finished ? ret : -1;
6374
6375
        // Update metrics after all concurrent tasks completed
6376
1
        metrics_context.total_recycled_num = num_recycled.load();
6377
1
        metrics_context.report();
6378
6379
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6380
6381
1
        if (ret != 0) {
6382
0
            LOG_WARNING("recycle txn kv ret!=0")
6383
0
                    .tag("finished", finished)
6384
0
                    .tag("ret", ret)
6385
0
                    .tag("instance_id", instance_id_);
6386
0
            return ret;
6387
0
        }
6388
1
        return ret;
6389
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6330
9
    auto loop_done = [&]() -> int {
6331
9
        DORIS_CLOUD_DEFER {
6332
9
            recycle_txn_info_keys.clear();
6333
9
        };
6334
9
        TEST_SYNC_POINT_CALLBACK(
6335
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6336
9
                &recycle_txn_info_keys);
6337
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6338
23.0k
            concurrent_delete_executor.add([&]() {
6339
23.0k
                int ret = delete_recycle_txn_kv(k);
6340
23.0k
                if (ret == 1) {
6341
23.0k
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6342
23.0k
                    for (int i = 1; i <= max_retry; ++i) {
6343
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6344
23.0k
                        ret = delete_recycle_txn_kv(k);
6345
                        // clang-format off
6346
23.0k
                        TEST_SYNC_POINT_CALLBACK(
6347
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6348
                        // clang-format off
6349
23.0k
                        if (ret != 1) {
6350
23.0k
                            break;
6351
23.0k
                        }
6352
                        // random sleep 0-100 ms to retry
6353
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6354
23.0k
                    }
6355
23.0k
                }
6356
23.0k
                if (ret != 0) {
6357
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
6358
23.0k
                            .tag("instance id", instance_id_)
6359
23.0k
                            .tag("key", hex(k));
6360
23.0k
                    return -1;
6361
23.0k
                }
6362
23.0k
                return 0;
6363
23.0k
            });
6364
23.0k
        }
6365
9
        bool finished = true;
6366
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6367
23.0k
        for (int r : rets) {
6368
23.0k
            if (r != 0) {
6369
9
                ret = -1;
6370
9
            }
6371
23.0k
        }
6372
6373
9
        ret = finished ? ret : -1;
6374
6375
        // Update metrics after all concurrent tasks completed
6376
9
        metrics_context.total_recycled_num = num_recycled.load();
6377
9
        metrics_context.report();
6378
6379
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6380
6381
9
        if (ret != 0) {
6382
3
            LOG_WARNING("recycle txn kv ret!=0")
6383
3
                    .tag("finished", finished)
6384
3
                    .tag("ret", ret)
6385
3
                    .tag("instance_id", instance_id_);
6386
3
            return ret;
6387
3
        }
6388
6
        return ret;
6389
9
    };
6390
6391
19
    if (config::enable_recycler_stats_metrics) {
6392
0
        scan_and_statistics_expired_txn_label();
6393
0
    }
6394
    // recycle_func and loop_done for scan and recycle
6395
19
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
6396
19
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
6397
19
}
6398
6399
/// Identifiers of one copy job, carried around only to build log trace strings.
///
/// Field order is significant: the struct is brace-initialized positionally and
/// unpacked with structured bindings, so new fields must be appended at the end.
struct CopyJobIdTuple {
    std::string instance_id;
    std::string stage_id;
    // Zero-initialized so that a default-constructed tuple never holds an indeterminate value.
    long table_id = 0;
    // Printed as "query_id" in the log trace.
    std::string copy_id;
    // Printed as "path" in the log trace.
    std::string stage_path;
};
6406
struct BatchObjStoreAccessor {
6407
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
6408
                          TxnKv* txn_kv)
6409
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
6410
3
    ~BatchObjStoreAccessor() {
6411
3
        if (!paths_.empty()) {
6412
3
            consume();
6413
3
        }
6414
3
    }
6415
6416
    /**
6417
    * To implicitely do batch work and submit the batch delete task to s3
6418
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
6419
    *
6420
    * @param copy_job The protubuf struct consists of the copy job files.
6421
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
6422
    *            it would last until we finish the delete task, here we need pass one string value
6423
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
6424
    */
6425
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
6426
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
6427
5
        auto& file_keys = copy_file_keys_[key];
6428
5
        file_keys.log_trace =
6429
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
6430
5
                            instance_id, stage_id, table_id, copy_id, path);
6431
5
        std::string_view log_trace = file_keys.log_trace;
6432
2.03k
        for (const auto& file : copy_job.object_files()) {
6433
2.03k
            auto relative_path = file.relative_path();
6434
2.03k
            paths_.push_back(relative_path);
6435
2.03k
            file_keys.keys.push_back(copy_file_key(
6436
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
6437
2.03k
            LOG_INFO(log_trace)
6438
2.03k
                    .tag("relative_path", relative_path)
6439
2.03k
                    .tag("batch_count", batch_count_);
6440
2.03k
        }
6441
5
        LOG_INFO(log_trace)
6442
5
                .tag("objects_num", copy_job.object_files().size())
6443
5
                .tag("batch_count", batch_count_);
6444
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
6445
        // recommend using delete objects when objects num is less than 10)
6446
5
        if (paths_.size() < 1000) {
6447
3
            return;
6448
3
        }
6449
2
        consume();
6450
2
    }
6451
6452
private:
6453
5
    void consume() {
6454
5
        DORIS_CLOUD_DEFER {
6455
5
            paths_.clear();
6456
5
            copy_file_keys_.clear();
6457
5
            batch_count_++;
6458
6459
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
6460
5
                        batch_count_);
6461
5
        };
6462
6463
5
        StopWatch sw;
6464
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
6465
5
        if (0 != accessor_->delete_files(paths_)) {
6466
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
6467
2
                        paths_.size(), batch_count_, sw.elapsed_us());
6468
2
            return;
6469
2
        }
6470
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
6471
3
                    paths_.size(), batch_count_, sw.elapsed_us());
6472
        // delete fdb's keys
6473
3
        for (auto& file_keys : copy_file_keys_) {
6474
3
            auto& [log_trace, keys] = file_keys.second;
6475
3
            std::unique_ptr<Transaction> txn;
6476
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
6477
0
                LOG(WARNING) << "failed to create txn";
6478
0
                continue;
6479
0
            }
6480
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6481
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6482
            // limited, should not cause the txn commit failed.
6483
1.02k
            for (const auto& key : keys) {
6484
1.02k
                txn->remove(key);
6485
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
6486
1.02k
            }
6487
3
            txn->remove(file_keys.first);
6488
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
6489
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
6490
0
                continue;
6491
0
            }
6492
3
        }
6493
3
    }
6494
    std::shared_ptr<StorageVaultAccessor> accessor_;
6495
    // the path of the s3 files to be deleted
6496
    std::vector<std::string> paths_;
6497
    struct CopyFiles {
6498
        std::string log_trace;
6499
        std::vector<std::string> keys;
6500
    };
6501
    // pair<std::string, std::vector<std::string>>
6502
    // first: instance_id_ stage_id table_id query_id
6503
    // second: keys to be deleted
6504
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
6505
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
6506
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
6507
    // which can together uniquely identifies different tasks for tracing log
6508
    uint64_t& batch_count_;
6509
    TxnKv* txn_kv_;
6510
};
6511
6512
13
int InstanceRecycler::recycle_copy_jobs() {
6513
13
    int64_t num_scanned = 0;
6514
13
    int64_t num_finished = 0;
6515
13
    int64_t num_expired = 0;
6516
13
    int64_t num_recycled = 0;
6517
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
6518
13
    uint64_t batch_count = 0;
6519
13
    const std::string task_name = "recycle_copy_jobs";
6520
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6521
6522
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
6523
6524
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6525
13
    register_recycle_task(task_name, start_time);
6526
6527
13
    DORIS_CLOUD_DEFER {
6528
13
        unregister_recycle_task(task_name);
6529
13
        int64_t cost =
6530
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6531
13
        metrics_context.finish_report();
6532
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6533
13
                .tag("instance_id", instance_id_)
6534
13
                .tag("num_scanned", num_scanned)
6535
13
                .tag("num_finished", num_finished)
6536
13
                .tag("num_expired", num_expired)
6537
13
                .tag("num_recycled", num_recycled);
6538
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
6527
13
    DORIS_CLOUD_DEFER {
6528
13
        unregister_recycle_task(task_name);
6529
13
        int64_t cost =
6530
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6531
13
        metrics_context.finish_report();
6532
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6533
13
                .tag("instance_id", instance_id_)
6534
13
                .tag("num_scanned", num_scanned)
6535
13
                .tag("num_finished", num_finished)
6536
13
                .tag("num_expired", num_expired)
6537
13
                .tag("num_recycled", num_recycled);
6538
13
    };
6539
6540
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
6541
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
6542
13
    std::string key0;
6543
13
    std::string key1;
6544
13
    copy_job_key(key_info0, &key0);
6545
13
    copy_job_key(key_info1, &key1);
6546
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
6547
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
6548
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
6549
16
                         this](std::string_view k, std::string_view v) -> int {
6550
16
        ++num_scanned;
6551
16
        CopyJobPB copy_job;
6552
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6553
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6554
0
            return -1;
6555
0
        }
6556
6557
        // decode copy job key
6558
16
        auto k1 = k;
6559
16
        k1.remove_prefix(1);
6560
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6561
16
        decode_key(&k1, &out);
6562
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6563
        // -> CopyJobPB
6564
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6565
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6566
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6567
6568
16
        bool check_storage = true;
6569
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6570
12
            ++num_finished;
6571
6572
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6573
7
                auto it = stage_accessor_map.find(stage_id);
6574
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6575
7
                std::string_view path;
6576
7
                if (it != stage_accessor_map.end()) {
6577
2
                    accessor = it->second;
6578
5
                } else {
6579
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6580
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6581
5
                                                      &inner_accessor);
6582
5
                    if (ret < 0) { // error
6583
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6584
0
                        return -1;
6585
5
                    } else if (ret == 0) {
6586
3
                        path = inner_accessor->uri();
6587
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6588
3
                                inner_accessor, batch_count, txn_kv_.get());
6589
3
                        stage_accessor_map.emplace(stage_id, accessor);
6590
3
                    } else { // stage not found, skip check storage
6591
2
                        check_storage = false;
6592
2
                    }
6593
5
                }
6594
7
                if (check_storage) {
6595
                    // TODO delete objects with key and etag is not supported
6596
5
                    accessor->add(std::move(copy_job), std::string(k),
6597
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6598
5
                    return 0;
6599
5
                }
6600
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6601
5
                int64_t current_time =
6602
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6603
5
                if (copy_job.finish_time_ms() > 0) {
6604
2
                    if (!config::force_immediate_recycle &&
6605
2
                        current_time < copy_job.finish_time_ms() +
6606
2
                                               config::copy_job_max_retention_second * 1000) {
6607
1
                        return 0;
6608
1
                    }
6609
3
                } else {
6610
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6611
3
                    if (!config::force_immediate_recycle &&
6612
3
                        current_time < copy_job.start_time_ms() +
6613
3
                                               config::copy_job_max_retention_second * 1000) {
6614
1
                        return 0;
6615
1
                    }
6616
3
                }
6617
5
            }
6618
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6619
4
            int64_t current_time =
6620
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6621
            // if copy job is timeout: delete all copy file kvs and copy job kv
6622
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6623
2
                return 0;
6624
2
            }
6625
2
            ++num_expired;
6626
2
        }
6627
6628
        // delete all copy files
6629
7
        std::vector<std::string> copy_file_keys;
6630
70
        for (auto& file : copy_job.object_files()) {
6631
70
            copy_file_keys.push_back(copy_file_key(
6632
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6633
70
        }
6634
7
        std::unique_ptr<Transaction> txn;
6635
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6636
0
            LOG(WARNING) << "failed to create txn";
6637
0
            return -1;
6638
0
        }
6639
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6640
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6641
        // limited, should not cause the txn commit failed.
6642
70
        for (const auto& key : copy_file_keys) {
6643
70
            txn->remove(key);
6644
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6645
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6646
70
                      << ", query_id=" << copy_id;
6647
70
        }
6648
7
        txn->remove(k);
6649
7
        TxnErrorCode err = txn->commit();
6650
7
        if (err != TxnErrorCode::TXN_OK) {
6651
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6652
0
            return -1;
6653
0
        }
6654
6655
7
        metrics_context.total_recycled_num = ++num_recycled;
6656
7
        metrics_context.report();
6657
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6658
7
        return 0;
6659
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6549
16
                         this](std::string_view k, std::string_view v) -> int {
6550
16
        ++num_scanned;
6551
16
        CopyJobPB copy_job;
6552
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6553
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6554
0
            return -1;
6555
0
        }
6556
6557
        // decode copy job key
6558
16
        auto k1 = k;
6559
16
        k1.remove_prefix(1);
6560
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6561
16
        decode_key(&k1, &out);
6562
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6563
        // -> CopyJobPB
6564
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6565
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6566
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6567
6568
16
        bool check_storage = true;
6569
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6570
12
            ++num_finished;
6571
6572
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6573
7
                auto it = stage_accessor_map.find(stage_id);
6574
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6575
7
                std::string_view path;
6576
7
                if (it != stage_accessor_map.end()) {
6577
2
                    accessor = it->second;
6578
5
                } else {
6579
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6580
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6581
5
                                                      &inner_accessor);
6582
5
                    if (ret < 0) { // error
6583
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6584
0
                        return -1;
6585
5
                    } else if (ret == 0) {
6586
3
                        path = inner_accessor->uri();
6587
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6588
3
                                inner_accessor, batch_count, txn_kv_.get());
6589
3
                        stage_accessor_map.emplace(stage_id, accessor);
6590
3
                    } else { // stage not found, skip check storage
6591
2
                        check_storage = false;
6592
2
                    }
6593
5
                }
6594
7
                if (check_storage) {
6595
                    // TODO delete objects with key and etag is not supported
6596
5
                    accessor->add(std::move(copy_job), std::string(k),
6597
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6598
5
                    return 0;
6599
5
                }
6600
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6601
5
                int64_t current_time =
6602
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6603
5
                if (copy_job.finish_time_ms() > 0) {
6604
2
                    if (!config::force_immediate_recycle &&
6605
2
                        current_time < copy_job.finish_time_ms() +
6606
2
                                               config::copy_job_max_retention_second * 1000) {
6607
1
                        return 0;
6608
1
                    }
6609
3
                } else {
6610
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6611
3
                    if (!config::force_immediate_recycle &&
6612
3
                        current_time < copy_job.start_time_ms() +
6613
3
                                               config::copy_job_max_retention_second * 1000) {
6614
1
                        return 0;
6615
1
                    }
6616
3
                }
6617
5
            }
6618
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6619
4
            int64_t current_time =
6620
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6621
            // if copy job is timeout: delete all copy file kvs and copy job kv
6622
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6623
2
                return 0;
6624
2
            }
6625
2
            ++num_expired;
6626
2
        }
6627
6628
        // delete all copy files
6629
7
        std::vector<std::string> copy_file_keys;
6630
70
        for (auto& file : copy_job.object_files()) {
6631
70
            copy_file_keys.push_back(copy_file_key(
6632
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6633
70
        }
6634
7
        std::unique_ptr<Transaction> txn;
6635
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6636
0
            LOG(WARNING) << "failed to create txn";
6637
0
            return -1;
6638
0
        }
6639
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6640
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6641
        // limited, should not cause the txn commit failed.
6642
70
        for (const auto& key : copy_file_keys) {
6643
70
            txn->remove(key);
6644
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6645
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6646
70
                      << ", query_id=" << copy_id;
6647
70
        }
6648
7
        txn->remove(k);
6649
7
        TxnErrorCode err = txn->commit();
6650
7
        if (err != TxnErrorCode::TXN_OK) {
6651
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6652
0
            return -1;
6653
0
        }
6654
6655
7
        metrics_context.total_recycled_num = ++num_recycled;
6656
7
        metrics_context.report();
6657
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6658
7
        return 0;
6659
7
    };
6660
6661
13
    if (config::enable_recycler_stats_metrics) {
6662
0
        scan_and_statistics_copy_jobs();
6663
0
    }
6664
    // recycle_func and loop_done for scan and recycle
6665
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
6666
13
}
6667
6668
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
6669
                                             const StagePB::StageType& stage_type,
6670
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
6671
5
#ifdef UNIT_TEST
6672
    // In unit test, external use the same accessor as the internal stage
6673
5
    auto it = accessor_map_.find(stage_id);
6674
5
    if (it != accessor_map_.end()) {
6675
3
        *accessor = it->second;
6676
3
    } else {
6677
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
6678
2
        return 1;
6679
2
    }
6680
#else
6681
    // init s3 accessor and add to accessor map
6682
    auto stage_it =
6683
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
6684
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
6685
6686
    if (stage_it == instance_info_.stages().end()) {
6687
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
6688
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
6689
        return 1;
6690
    }
6691
6692
    const auto& object_store_info = stage_it->obj_info();
6693
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
6694
6695
    S3Conf s3_conf;
6696
    if (stage_type == StagePB::EXTERNAL) {
6697
        if (stage_access_type == StagePB::AKSK) {
6698
            auto conf = S3Conf::from_obj_store_info(object_store_info);
6699
            if (!conf) {
6700
                return -1;
6701
            }
6702
6703
            s3_conf = std::move(*conf);
6704
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
6705
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
6706
            if (!conf) {
6707
                return -1;
6708
            }
6709
6710
            s3_conf = std::move(*conf);
6711
            if (instance_info_.ram_user().has_encryption_info()) {
6712
                AkSkPair plain_ak_sk_pair;
6713
                int ret = decrypt_ak_sk_helper(
6714
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
6715
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
6716
                if (ret != 0) {
6717
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
6718
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
6719
                    return -1;
6720
                }
6721
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
6722
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
6723
            } else {
6724
                s3_conf.ak = instance_info_.ram_user().ak();
6725
                s3_conf.sk = instance_info_.ram_user().sk();
6726
            }
6727
        } else {
6728
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
6729
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
6730
            return -1;
6731
        }
6732
    } else if (stage_type == StagePB::INTERNAL) {
6733
        int idx = stoi(object_store_info.id());
6734
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6735
            LOG(WARNING) << "invalid idx: " << idx;
6736
            return -1;
6737
        }
6738
6739
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6740
        auto conf = S3Conf::from_obj_store_info(old_obj);
6741
        if (!conf) {
6742
            return -1;
6743
        }
6744
6745
        s3_conf = std::move(*conf);
6746
        s3_conf.prefix = object_store_info.prefix();
6747
    } else {
6748
        LOG(WARNING) << "unknown stage type " << stage_type;
6749
        return -1;
6750
    }
6751
6752
    std::shared_ptr<S3Accessor> s3_accessor;
6753
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
6754
    if (ret != 0) {
6755
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
6756
        return -1;
6757
    }
6758
6759
    *accessor = std::move(s3_accessor);
6760
#endif
6761
3
    return 0;
6762
5
}
6763
6764
11
int InstanceRecycler::recycle_stage() {
6765
11
    int64_t num_scanned = 0;
6766
11
    int64_t num_recycled = 0;
6767
11
    const std::string task_name = "recycle_stage";
6768
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6769
6770
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
6771
6772
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6773
11
    register_recycle_task(task_name, start_time);
6774
6775
11
    DORIS_CLOUD_DEFER {
6776
11
        unregister_recycle_task(task_name);
6777
11
        int64_t cost =
6778
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6779
11
        metrics_context.finish_report();
6780
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6781
11
                .tag("instance_id", instance_id_)
6782
11
                .tag("num_scanned", num_scanned)
6783
11
                .tag("num_recycled", num_recycled);
6784
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
6775
11
    DORIS_CLOUD_DEFER {
6776
11
        unregister_recycle_task(task_name);
6777
11
        int64_t cost =
6778
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6779
11
        metrics_context.finish_report();
6780
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6781
11
                .tag("instance_id", instance_id_)
6782
11
                .tag("num_scanned", num_scanned)
6783
11
                .tag("num_recycled", num_recycled);
6784
11
    };
6785
6786
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6787
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6788
11
    std::string key0 = recycle_stage_key(key_info0);
6789
11
    std::string key1 = recycle_stage_key(key_info1);
6790
6791
11
    std::vector<std::string_view> stage_keys;
6792
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
6793
11
                         this](std::string_view k, std::string_view v) -> int {
6794
1
        ++num_scanned;
6795
1
        RecycleStagePB recycle_stage;
6796
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6797
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6798
0
            return -1;
6799
0
        }
6800
6801
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6802
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6803
0
            LOG(WARNING) << "invalid idx: " << idx;
6804
0
            return -1;
6805
0
        }
6806
6807
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6808
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6809
1
                [&] {
6810
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6811
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6812
1
                    if (!s3_conf) {
6813
1
                        return -1;
6814
1
                    }
6815
6816
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6817
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6818
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6819
1
                    if (ret != 0) {
6820
1
                        return -1;
6821
1
                    }
6822
6823
1
                    accessor = std::move(s3_accessor);
6824
1
                    return 0;
6825
1
                }(),
6826
1
                "recycle_stage:get_accessor", &accessor);
6827
6828
1
        if (ret != 0) {
6829
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6830
0
            return ret;
6831
0
        }
6832
6833
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6834
1
                .tag("instance_id", instance_id_)
6835
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6836
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6837
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6838
1
                .tag("obj_info_id", idx)
6839
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6840
1
        ret = accessor->delete_all();
6841
1
        if (ret != 0) {
6842
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6843
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6844
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6845
0
                         << ", ret=" << ret;
6846
0
            return -1;
6847
0
        }
6848
1
        metrics_context.total_recycled_num = ++num_recycled;
6849
1
        metrics_context.report();
6850
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6851
1
        stage_keys.push_back(k);
6852
1
        return 0;
6853
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6793
1
                         this](std::string_view k, std::string_view v) -> int {
6794
1
        ++num_scanned;
6795
1
        RecycleStagePB recycle_stage;
6796
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6797
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6798
0
            return -1;
6799
0
        }
6800
6801
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6802
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6803
0
            LOG(WARNING) << "invalid idx: " << idx;
6804
0
            return -1;
6805
0
        }
6806
6807
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6808
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6809
1
                [&] {
6810
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6811
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6812
1
                    if (!s3_conf) {
6813
1
                        return -1;
6814
1
                    }
6815
6816
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6817
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6818
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6819
1
                    if (ret != 0) {
6820
1
                        return -1;
6821
1
                    }
6822
6823
1
                    accessor = std::move(s3_accessor);
6824
1
                    return 0;
6825
1
                }(),
6826
1
                "recycle_stage:get_accessor", &accessor);
6827
6828
1
        if (ret != 0) {
6829
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6830
0
            return ret;
6831
0
        }
6832
6833
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6834
1
                .tag("instance_id", instance_id_)
6835
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6836
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6837
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6838
1
                .tag("obj_info_id", idx)
6839
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6840
1
        ret = accessor->delete_all();
6841
1
        if (ret != 0) {
6842
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6843
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6844
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6845
0
                         << ", ret=" << ret;
6846
0
            return -1;
6847
0
        }
6848
1
        metrics_context.total_recycled_num = ++num_recycled;
6849
1
        metrics_context.report();
6850
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6851
1
        stage_keys.push_back(k);
6852
1
        return 0;
6853
1
    };
6854
6855
11
    auto loop_done = [&stage_keys, this]() -> int {
6856
1
        if (stage_keys.empty()) return 0;
6857
1
        DORIS_CLOUD_DEFER {
6858
1
            stage_keys.clear();
6859
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6857
1
        DORIS_CLOUD_DEFER {
6858
1
            stage_keys.clear();
6859
1
        };
6860
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6861
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6862
0
            return -1;
6863
0
        }
6864
1
        return 0;
6865
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
6855
1
    auto loop_done = [&stage_keys, this]() -> int {
6856
1
        if (stage_keys.empty()) return 0;
6857
1
        DORIS_CLOUD_DEFER {
6858
1
            stage_keys.clear();
6859
1
        };
6860
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6861
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6862
0
            return -1;
6863
0
        }
6864
1
        return 0;
6865
1
    };
6866
11
    if (config::enable_recycler_stats_metrics) {
6867
0
        scan_and_statistics_stage();
6868
0
    }
6869
    // recycle_func and loop_done for scan and recycle
6870
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
6871
11
}
6872
6873
10
int InstanceRecycler::recycle_expired_stage_objects() {
6874
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
6875
6876
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6877
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
6878
6879
10
    DORIS_CLOUD_DEFER {
6880
10
        int64_t cost =
6881
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6882
10
        metrics_context.finish_report();
6883
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6884
10
                .tag("instance_id", instance_id_);
6885
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
6879
10
    DORIS_CLOUD_DEFER {
6880
10
        int64_t cost =
6881
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6882
10
        metrics_context.finish_report();
6883
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6884
10
                .tag("instance_id", instance_id_);
6885
10
    };
6886
6887
10
    int ret = 0;
6888
6889
10
    if (config::enable_recycler_stats_metrics) {
6890
0
        scan_and_statistics_expired_stage_objects();
6891
0
    }
6892
6893
10
    for (const auto& stage : instance_info_.stages()) {
6894
0
        std::stringstream ss;
6895
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
6896
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
6897
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
6898
0
           << ", prefix=" << stage.obj_info().prefix();
6899
6900
0
        if (stopped()) {
6901
0
            break;
6902
0
        }
6903
0
        if (stage.type() == StagePB::EXTERNAL) {
6904
0
            continue;
6905
0
        }
6906
0
        int idx = stoi(stage.obj_info().id());
6907
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6908
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
6909
0
            continue;
6910
0
        }
6911
6912
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6913
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6914
0
        if (!s3_conf) {
6915
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
6916
0
            continue;
6917
0
        }
6918
6919
0
        s3_conf->prefix = stage.obj_info().prefix();
6920
0
        std::shared_ptr<S3Accessor> accessor;
6921
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
6922
0
        if (ret1 != 0) {
6923
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
6924
0
            ret = -1;
6925
0
            continue;
6926
0
        }
6927
6928
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
6929
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
6930
0
            ret = -1;
6931
0
            continue;
6932
0
        }
6933
6934
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
6935
0
        int64_t expiration_time =
6936
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
6937
0
                config::internal_stage_objects_expire_time_second;
6938
0
        if (config::force_immediate_recycle) {
6939
0
            expiration_time = INT64_MAX;
6940
0
        }
6941
0
        ret1 = accessor->delete_all(expiration_time);
6942
0
        if (ret1 != 0) {
6943
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
6944
0
                         << ss.str();
6945
0
            ret = -1;
6946
0
            continue;
6947
0
        }
6948
0
        metrics_context.total_recycled_num++;
6949
0
        metrics_context.report();
6950
0
    }
6951
10
    return ret;
6952
10
}
6953
6954
193
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
6955
193
    std::lock_guard lock(recycle_tasks_mutex);
6956
193
    running_recycle_tasks[task_name] = start_time;
6957
193
}
6958
6959
193
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
6960
193
    std::lock_guard lock(recycle_tasks_mutex);
6961
193
    DCHECK(running_recycle_tasks[task_name] > 0);
6962
193
    running_recycle_tasks.erase(task_name);
6963
193
}
6964
6965
21
bool InstanceRecycler::check_recycle_tasks() {
6966
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
6967
21
    {
6968
21
        std::lock_guard lock(recycle_tasks_mutex);
6969
21
        tmp_running_recycle_tasks = running_recycle_tasks;
6970
21
    }
6971
6972
21
    bool found = false;
6973
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6974
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
6975
20
        int64_t cost = now - start_time;
6976
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
6977
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
6978
20
                    .tag("instance_id", instance_id_)
6979
20
                    .tag("task", task_name);
6980
20
            found = true;
6981
20
        }
6982
20
    }
6983
6984
21
    return found;
6985
21
}
6986
6987
// Scan and statistics indexes that need to be recycled
6988
0
int InstanceRecycler::scan_and_statistics_indexes() {
6989
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
6990
6991
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
6992
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
6993
0
    std::string index_key0;
6994
0
    std::string index_key1;
6995
0
    recycle_index_key(index_key_info0, &index_key0);
6996
0
    recycle_index_key(index_key_info1, &index_key1);
6997
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6998
6999
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
7000
0
        RecycleIndexPB index_pb;
7001
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
7002
0
            return 0;
7003
0
        }
7004
0
        int64_t current_time = ::time(nullptr);
7005
0
        if (current_time <
7006
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
7007
0
            return 0;
7008
0
        }
7009
        // decode index_id
7010
0
        auto k1 = k;
7011
0
        k1.remove_prefix(1);
7012
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7013
0
        decode_key(&k1, &out);
7014
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
7015
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
7016
0
        std::unique_ptr<Transaction> txn;
7017
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7018
0
        if (err != TxnErrorCode::TXN_OK) {
7019
0
            return 0;
7020
0
        }
7021
0
        std::string val;
7022
0
        err = txn->get(k, &val);
7023
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7024
0
            return 0;
7025
0
        }
7026
0
        if (err != TxnErrorCode::TXN_OK) {
7027
0
            return 0;
7028
0
        }
7029
0
        index_pb.Clear();
7030
0
        if (!index_pb.ParseFromString(val)) {
7031
0
            return 0;
7032
0
        }
7033
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
7034
0
            return 0;
7035
0
        }
7036
0
        metrics_context.total_need_recycle_num++;
7037
0
        return 0;
7038
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7039
7040
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
7041
0
    metrics_context.report(true);
7042
0
    segment_metrics_context_.report(true);
7043
0
    tablet_metrics_context_.report(true);
7044
0
    return ret;
7045
0
}
7046
7047
// Scan and statistics partitions that need to be recycled
7048
0
int InstanceRecycler::scan_and_statistics_partitions() {
7049
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
7050
7051
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
7052
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
7053
0
    std::string part_key0;
7054
0
    std::string part_key1;
7055
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7056
7057
0
    recycle_partition_key(part_key_info0, &part_key0);
7058
0
    recycle_partition_key(part_key_info1, &part_key1);
7059
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
7060
0
        RecyclePartitionPB part_pb;
7061
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
7062
0
            return 0;
7063
0
        }
7064
0
        int64_t current_time = ::time(nullptr);
7065
0
        if (current_time <
7066
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
7067
0
            return 0;
7068
0
        }
7069
        // decode partition_id
7070
0
        auto k1 = k;
7071
0
        k1.remove_prefix(1);
7072
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7073
0
        decode_key(&k1, &out);
7074
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
7075
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
7076
        // Change state to RECYCLING
7077
0
        std::unique_ptr<Transaction> txn;
7078
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7079
0
        if (err != TxnErrorCode::TXN_OK) {
7080
0
            return 0;
7081
0
        }
7082
0
        std::string val;
7083
0
        err = txn->get(k, &val);
7084
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7085
0
            return 0;
7086
0
        }
7087
0
        if (err != TxnErrorCode::TXN_OK) {
7088
0
            return 0;
7089
0
        }
7090
0
        part_pb.Clear();
7091
0
        if (!part_pb.ParseFromString(val)) {
7092
0
            return 0;
7093
0
        }
7094
        // Partitions with PREPARED state MUST have no data
7095
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
7096
0
        int ret = 0;
7097
0
        for (int64_t index_id : part_pb.index_id()) {
7098
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
7099
0
                                            partition_id, is_empty_tablet) != 0) {
7100
0
                ret = 0;
7101
0
            }
7102
0
        }
7103
0
        metrics_context.total_need_recycle_num++;
7104
0
        return ret;
7105
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7106
7107
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
7108
0
    metrics_context.report(true);
7109
0
    segment_metrics_context_.report(true);
7110
0
    tablet_metrics_context_.report(true);
7111
0
    return ret;
7112
0
}
7113
7114
// Scan and statistics rowsets that need to be recycled
7115
0
int InstanceRecycler::scan_and_statistics_rowsets() {
7116
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
7117
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
7118
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
7119
0
    std::string recyc_rs_key0;
7120
0
    std::string recyc_rs_key1;
7121
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
7122
0
                recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
7123
0
       int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7124
7125
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
7126
0
        RecycleRowsetPB rowset;
7127
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
7128
0
            return 0;
7129
0
        }
7130
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
7131
0
        int64_t current_time = ::time(nullptr);
7132
0
        if (current_time <
7133
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
7134
0
            return 0;
7135
0
        }
7136
7137
0
        if (!rowset.has_type()) {
7138
0
            if (!rowset.has_resource_id()) [[unlikely]] {
7139
0
                return 0;
7140
0
            }
7141
0
            if (rowset.resource_id().empty()) [[unlikely]] {
7142
0
                return 0;
7143
0
            }
7144
0
            metrics_context.total_need_recycle_num++;
7145
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
7146
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
7147
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
7148
0
            return 0;
7149
0
        }
7150
7151
0
        if(!rowset_meta->has_is_recycled() || !rowset_meta->is_recycled()) {
7152
0
            return 0;
7153
0
        }
7154
7155
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
7156
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
7157
0
                return 0;
7158
0
            }
7159
0
        }
7160
0
        metrics_context.total_need_recycle_num++;
7161
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
7162
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
7163
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
7164
0
        return 0;
7165
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7166
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
7167
0
    metrics_context.report(true);
7168
0
    segment_metrics_context_.report(true);
7169
0
    return ret;
7170
0
}
7171
7172
// Scan and statistics tmp_rowsets that need to be recycled
7173
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
7174
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
7175
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
7176
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
7177
0
    std::string tmp_rs_key0;
7178
0
    std::string tmp_rs_key1;
7179
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
7180
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
7181
7182
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7183
7184
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
7185
0
        doris::RowsetMetaCloudPB rowset;
7186
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
7187
0
            return 0;
7188
0
        }
7189
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
7190
0
        int64_t current_time = ::time(nullptr);
7191
0
        if (current_time < expiration) {
7192
0
            return 0;
7193
0
        }
7194
7195
0
        DCHECK_GT(rowset.txn_id(), 0)
7196
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
7197
7198
0
        if(!rowset.has_is_recycled() || !rowset.is_recycled()) {
7199
0
            return 0;
7200
0
        }
7201
7202
0
        if (!rowset.has_resource_id()) {
7203
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
7204
0
                return 0;
7205
0
            }
7206
0
            return 0;
7207
0
        }
7208
7209
0
        metrics_context.total_need_recycle_num++;
7210
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
7211
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
7212
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
7213
0
        return 0;
7214
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7215
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
7216
0
    metrics_context.report(true);
7217
0
    segment_metrics_context_.report(true);
7218
0
    return ret;
7219
0
}
7220
7221
// Scan and statistics abort_timeout_txn that need to be recycled
7222
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
7223
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
7224
7225
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
7226
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
7227
0
    std::string begin_txn_running_key;
7228
0
    std::string end_txn_running_key;
7229
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
7230
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
7231
7232
0
    int64_t current_time =
7233
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7234
7235
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
7236
0
                                               std::string_view k, std::string_view v) -> int {
7237
0
        std::unique_ptr<Transaction> txn;
7238
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7239
0
        if (err != TxnErrorCode::TXN_OK) {
7240
0
            return 0;
7241
0
        }
7242
0
        std::string_view k1 = k;
7243
0
        k1.remove_prefix(1);
7244
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7245
0
        if (decode_key(&k1, &out) != 0) {
7246
0
            return 0;
7247
0
        }
7248
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
7249
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
7250
        // Update txn_info
7251
0
        std::string txn_inf_key, txn_inf_val;
7252
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
7253
0
        err = txn->get(txn_inf_key, &txn_inf_val);
7254
0
        if (err != TxnErrorCode::TXN_OK) {
7255
0
            return 0;
7256
0
        }
7257
0
        TxnInfoPB txn_info;
7258
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
7259
0
            return 0;
7260
0
        }
7261
7262
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
7263
0
            TxnRunningPB txn_running_pb;
7264
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
7265
0
                return 0;
7266
0
            }
7267
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
7268
0
                return 0;
7269
0
            }
7270
0
            metrics_context.total_need_recycle_num++;
7271
0
        }
7272
0
        return 0;
7273
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7274
7275
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
7276
0
    metrics_context.report(true);
7277
0
    return ret;
7278
0
}
7279
7280
// Scans expired txn labels and collects statistics on those that need to be recycled
7281
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
7282
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
7283
7284
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
7285
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
7286
0
    std::string begin_recycle_txn_key;
7287
0
    std::string end_recycle_txn_key;
7288
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
7289
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
7290
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7291
0
    int64_t current_time_ms =
7292
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7293
7294
    // for calculate the total num or bytes of recyled objects
7295
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
7296
0
        RecycleTxnPB recycle_txn_pb;
7297
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
7298
0
            return 0;
7299
0
        }
7300
0
        if ((config::force_immediate_recycle) ||
7301
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
7302
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
7303
0
             current_time_ms)) {
7304
0
            metrics_context.total_need_recycle_num++;
7305
0
        }
7306
0
        return 0;
7307
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7308
7309
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
7310
0
    metrics_context.report(true);
7311
0
    return ret;
7312
0
}
7313
7314
// Scan and statistics copy_jobs that need to be recycled
7315
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
7316
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
7317
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
7318
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
7319
0
    std::string key0;
7320
0
    std::string key1;
7321
0
    copy_job_key(key_info0, &key0);
7322
0
    copy_job_key(key_info1, &key1);
7323
7324
    // for calculate the total num or bytes of recyled objects
7325
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
7326
0
        CopyJobPB copy_job;
7327
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
7328
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
7329
0
            return 0;
7330
0
        }
7331
7332
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
7333
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
7334
0
                int64_t current_time =
7335
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7336
0
                if (copy_job.finish_time_ms() > 0) {
7337
0
                    if (!config::force_immediate_recycle &&
7338
0
                        current_time < copy_job.finish_time_ms() +
7339
0
                                               config::copy_job_max_retention_second * 1000) {
7340
0
                        return 0;
7341
0
                    }
7342
0
                } else {
7343
0
                    if (!config::force_immediate_recycle &&
7344
0
                        current_time < copy_job.start_time_ms() +
7345
0
                                               config::copy_job_max_retention_second * 1000) {
7346
0
                        return 0;
7347
0
                    }
7348
0
                }
7349
0
            }
7350
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
7351
0
            int64_t current_time =
7352
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7353
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
7354
0
                return 0;
7355
0
            }
7356
0
        }
7357
0
        metrics_context.total_need_recycle_num++;
7358
0
        return 0;
7359
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7360
7361
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7362
0
    metrics_context.report(true);
7363
0
    return ret;
7364
0
}
7365
7366
// Scan and statistics stage that need to be recycled
7367
0
int InstanceRecycler::scan_and_statistics_stage() {
7368
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
7369
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
7370
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
7371
0
    std::string key0 = recycle_stage_key(key_info0);
7372
0
    std::string key1 = recycle_stage_key(key_info1);
7373
7374
    // for calculate the total num or bytes of recyled objects
7375
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
7376
0
                                                        std::string_view v) -> int {
7377
0
        RecycleStagePB recycle_stage;
7378
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
7379
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
7380
0
            return 0;
7381
0
        }
7382
7383
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
7384
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
7385
0
            LOG(WARNING) << "invalid idx: " << idx;
7386
0
            return 0;
7387
0
        }
7388
7389
0
        std::shared_ptr<StorageVaultAccessor> accessor;
7390
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
7391
0
                [&] {
7392
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
7393
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7394
0
                    if (!s3_conf) {
7395
0
                        return 0;
7396
0
                    }
7397
7398
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
7399
0
                    std::shared_ptr<S3Accessor> s3_accessor;
7400
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
7401
0
                    if (ret != 0) {
7402
0
                        return 0;
7403
0
                    }
7404
7405
0
                    accessor = std::move(s3_accessor);
7406
0
                    return 0;
7407
0
                }(),
7408
0
                "recycle_stage:get_accessor", &accessor);
7409
7410
0
        if (ret != 0) {
7411
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
7412
0
            return 0;
7413
0
        }
7414
7415
0
        metrics_context.total_need_recycle_num++;
7416
0
        return 0;
7417
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7418
7419
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7420
0
    metrics_context.report(true);
7421
0
    return ret;
7422
0
}
7423
7424
// Scan and statistics expired_stage_objects that need to be recycled
7425
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
7426
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
7427
7428
    // for calculate the total num or bytes of recyled objects
7429
0
    auto scan_and_statistics = [&metrics_context, this]() {
7430
0
        for (const auto& stage : instance_info_.stages()) {
7431
0
            if (stopped()) {
7432
0
                break;
7433
0
            }
7434
0
            if (stage.type() == StagePB::EXTERNAL) {
7435
0
                continue;
7436
0
            }
7437
0
            int idx = stoi(stage.obj_info().id());
7438
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
7439
0
                continue;
7440
0
            }
7441
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
7442
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7443
0
            if (!s3_conf) {
7444
0
                continue;
7445
0
            }
7446
0
            s3_conf->prefix = stage.obj_info().prefix();
7447
0
            std::shared_ptr<S3Accessor> accessor;
7448
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
7449
0
            if (ret1 != 0) {
7450
0
                continue;
7451
0
            }
7452
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
7453
0
                continue;
7454
0
            }
7455
0
            metrics_context.total_need_recycle_num++;
7456
0
        }
7457
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
7458
7459
0
    scan_and_statistics();
7460
0
    metrics_context.report(true);
7461
0
    return 0;
7462
0
}
7463
7464
// Scan and statistics versions that need to be recycled
7465
0
int InstanceRecycler::scan_and_statistics_versions() {
7466
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
7467
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
7468
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
7469
7470
0
    int64_t last_scanned_table_id = 0;
7471
0
    bool is_recycled = false; // Is last scanned kv recycled
7472
    // for calculate the total num or bytes of recyled objects
7473
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
7474
0
                                       std::string_view k, std::string_view) {
7475
0
        auto k1 = k;
7476
0
        k1.remove_prefix(1);
7477
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
7478
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7479
0
        decode_key(&k1, &out);
7480
0
        DCHECK_EQ(out.size(), 6) << k;
7481
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
7482
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
7483
0
            metrics_context.total_need_recycle_num +=
7484
0
                    is_recycled; // Version kv of this table has been recycled
7485
0
            return 0;
7486
0
        }
7487
0
        last_scanned_table_id = table_id;
7488
0
        is_recycled = false;
7489
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
7490
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
7491
0
        std::unique_ptr<Transaction> txn;
7492
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7493
0
        if (err != TxnErrorCode::TXN_OK) {
7494
0
            return 0;
7495
0
        }
7496
0
        std::unique_ptr<RangeGetIterator> iter;
7497
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
7498
0
        if (err != TxnErrorCode::TXN_OK) {
7499
0
            return 0;
7500
0
        }
7501
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
7502
0
            return 0;
7503
0
        }
7504
0
        metrics_context.total_need_recycle_num++;
7505
0
        is_recycled = true;
7506
0
        return 0;
7507
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7508
7509
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
7510
0
    metrics_context.report(true);
7511
0
    return ret;
7512
0
}
7513
7514
// Scan and statistics restore jobs that need to be recycled
7515
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
7516
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
7517
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
7518
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
7519
0
    std::string restore_job_key0;
7520
0
    std::string restore_job_key1;
7521
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
7522
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
7523
7524
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7525
7526
    // for calculate the total num or bytes of recyled objects
7527
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
7528
0
        RestoreJobCloudPB restore_job_pb;
7529
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
7530
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
7531
0
            return 0;
7532
0
        }
7533
0
        int64_t expiration =
7534
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
7535
0
        int64_t current_time = ::time(nullptr);
7536
0
        if (current_time < expiration) { // not expired
7537
0
            return 0;
7538
0
        }
7539
0
        metrics_context.total_need_recycle_num++;
7540
0
        if(restore_job_pb.need_recycle_data()) {
7541
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
7542
0
        }
7543
0
        return 0;
7544
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7545
7546
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
7547
0
    metrics_context.report(true);
7548
0
    return ret;
7549
0
}
7550
7551
3
void InstanceRecycler::scan_and_statistics_operation_logs() {
7552
3
    if (!should_recycle_versioned_keys()) {
7553
0
        return;
7554
0
    }
7555
7556
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_operation_logs");
7557
7558
3
    OperationLogRecycleChecker recycle_checker(instance_id_, txn_kv_.get(), instance_info_);
7559
3
    if (recycle_checker.init() != 0) {
7560
0
        return;
7561
0
    }
7562
7563
3
    std::string log_key_prefix = versioned::log_key(instance_id_);
7564
3
    std::string begin_key = encode_versioned_key(log_key_prefix, Versionstamp::min());
7565
3
    std::string end_key = encode_versioned_key(log_key_prefix, Versionstamp::max());
7566
7567
3
    std::unique_ptr<BlobIterator> iter = blob_get_range(txn_kv_, begin_key, end_key);
7568
8
    for (; iter->valid(); iter->next()) {
7569
5
        OperationLogPB operation_log;
7570
5
        if (!iter->parse_value(&operation_log)) {
7571
0
            continue;
7572
0
        }
7573
7574
5
        std::string_view key = iter->key();
7575
5
        Versionstamp log_versionstamp;
7576
5
        if (!decode_versioned_key(&key, &log_versionstamp)) {
7577
0
            continue;
7578
0
        }
7579
7580
5
        OperationLogReferenceInfo ref_info;
7581
5
        if (recycle_checker.can_recycle(log_versionstamp, operation_log.min_timestamp(),
7582
5
                                         &ref_info)) {
7583
4
            metrics_context.total_need_recycle_num++;
7584
4
            metrics_context.total_need_recycle_data_size += operation_log.ByteSizeLong();
7585
4
        }
7586
5
    }
7587
7588
3
    metrics_context.report(true);
7589
3
}
7590
7591
int InstanceRecycler::classify_rowset_task_by_ref_count(
7592
60
        RowsetDeleteTask& task, std::vector<RowsetDeleteTask>& batch_delete_tasks) {
7593
60
    constexpr int MAX_RETRY = 10;
7594
60
    const auto& rowset_meta = task.rowset_meta;
7595
60
    int64_t tablet_id = rowset_meta.tablet_id();
7596
60
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
7597
60
    std::string_view reference_instance_id = instance_id_;
7598
60
    if (rowset_meta.has_reference_instance_id()) {
7599
5
        reference_instance_id = rowset_meta.reference_instance_id();
7600
5
    }
7601
7602
61
    for (int i = 0; i < MAX_RETRY; ++i) {
7603
61
        std::unique_ptr<Transaction> txn;
7604
61
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7605
61
        if (err != TxnErrorCode::TXN_OK) {
7606
0
            LOG_WARNING("failed to create txn when classifying rowset task")
7607
0
                    .tag("instance_id", instance_id_)
7608
0
                    .tag("tablet_id", tablet_id)
7609
0
                    .tag("rowset_id", rowset_id)
7610
0
                    .tag("err", err);
7611
0
            return -1;
7612
0
        }
7613
7614
61
        std::string rowset_ref_count_key =
7615
61
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
7616
61
        task.rowset_ref_count_key = rowset_ref_count_key;
7617
7618
61
        int64_t ref_count = 0;
7619
61
        {
7620
61
            std::string value;
7621
61
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
7622
61
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7623
0
                ref_count = 1;
7624
61
            } else if (err != TxnErrorCode::TXN_OK) {
7625
0
                LOG_WARNING("failed to get rowset ref count key when classifying")
7626
0
                        .tag("instance_id", instance_id_)
7627
0
                        .tag("tablet_id", tablet_id)
7628
0
                        .tag("rowset_id", rowset_id)
7629
0
                        .tag("err", err);
7630
0
                return -1;
7631
61
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
7632
0
                LOG_WARNING("failed to decode rowset data ref count when classifying")
7633
0
                        .tag("instance_id", instance_id_)
7634
0
                        .tag("tablet_id", tablet_id)
7635
0
                        .tag("rowset_id", rowset_id)
7636
0
                        .tag("value", hex(value));
7637
0
                return -1;
7638
0
            }
7639
61
        }
7640
7641
61
        if (ref_count > 1) {
7642
            // ref_count > 1: decrement count, remove recycle keys, don't add to batch delete
7643
12
            txn->atomic_add(rowset_ref_count_key, -1);
7644
12
            LOG_INFO("decrease rowset data ref count in classification phase")
7645
12
                    .tag("instance_id", instance_id_)
7646
12
                    .tag("tablet_id", tablet_id)
7647
12
                    .tag("rowset_id", rowset_id)
7648
12
                    .tag("ref_count", ref_count - 1)
7649
12
                    .tag("ref_count_key", hex(rowset_ref_count_key));
7650
7651
12
            if (!task.recycle_rowset_key.empty()) {
7652
0
                txn->remove(task.recycle_rowset_key);
7653
0
                LOG_INFO("remove recycle rowset key in classification phase")
7654
0
                        .tag("key", hex(task.recycle_rowset_key));
7655
0
            }
7656
12
            if (!task.non_versioned_rowset_key.empty()) {
7657
12
                txn->remove(task.non_versioned_rowset_key);
7658
12
                LOG_INFO("remove non versioned rowset key in classification phase")
7659
12
                        .tag("key", hex(task.non_versioned_rowset_key));
7660
12
            }
7661
7662
12
            err = txn->commit();
7663
12
            if (err == TxnErrorCode::TXN_CONFLICT) {
7664
1
                VLOG_DEBUG << "decrease rowset ref count but txn conflict in classification, retry"
7665
0
                           << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
7666
0
                           << ", ref_count=" << ref_count << ", retry=" << i;
7667
1
                std::this_thread::sleep_for(std::chrono::milliseconds(500));
7668
1
                continue;
7669
11
            } else if (err != TxnErrorCode::TXN_OK) {
7670
0
                LOG_WARNING("failed to commit txn when classifying rowset task")
7671
0
                        .tag("instance_id", instance_id_)
7672
0
                        .tag("tablet_id", tablet_id)
7673
0
                        .tag("rowset_id", rowset_id)
7674
0
                        .tag("err", err);
7675
0
                return -1;
7676
0
            }
7677
11
            return 1; // handled, not added to batch delete
7678
49
        } else {
7679
            // ref_count == 1: Add to batch delete plan without modifying any KV.
7680
            // Keep recycle_rowset_key as "pending recycle" marker until data is actually deleted.
7681
49
            LOG_INFO("add rowset to batch delete plan")
7682
49
                    .tag("instance_id", instance_id_)
7683
49
                    .tag("tablet_id", tablet_id)
7684
49
                    .tag("rowset_id", rowset_id)
7685
49
                    .tag("resource_id", rowset_meta.resource_id())
7686
49
                    .tag("ref_count", ref_count);
7687
7688
49
            batch_delete_tasks.push_back(std::move(task));
7689
49
            return 0; // added to batch delete
7690
49
        }
7691
61
    }
7692
7693
0
    LOG_WARNING("failed to classify rowset task after retry")
7694
0
            .tag("instance_id", instance_id_)
7695
0
            .tag("tablet_id", tablet_id)
7696
0
            .tag("rowset_id", rowset_id)
7697
0
            .tag("retry", MAX_RETRY);
7698
0
    return -1;
7699
60
}
7700
7701
10
int InstanceRecycler::cleanup_rowset_metadata(const std::vector<RowsetDeleteTask>& tasks) {
7702
10
    int ret = 0;
7703
49
    for (const auto& task : tasks) {
7704
49
        int64_t tablet_id = task.rowset_meta.tablet_id();
7705
49
        const std::string& rowset_id = task.rowset_meta.rowset_id_v2();
7706
7707
        // Note: decrement_packed_file_ref_counts is already called in delete_rowset_data,
7708
        // so we don't need to call it again here.
7709
7710
        // Remove all metadata keys in one transaction
7711
49
        std::unique_ptr<Transaction> txn;
7712
49
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7713
49
        if (err != TxnErrorCode::TXN_OK) {
7714
0
            LOG_WARNING("failed to create txn when cleaning up metadata")
7715
0
                    .tag("instance_id", instance_id_)
7716
0
                    .tag("tablet_id", tablet_id)
7717
0
                    .tag("rowset_id", rowset_id)
7718
0
                    .tag("err", err);
7719
0
            ret = -1;
7720
0
            continue;
7721
0
        }
7722
7723
49
        std::string_view reference_instance_id = instance_id_;
7724
49
        if (task.rowset_meta.has_reference_instance_id()) {
7725
0
            reference_instance_id = task.rowset_meta.reference_instance_id();
7726
0
        }
7727
7728
49
        txn->remove(task.rowset_ref_count_key);
7729
49
        LOG_INFO("delete rowset data ref count key in cleanup phase")
7730
49
                .tag("instance_id", instance_id_)
7731
49
                .tag("tablet_id", tablet_id)
7732
49
                .tag("rowset_id", rowset_id)
7733
49
                .tag("ref_count_key", hex(task.rowset_ref_count_key));
7734
7735
49
        std::string dbm_start_key =
7736
49
                meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
7737
49
        std::string dbm_end_key = meta_delete_bitmap_key(
7738
49
                {reference_instance_id, tablet_id, rowset_id,
7739
49
                 std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
7740
49
        txn->remove(dbm_start_key, dbm_end_key);
7741
49
        LOG_INFO("remove delete bitmap kv in cleanup phase")
7742
49
                .tag("instance_id", instance_id_)
7743
49
                .tag("tablet_id", tablet_id)
7744
49
                .tag("rowset_id", rowset_id)
7745
49
                .tag("begin", hex(dbm_start_key))
7746
49
                .tag("end", hex(dbm_end_key));
7747
7748
49
        std::string versioned_dbm_start_key =
7749
49
                versioned::meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id});
7750
49
        std::string versioned_dbm_end_key = versioned_dbm_start_key;
7751
49
        encode_int64(INT64_MAX, &versioned_dbm_end_key);
7752
49
        txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
7753
49
        LOG_INFO("remove versioned delete bitmap kv in cleanup phase")
7754
49
                .tag("instance_id", instance_id_)
7755
49
                .tag("tablet_id", tablet_id)
7756
49
                .tag("rowset_id", rowset_id)
7757
49
                .tag("begin", hex(versioned_dbm_start_key))
7758
49
                .tag("end", hex(versioned_dbm_end_key));
7759
7760
        // Remove versioned meta rowset key
7761
49
        if (!task.versioned_rowset_key.empty()) {
7762
49
            versioned::document_remove<RowsetMetaCloudPB>(
7763
49
                txn.get(), task.versioned_rowset_key, task.versionstamp);
7764
49
            LOG_INFO("remove versioned meta rowset key in cleanup phase")
7765
49
                    .tag("instance_id", instance_id_)
7766
49
                    .tag("tablet_id", tablet_id)
7767
49
                    .tag("rowset_id", rowset_id)
7768
49
                    .tag("key_prefix", hex(task.versioned_rowset_key));
7769
49
        }
7770
7771
49
        if (!task.non_versioned_rowset_key.empty()) {
7772
49
            txn->remove(task.non_versioned_rowset_key);
7773
49
            LOG_INFO("remove non versioned rowset key in cleanup phase")
7774
49
                    .tag("instance_id", instance_id_)
7775
49
                    .tag("tablet_id", tablet_id)
7776
49
                    .tag("rowset_id", rowset_id)
7777
49
                    .tag("key", hex(task.non_versioned_rowset_key));
7778
49
        }
7779
7780
        // Remove recycle_rowset_key last to ensure retry safety:
7781
        // if cleanup fails, this key remains and triggers next round retry.
7782
49
        if (!task.recycle_rowset_key.empty()) {
7783
0
            txn->remove(task.recycle_rowset_key);
7784
0
            LOG_INFO("remove recycle rowset key in cleanup phase")
7785
0
                    .tag("instance_id", instance_id_)
7786
0
                    .tag("tablet_id", tablet_id)
7787
0
                    .tag("rowset_id", rowset_id)
7788
0
                    .tag("key", hex(task.recycle_rowset_key));
7789
0
        }
7790
7791
49
        err = txn->commit();
7792
49
        if (err != TxnErrorCode::TXN_OK) {
7793
            // Metadata cleanup failed. recycle_rowset_key remains, next round will retry.
7794
0
            LOG_WARNING("failed to commit cleanup metadata txn, will retry next round")
7795
0
                    .tag("instance_id", instance_id_)
7796
0
                    .tag("tablet_id", tablet_id)
7797
0
                    .tag("rowset_id", rowset_id)
7798
0
                    .tag("err", err);
7799
0
            ret = -1;
7800
0
            continue;
7801
0
        }
7802
7803
49
        LOG_INFO("cleanup rowset metadata success")
7804
49
                .tag("instance_id", instance_id_)
7805
49
                .tag("tablet_id", tablet_id)
7806
49
                .tag("rowset_id", rowset_id);
7807
49
    }
7808
10
    return ret;
7809
10
}
7810
7811
} // namespace doris::cloud