Coverage Report

Created: 2026-05-28 18:28

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <functional>
36
#include <initializer_list>
37
#include <memory>
38
#include <numeric>
39
#include <optional>
40
#include <random>
41
#include <string>
42
#include <string_view>
43
#include <thread>
44
#include <unordered_map>
45
#include <utility>
46
#include <variant>
47
48
#include "common/defer.h"
49
#include "common/stopwatch.h"
50
#include "meta-service/meta_service.h"
51
#include "meta-service/meta_service_helper.h"
52
#include "meta-service/meta_service_schema.h"
53
#include "meta-store/blob_message.h"
54
#include "meta-store/meta_reader.h"
55
#include "meta-store/txn_kv.h"
56
#include "meta-store/txn_kv_error.h"
57
#include "meta-store/versioned_value.h"
58
#include "recycler/checker.h"
59
#ifdef ENABLE_HDFS_STORAGE_VAULT
60
#include "recycler/hdfs_accessor.h"
61
#endif
62
#include "recycler/s3_accessor.h"
63
#include "recycler/storage_vault_accessor.h"
64
#ifdef UNIT_TEST
65
#include "../test/mock_accessor.h"
66
#endif
67
#include "common/bvars.h"
68
#include "common/config.h"
69
#include "common/encryption_util.h"
70
#include "common/logging.h"
71
#include "common/simple_thread_pool.h"
72
#include "common/util.h"
73
#include "cpp/sync_point.h"
74
#include "meta-store/codec.h"
75
#include "meta-store/document_message.h"
76
#include "meta-store/keys.h"
77
#include "recycler/recycler_service.h"
78
#include "recycler/sync_executor.h"
79
#include "recycler/util.h"
80
#include "snapshot/snapshot_manager_factory.h"
81
82
namespace doris::cloud {
83
84
using namespace std::chrono;
85
86
namespace {
87
88
0
int64_t packed_file_retry_sleep_ms() {
89
0
    const int64_t min_ms = std::max<int64_t>(0, config::packed_file_txn_retry_sleep_min_ms);
90
0
    const int64_t max_ms = std::max<int64_t>(min_ms, config::packed_file_txn_retry_sleep_max_ms);
91
0
    thread_local std::mt19937_64 gen(std::random_device {}());
92
0
    std::uniform_int_distribution<int64_t> dist(min_ms, max_ms);
93
0
    return dist(gen);
94
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
95
96
0
void sleep_for_packed_file_retry() {
97
0
    std::this_thread::sleep_for(std::chrono::milliseconds(packed_file_retry_sleep_ms()));
98
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
99
100
37
bool filter_out_instance(const std::string& instance_id) {
101
37
    if (config::recycle_whitelist.empty()) {
102
35
        return std::ranges::find(config::recycle_blacklist, instance_id) !=
103
35
               config::recycle_blacklist.end();
104
35
    }
105
2
    return std::ranges::find(config::recycle_whitelist, instance_id) ==
106
2
           config::recycle_whitelist.end();
107
37
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_119filter_out_instanceERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_119filter_out_instanceERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
100
37
bool filter_out_instance(const std::string& instance_id) {
101
37
    if (config::recycle_whitelist.empty()) {
102
35
        return std::ranges::find(config::recycle_blacklist, instance_id) !=
103
35
               config::recycle_blacklist.end();
104
35
    }
105
2
    return std::ranges::find(config::recycle_whitelist, instance_id) ==
106
2
           config::recycle_whitelist.end();
107
37
}
108
109
} // namespace
110
111
// return 0 for success get a key, 1 for key not found, negative for error
112
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
113
0
    std::unique_ptr<Transaction> txn;
114
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
115
0
    if (err != TxnErrorCode::TXN_OK) {
116
0
        return -1;
117
0
    }
118
0
    switch (txn->get(key, &val, true)) {
119
0
    case TxnErrorCode::TXN_OK:
120
0
        return 0;
121
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
122
0
        return 1;
123
0
    default:
124
0
        return -1;
125
0
    };
126
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
127
128
// 0 for success, negative for error
129
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
130
337
                   std::unique_ptr<RangeGetIterator>& it) {
131
337
    std::unique_ptr<Transaction> txn;
132
337
    TxnErrorCode err = txn_kv->create_txn(&txn);
133
337
    if (err != TxnErrorCode::TXN_OK) {
134
0
        return -1;
135
0
    }
136
337
    switch (txn->get(begin, end, &it, true)) {
137
337
    case TxnErrorCode::TXN_OK:
138
337
        return 0;
139
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
140
0
        return 1;
141
0
    default:
142
0
        return -1;
143
337
    };
144
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
130
31
                   std::unique_ptr<RangeGetIterator>& it) {
131
31
    std::unique_ptr<Transaction> txn;
132
31
    TxnErrorCode err = txn_kv->create_txn(&txn);
133
31
    if (err != TxnErrorCode::TXN_OK) {
134
0
        return -1;
135
0
    }
136
31
    switch (txn->get(begin, end, &it, true)) {
137
31
    case TxnErrorCode::TXN_OK:
138
31
        return 0;
139
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
140
0
        return 1;
141
0
    default:
142
0
        return -1;
143
31
    };
144
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
130
306
                   std::unique_ptr<RangeGetIterator>& it) {
131
306
    std::unique_ptr<Transaction> txn;
132
306
    TxnErrorCode err = txn_kv->create_txn(&txn);
133
306
    if (err != TxnErrorCode::TXN_OK) {
134
0
        return -1;
135
0
    }
136
306
    switch (txn->get(begin, end, &it, true)) {
137
306
    case TxnErrorCode::TXN_OK:
138
306
        return 0;
139
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
140
0
        return 1;
141
0
    default:
142
0
        return -1;
143
306
    };
144
0
}
145
146
// return 0 for success otherwise error
147
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
148
6
    std::unique_ptr<Transaction> txn;
149
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
150
6
    if (err != TxnErrorCode::TXN_OK) {
151
0
        return -1;
152
0
    }
153
10
    for (auto k : keys) {
154
10
        txn->remove(k);
155
10
    }
156
6
    switch (txn->commit()) {
157
6
    case TxnErrorCode::TXN_OK:
158
6
        return 0;
159
0
    case TxnErrorCode::TXN_CONFLICT:
160
0
        return -1;
161
0
    default:
162
0
        return -1;
163
6
    }
164
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
147
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
148
1
    std::unique_ptr<Transaction> txn;
149
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
150
1
    if (err != TxnErrorCode::TXN_OK) {
151
0
        return -1;
152
0
    }
153
1
    for (auto k : keys) {
154
1
        txn->remove(k);
155
1
    }
156
1
    switch (txn->commit()) {
157
1
    case TxnErrorCode::TXN_OK:
158
1
        return 0;
159
0
    case TxnErrorCode::TXN_CONFLICT:
160
0
        return -1;
161
0
    default:
162
0
        return -1;
163
1
    }
164
1
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
147
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
148
5
    std::unique_ptr<Transaction> txn;
149
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
150
5
    if (err != TxnErrorCode::TXN_OK) {
151
0
        return -1;
152
0
    }
153
9
    for (auto k : keys) {
154
9
        txn->remove(k);
155
9
    }
156
5
    switch (txn->commit()) {
157
5
    case TxnErrorCode::TXN_OK:
158
5
        return 0;
159
0
    case TxnErrorCode::TXN_CONFLICT:
160
0
        return -1;
161
0
    default:
162
0
        return -1;
163
5
    }
164
5
}
165
166
// return 0 for success otherwise error
167
139
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
168
139
    std::unique_ptr<Transaction> txn;
169
139
    TxnErrorCode err = txn_kv->create_txn(&txn);
170
139
    if (err != TxnErrorCode::TXN_OK) {
171
0
        return -1;
172
0
    }
173
106k
    for (auto& k : keys) {
174
106k
        txn->remove(k);
175
106k
    }
176
139
    switch (txn->commit()) {
177
139
    case TxnErrorCode::TXN_OK:
178
139
        return 0;
179
0
    case TxnErrorCode::TXN_CONFLICT:
180
0
        return -1;
181
0
    default:
182
0
        return -1;
183
139
    }
184
139
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
167
33
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
168
33
    std::unique_ptr<Transaction> txn;
169
33
    TxnErrorCode err = txn_kv->create_txn(&txn);
170
33
    if (err != TxnErrorCode::TXN_OK) {
171
0
        return -1;
172
0
    }
173
33
    for (auto& k : keys) {
174
16
        txn->remove(k);
175
16
    }
176
33
    switch (txn->commit()) {
177
33
    case TxnErrorCode::TXN_OK:
178
33
        return 0;
179
0
    case TxnErrorCode::TXN_CONFLICT:
180
0
        return -1;
181
0
    default:
182
0
        return -1;
183
33
    }
184
33
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
167
106
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
168
106
    std::unique_ptr<Transaction> txn;
169
106
    TxnErrorCode err = txn_kv->create_txn(&txn);
170
106
    if (err != TxnErrorCode::TXN_OK) {
171
0
        return -1;
172
0
    }
173
106k
    for (auto& k : keys) {
174
106k
        txn->remove(k);
175
106k
    }
176
106
    switch (txn->commit()) {
177
106
    case TxnErrorCode::TXN_OK:
178
106
        return 0;
179
0
    case TxnErrorCode::TXN_CONFLICT:
180
0
        return -1;
181
0
    default:
182
0
        return -1;
183
106
    }
184
106
}
185
186
// return 0 for success otherwise error
187
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
188
106k
                                       std::string_view end) {
189
106k
    std::unique_ptr<Transaction> txn;
190
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
191
106k
    if (err != TxnErrorCode::TXN_OK) {
192
0
        return -1;
193
0
    }
194
106k
    txn->remove(begin, end);
195
106k
    switch (txn->commit()) {
196
106k
    case TxnErrorCode::TXN_OK:
197
106k
        return 0;
198
0
    case TxnErrorCode::TXN_CONFLICT:
199
0
        return -1;
200
0
    default:
201
0
        return -1;
202
106k
    }
203
106k
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
188
16
                                       std::string_view end) {
189
16
    std::unique_ptr<Transaction> txn;
190
16
    TxnErrorCode err = txn_kv->create_txn(&txn);
191
16
    if (err != TxnErrorCode::TXN_OK) {
192
0
        return -1;
193
0
    }
194
16
    txn->remove(begin, end);
195
16
    switch (txn->commit()) {
196
16
    case TxnErrorCode::TXN_OK:
197
16
        return 0;
198
0
    case TxnErrorCode::TXN_CONFLICT:
199
0
        return -1;
200
0
    default:
201
0
        return -1;
202
16
    }
203
16
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
188
106k
                                       std::string_view end) {
189
106k
    std::unique_ptr<Transaction> txn;
190
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
191
106k
    if (err != TxnErrorCode::TXN_OK) {
192
0
        return -1;
193
0
    }
194
106k
    txn->remove(begin, end);
195
106k
    switch (txn->commit()) {
196
106k
    case TxnErrorCode::TXN_OK:
197
106k
        return 0;
198
0
    case TxnErrorCode::TXN_CONFLICT:
199
0
        return -1;
200
0
    default:
201
0
        return -1;
202
106k
    }
203
106k
}
204
205
void scan_restore_job_rowset(
206
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
207
        std::string& msg,
208
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
209
210
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
211
                                      int64_t num_scanned, int64_t num_recycled,
212
47
                                      int64_t start_time) {
213
47
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
214
0
        int64_t cost =
215
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
216
0
        if (cost > config::recycle_task_threshold_seconds) {
217
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
218
0
                    .tag("instance_id", instance_id)
219
0
                    .tag("task", task_name)
220
0
                    .tag("num_scanned", num_scanned)
221
0
                    .tag("num_recycled", num_recycled);
222
0
        }
223
0
    }
224
47
    return;
225
47
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
212
2
                                      int64_t start_time) {
213
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
214
0
        int64_t cost =
215
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
216
0
        if (cost > config::recycle_task_threshold_seconds) {
217
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
218
0
                    .tag("instance_id", instance_id)
219
0
                    .tag("task", task_name)
220
0
                    .tag("num_scanned", num_scanned)
221
0
                    .tag("num_recycled", num_recycled);
222
0
        }
223
0
    }
224
2
    return;
225
2
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
212
45
                                      int64_t start_time) {
213
45
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
214
0
        int64_t cost =
215
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
216
0
        if (cost > config::recycle_task_threshold_seconds) {
217
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
218
0
                    .tag("instance_id", instance_id)
219
0
                    .tag("task", task_name)
220
0
                    .tag("num_scanned", num_scanned)
221
0
                    .tag("num_recycled", num_recycled);
222
0
        }
223
0
    }
224
45
    return;
225
45
}
226
227
6
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
228
6
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);
229
230
6
    auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
231
6
                                                               "s3_producer_pool");
232
6
    s3_producer_pool->start();
233
6
    auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
234
6
                                                                  "recycle_tablet_pool");
235
6
    recycle_tablet_pool->start();
236
6
    auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>(
237
6
            config::recycle_pool_parallelism, "group_recycle_function_pool");
238
6
    group_recycle_function_pool->start();
239
6
    _thread_pool_group =
240
6
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
241
6
                                    std::move(group_recycle_function_pool));
242
243
6
    auto resource_mgr = std::make_shared<ResourceManager>(txn_kv_);
244
6
    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_, std::move(resource_mgr));
245
6
    snapshot_manager_ = create_snapshot_manager(txn_kv_);
246
6
}
247
248
6
Recycler::~Recycler() {
249
6
    if (!stopped()) {
250
0
        stop();
251
0
    }
252
6
}
253
254
5
void Recycler::instance_scanner_callback() {
255
    // sleep 60 seconds before scheduling for the launch procedure to complete:
256
    // some bad hdfs connection may cause some log to stdout stderr
257
    // which may pollute .out file and affect the script to check success
258
5
    std::this_thread::sleep_for(
259
5
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
260
1.36k
    while (!stopped()) {
261
1.36k
        if (config::enable_recycler) {
262
3
            std::vector<InstanceInfoPB> instances;
263
3
            get_all_instances(txn_kv_.get(), instances);
264
            // TODO(plat1ko): delete job recycle kv of non-existent instances
265
3
            LOG(INFO) << "Recycler get instances: " << [&instances] {
266
3
                std::stringstream ss;
267
30
                for (auto& i : instances) ss << ' ' << i.instance_id();
268
3
                return ss.str();
269
3
            }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
265
3
            LOG(INFO) << "Recycler get instances: " << [&instances] {
266
3
                std::stringstream ss;
267
30
                for (auto& i : instances) ss << ' ' << i.instance_id();
268
3
                return ss.str();
269
3
            }();
270
3
            if (!instances.empty()) {
271
                // enqueue instances
272
3
                std::lock_guard lock(mtx_);
273
30
                for (auto& instance : instances) {
274
30
                    if (filter_out_instance(instance.instance_id())) continue;
275
30
                    auto [_, success] = pending_instance_set_.insert(instance.instance_id());
276
                    // skip instance already in pending queue
277
30
                    if (success) {
278
30
                        pending_instance_queue_.push_back(std::move(instance));
279
30
                    }
280
30
                }
281
3
                pending_instance_cond_.notify_all();
282
3
            }
283
1.35k
        } else {
284
1.35k
            LOG(WARNING) << "Skip recycler since enable_recycler is false";
285
1.35k
        }
286
1.36k
        {
287
1.36k
            std::unique_lock lock(mtx_);
288
1.36k
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
289
2.72k
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
289
2.72k
                               [&]() { return stopped(); });
290
1.36k
        }
291
1.36k
    }
292
5
}
293
294
9
void Recycler::recycle_callback() {
295
40
    while (!stopped()) {
296
37
        InstanceInfoPB instance;
297
37
        {
298
37
            std::unique_lock lock(mtx_);
299
37
            pending_instance_cond_.wait(
300
49
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
300
49
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
301
37
            if (stopped()) {
302
6
                return;
303
6
            }
304
31
            instance = std::move(pending_instance_queue_.front());
305
31
            pending_instance_queue_.pop_front();
306
31
            pending_instance_set_.erase(instance.instance_id());
307
31
        }
308
0
        auto& instance_id = instance.instance_id();
309
31
        {
310
31
            std::lock_guard lock(mtx_);
311
            // skip instance in recycling
312
31
            if (recycling_instance_map_.count(instance_id)) continue;
313
31
        }
314
31
        if (!config::enable_recycler) {
315
1
            LOG(WARNING) << "Skip recycle instance_id=" << instance_id
316
1
                         << " since enable_recycler is false";
317
1
            continue;
318
1
        }
319
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
320
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
321
322
30
        if (int r = instance_recycler->init(); r != 0) {
323
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
324
0
                         << " ret=" << r;
325
0
            continue;
326
0
        }
327
30
        std::string recycle_job_key;
328
30
        job_recycle_key({instance_id}, &recycle_job_key);
329
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
330
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
331
30
        if (ret != 0) { // Prepare failed
332
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
333
20
                         << " ret=" << ret;
334
20
            continue;
335
20
        } else {
336
10
            std::lock_guard lock(mtx_);
337
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
338
10
        }
339
10
        if (stopped()) return;
340
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
341
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
342
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
343
10
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
344
10
        ret = instance_recycler->do_recycle();
345
        // If instance recycler has been aborted, don't finish this job
346
347
10
        if (!instance_recycler->stopped()) {
348
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
349
10
                                        ret == 0, ctime_ms);
350
10
        }
351
10
        if (instance_recycler->stopped() || ret != 0) {
352
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
353
0
        }
354
10
        {
355
10
            std::lock_guard lock(mtx_);
356
10
            recycling_instance_map_.erase(instance_id);
357
10
        }
358
359
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
360
10
        auto elpased_ms = now - ctime_ms;
361
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
362
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
363
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
364
10
                                             now + config::recycle_interval_seconds * 1000);
365
10
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
366
10
        LOG(INFO) << "recycle instance done, "
367
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
368
10
                  << " now: " << now;
369
370
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
371
372
10
        LOG_WARNING("finish recycle instance")
373
10
                .tag("instance_id", instance_id)
374
10
                .tag("cost_ms", elpased_ms);
375
10
    }
376
9
}
377
378
4
void Recycler::lease_recycle_jobs() {
379
54
    while (!stopped()) {
380
50
        std::vector<std::string> instances;
381
50
        instances.reserve(recycling_instance_map_.size());
382
50
        {
383
50
            std::lock_guard lock(mtx_);
384
50
            for (auto& [id, _] : recycling_instance_map_) {
385
30
                instances.push_back(id);
386
30
            }
387
50
        }
388
50
        for (auto& i : instances) {
389
30
            std::string recycle_job_key;
390
30
            job_recycle_key({i}, &recycle_job_key);
391
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
392
30
            if (ret == 1) {
393
0
                std::lock_guard lock(mtx_);
394
0
                if (auto it = recycling_instance_map_.find(i);
395
0
                    it != recycling_instance_map_.end()) {
396
0
                    it->second->stop();
397
0
                }
398
0
            }
399
30
        }
400
50
        {
401
50
            std::unique_lock lock(mtx_);
402
50
            notifier_.wait_for(lock,
403
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
404
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
404
100
                               [&]() { return stopped(); });
405
50
        }
406
50
    }
407
4
}
408
409
4
void Recycler::check_recycle_tasks() {
410
7
    while (!stopped()) {
411
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
412
3
        {
413
3
            std::lock_guard lock(mtx_);
414
3
            recycling_instance_map = recycling_instance_map_;
415
3
        }
416
3
        for (auto& entry : recycling_instance_map) {
417
0
            entry.second->check_recycle_tasks();
418
0
        }
419
420
3
        std::unique_lock lock(mtx_);
421
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
422
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
422
6
                           [&]() { return stopped(); });
423
3
    }
424
4
}
425
426
4
int Recycler::start(brpc::Server* server) {
427
4
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
428
4
    S3Environment::getInstance();
429
430
4
    if (config::enable_checker) {
431
0
        checker_ = std::make_unique<Checker>(txn_kv_);
432
0
        int ret = checker_->start();
433
0
        std::string msg;
434
0
        if (ret != 0) {
435
0
            msg = "failed to start checker";
436
0
            LOG(ERROR) << msg;
437
0
            std::cerr << msg << std::endl;
438
0
            return ret;
439
0
        }
440
0
        msg = "checker started";
441
0
        LOG(INFO) << msg;
442
0
        std::cout << msg << std::endl;
443
0
    }
444
445
4
    if (server) {
446
        // Add service
447
1
        auto recycler_service =
448
1
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
449
1
        server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE);
450
1
    }
451
452
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
Line
Count
Source
452
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
453
12
    for (int i = 0; i < config::recycle_concurrency; ++i) {
454
8
        workers_.emplace_back([this] { recycle_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
Line
Count
Source
454
8
        workers_.emplace_back([this] { recycle_callback(); });
455
8
    }
456
457
4
    workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this);
458
4
    workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this);
459
460
4
    if (config::enable_snapshot_data_migrator) {
461
0
        snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_);
462
0
        int ret = snapshot_data_migrator_->start();
463
0
        if (ret != 0) {
464
0
            LOG(ERROR) << "failed to start snapshot data migrator";
465
0
            return ret;
466
0
        }
467
0
        LOG(INFO) << "snapshot data migrator started";
468
0
    }
469
470
4
    if (config::enable_snapshot_chain_compactor) {
471
0
        snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_);
472
0
        int ret = snapshot_chain_compactor_->start();
473
0
        if (ret != 0) {
474
0
            LOG(ERROR) << "failed to start snapshot chain compactor";
475
0
            return ret;
476
0
        }
477
0
        LOG(INFO) << "snapshot chain compactor started";
478
0
    }
479
480
4
    return 0;
481
4
}
482
483
4
void Recycler::stop() {
484
4
    stopped_ = true;
485
4
    notifier_.notify_all();
486
4
    pending_instance_cond_.notify_all();
487
4
    {
488
4
        std::lock_guard lock(mtx_);
489
4
        for (auto& [_, recycler] : recycling_instance_map_) {
490
0
            recycler->stop();
491
0
        }
492
4
    }
493
20
    for (auto& w : workers_) {
494
20
        if (w.joinable()) w.join();
495
20
    }
496
4
    if (checker_) {
497
0
        checker_->stop();
498
0
    }
499
4
    if (snapshot_data_migrator_) {
500
0
        snapshot_data_migrator_->stop();
501
0
    }
502
4
    if (snapshot_chain_compactor_) {
503
0
        snapshot_chain_compactor_->stop();
504
0
    }
505
4
}
506
507
// Cache of the inverted-index layout of a schema, keyed by <index_id, schema_version>.
// On a miss the schema is read from the meta store through `txn_kv_` and the outcome
// is remembered, so later lookups for the same key are served from memory.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Fills `res` with the inverted index info of <index_id, schema_version>.
    //
    // `res.first` receives the index storage format (V1 unless the schema specifies one) and
    // `res.second` receives one (index_id, index_suffix_name) entry per INVERTED index.
    // When the key is cached as "no inverted index", `res` is left untouched.
    //
    // Returns 0 on success; -1 if the txn cannot be created or the schema value is malformed;
    // otherwise the integer value of the TxnErrorCode returned by `blob_get`
    // (the original note says 1 means the schema kv was not found - confirm against TxnErrorCode).
    //
    // The lookup and the later insert are not one atomic step: two threads missing on the same
    // key may both read the schema from kv and both insert. That costs a repeated meta read but
    // keeps the critical section small, and a repeated insert does not affect correctness.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
                it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            for (auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Records the info of <index_id, schema_version> in the cache.
    // An empty `index_info.second` means this schema has no inverted index; such keys are kept
    // in a separate set. An already cached key is not overwritten.
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    // Guards both containers below.
    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        // Mixes the hashes of both key components. The previous version assigned `seed` twice,
        // so only `schema_version` contributed and all schemas sharing a version collided.
        size_t operator()(const Key& key) const {
            size_t seed = std::hash<int64_t> {}(key.first);
            // boost::hash_combine style mixing step.
            seed ^= std::hash<int32_t> {}(key.second) +
                    static_cast<size_t>(0x9e3779b97f4a7c15ULL) + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
599
600
// Builds the recycler of a single instance.
//
// Besides storing the arguments, the constructor creates the inverted index cache, the delete
// bitmap lock white list and a resource manager on top of `txn_kv_`, initializes the latter two,
// and creates the snapshot manager.
// NOTE(review): `txn_lazy_committer` is dereferenced unconditionally below, so callers are
// presumably required to pass a non-null committer - confirm.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)),
          delete_bitmap_lock_white_list_(std::make_shared<DeleteBitmapLockWhiteList>()),
          resource_mgr_(std::make_shared<ResourceManager>(txn_kv_)) {
    delete_bitmap_lock_white_list_->init();
    resource_mgr_->init();

    snapshot_manager_ = create_snapshot_manager(txn_kv_);

    // Since the recycler's resource manager could not be notified when instance info changes,
    // we need to refresh the instance info here to ensure the resource manager has the latest info.
    txn_lazy_committer_->resource_manager()->refresh_instance(instance_id_, instance);
}

// Defined in this translation unit, after InvertedIndexIdCache has been fully defined
// (presumably required because the cache is owned through a std::unique_ptr - confirm in header).
InstanceRecycler::~InstanceRecycler() = default;
622
623
116
int InstanceRecycler::init_obj_store_accessors() {
624
116
    for (const auto& obj_info : instance_info_.obj_info()) {
625
76
#ifdef UNIT_TEST
626
76
        auto accessor = std::make_shared<MockAccessor>();
627
#else
628
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
629
        if (!s3_conf) {
630
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
631
            return -1;
632
        }
633
634
        std::shared_ptr<S3Accessor> accessor;
635
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
636
        if (ret != 0) {
637
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
638
                         << " resource_id=" << obj_info.id();
639
            return ret;
640
        }
641
#endif
642
76
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
643
76
    }
644
645
116
    return 0;
646
116
}
647
648
116
int InstanceRecycler::init_storage_vault_accessors() {
649
116
    if (instance_info_.resource_ids().empty()) {
650
109
        return 0;
651
109
    }
652
653
7
    FullRangeGetOptions opts(txn_kv_);
654
7
    opts.prefetch = true;
655
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
656
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
657
658
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
659
18
        auto [k, v] = *kv;
660
18
        StorageVaultPB vault;
661
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
662
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
663
0
            return -1;
664
0
        }
665
18
        std::string recycler_storage_vault_white_list = accumulate(
666
18
                config::recycler_storage_vault_white_list.begin(),
667
18
                config::recycler_storage_vault_white_list.end(), std::string(),
668
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
668
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
669
18
        LOG_INFO("config::recycler_storage_vault_white_list")
670
18
                .tag("", recycler_storage_vault_white_list);
671
18
        if (!config::recycler_storage_vault_white_list.empty()) {
672
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
673
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
674
8
                it == config::recycler_storage_vault_white_list.end()) {
675
2
                LOG_WARNING(
676
2
                        "failed to init accessor for vault because this vault is not in "
677
2
                        "config::recycler_storage_vault_white_list. ")
678
2
                        .tag(" vault name:", vault.name())
679
2
                        .tag(" config::recycler_storage_vault_white_list:",
680
2
                             recycler_storage_vault_white_list);
681
2
                continue;
682
2
            }
683
8
        }
684
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
685
16
                                 &accessor_map_, &vault);
686
16
        if (vault.has_hdfs_info()) {
687
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
688
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
689
9
            int ret = accessor->init();
690
9
            if (ret != 0) {
691
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
692
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
693
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
694
4
                continue;
695
4
            }
696
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
697
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
698
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
699
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
700
#else
701
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
702
                       << "but HDFS storage vaults were detected";
703
#endif
704
7
        } else if (vault.has_obj_info()) {
705
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
706
7
            if (!s3_conf) {
707
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
708
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
709
1
                continue;
710
1
            }
711
712
6
            std::shared_ptr<S3Accessor> accessor;
713
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
714
6
            if (ret != 0) {
715
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
716
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
717
0
                             << " ret=" << ret
718
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
719
0
                continue;
720
0
            }
721
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
722
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
723
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
724
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
725
6
        }
726
16
    }
727
728
7
    if (!it->is_valid()) {
729
0
        LOG_WARNING("failed to get storage vault kv");
730
0
        return -1;
731
0
    }
732
733
7
    if (accessor_map_.empty()) {
734
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
735
1
        return -2;
736
1
    }
737
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
738
6
             instance_id_);
739
740
6
    return 0;
741
7
}
742
743
116
int InstanceRecycler::init() {
744
116
    int ret = init_obj_store_accessors();
745
116
    if (ret != 0) {
746
0
        return ret;
747
0
    }
748
749
116
    return init_storage_vault_accessors();
750
116
}
751
752
template <typename... Func>
753
120
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
120
    return [funcs...]() {
755
120
        return [](std::initializer_list<int> ret_vals) {
756
120
            int i = 0;
757
140
            for (int ret : ret_vals) {
758
140
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
140
            }
762
120
            return i;
763
120
        }({funcs()...});
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
20
            for (int ret : ret_vals) {
758
20
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
20
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
20
            for (int ret : ret_vals) {
758
20
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
20
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
0
                    i = ret;
760
0
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
120
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEv
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
120
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
Line
Count
Source
753
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
754
10
    return [funcs...]() {
755
10
        return [](std::initializer_list<int> ret_vals) {
756
10
            int i = 0;
757
10
            for (int ret : ret_vals) {
758
10
                if (ret != 0) {
759
10
                    i = ret;
760
10
                }
761
10
            }
762
10
            return i;
763
10
        }({funcs()...});
764
10
    };
765
10
}
766
767
10
int InstanceRecycler::do_recycle() {
768
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
769
10
    tablet_metrics_context_.reset();
770
10
    segment_metrics_context_.reset();
771
10
    DORIS_CLOUD_DEFER {
772
10
        tablet_metrics_context_.finish_report();
773
10
        segment_metrics_context_.finish_report();
774
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
771
10
    DORIS_CLOUD_DEFER {
772
10
        tablet_metrics_context_.finish_report();
773
10
        segment_metrics_context_.finish_report();
774
10
    };
775
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
776
0
        int res = recycle_cluster_snapshots();
777
0
        if (res != 0) {
778
0
            return -1;
779
0
        }
780
0
        return recycle_deleted_instance();
781
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
782
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
783
10
                                        fmt::format("instance id {}", instance_id_),
784
120
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
784
120
                                        [](int r) { return r != 0; });
785
10
        sync_executor
786
10
                .add(task_wrapper(
787
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
787
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
788
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
788
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
789
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
790
                                   // becase they may both recycle the same set of tablets
791
                        // recycle dropped table or idexes(mv, rollup)
792
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
792
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
793
                        // recycle dropped partitions
794
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
794
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
795
10
                .add(task_wrapper(
796
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
796
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
797
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
797
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
798
10
                .add(task_wrapper(
799
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
799
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
800
10
                .add(task_wrapper(
801
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
801
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
802
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
802
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
803
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
803
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
804
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
804
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
805
10
                .add(task_wrapper(
806
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
806
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
807
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
807
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
808
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
Line
Count
Source
808
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
809
10
        bool finished = true;
810
10
        std::vector<int> rets = sync_executor.when_all(&finished);
811
120
        for (int ret : rets) {
812
120
            if (ret != 0) {
813
0
                return ret;
814
0
            }
815
120
        }
816
10
        return finished ? 0 : -1;
817
10
    } else {
818
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
819
0
                     << " instance_id=" << instance_id_;
820
0
        return -1;
821
0
    }
822
10
}
823
824
/**
825
* 1. delete all remote data
826
* 2. delete all kv
827
* 3. remove instance kv
828
*/
829
5
int InstanceRecycler::recycle_deleted_instance() {
830
5
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
831
832
5
    int ret = 0;
833
5
    auto start_time = steady_clock::now();
834
835
5
    DORIS_CLOUD_DEFER {
836
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
837
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
838
5
                     << " recycle deleted instance, cost=" << cost
839
5
                     << "s, instance_id=" << instance_id_;
840
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
835
5
    DORIS_CLOUD_DEFER {
836
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
837
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
838
5
                     << " recycle deleted instance, cost=" << cost
839
5
                     << "s, instance_id=" << instance_id_;
840
5
    };
841
842
    // Step 1: Recycle tmp rowsets (contains ref count but txn is not committed)
843
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
844
5
        int res = recycle_tmp_rowsets();
845
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
846
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
847
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
848
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
849
            // and cannot be recycled.
850
5
            res = recycle_tmp_rowsets();
851
5
        }
852
5
        return res;
853
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
Line
Count
Source
843
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
844
5
        int res = recycle_tmp_rowsets();
845
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
846
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
847
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
848
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
849
            // and cannot be recycled.
850
5
            res = recycle_tmp_rowsets();
851
5
        }
852
5
        return res;
853
5
    };
854
5
    if (recycle_tmp_rowsets_with_mark_delete_enabled() != 0) {
855
0
        LOG_WARNING("failed to recycle tmp rowsets").tag("instance_id", instance_id_);
856
0
        ret = -1;
857
0
        return -1;
858
0
    }
859
860
    // Step 2: Recycle versioned rowsets in recycle space (already marked for deletion)
861
5
    if (recycle_versioned_rowsets() != 0) {
862
0
        LOG_WARNING("failed to recycle versioned rowsets").tag("instance_id", instance_id_);
863
0
        ret = -1;
864
0
        return -1;
865
0
    }
866
867
    // Step 3: Recycle operation logs (can recycle logs not referenced by snapshots)
868
5
    if (recycle_operation_logs() != 0) {
869
0
        LOG_WARNING("failed to recycle operation logs").tag("instance_id", instance_id_);
870
0
        ret = -1;
871
0
        return -1;
872
0
    }
873
874
    // Step 4: Check if there are still cluster snapshots
875
5
    bool has_snapshots = false;
876
5
    if (has_cluster_snapshots(&has_snapshots) != 0) {
877
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
878
0
        ret = -1;
879
0
        return -1;
880
5
    } else if (has_snapshots) {
881
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
882
1
        return 0;
883
1
    }
884
885
4
    bool snapshot_enabled = instance_info().has_snapshot_switch_status() &&
886
4
                            instance_info().snapshot_switch_status() !=
887
1
                                    SnapshotSwitchStatus::SNAPSHOT_SWITCH_DISABLED;
888
4
    if (snapshot_enabled) {
889
1
        bool has_unrecycled_rowsets = false;
890
1
        if (recycle_ref_rowsets(&has_unrecycled_rowsets) != 0) {
891
0
            LOG_WARNING("failed to recycle ref rowsets").tag("instance_id", instance_id_);
892
0
            ret = -1;
893
0
            return -1;
894
1
        } else if (has_unrecycled_rowsets) {
895
0
            LOG_INFO("instance has referenced rowsets, skip recycling")
896
0
                    .tag("instance_id", instance_id_);
897
0
            return ret;
898
0
        }
899
3
    } else { // delete all remote data if snapshot is disabled
900
3
        for (auto& [_, accessor] : accessor_map_) {
901
3
            if (stopped()) {
902
0
                return ret;
903
0
            }
904
905
3
            LOG(INFO) << "begin to delete all objects in " << accessor->uri();
906
3
            int del_ret = accessor->delete_all();
907
3
            if (del_ret == 0) {
908
3
                LOG(INFO) << "successfully delete all objects in " << accessor->uri();
909
3
            } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
910
                // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
911
                // so the recycling has been successful.
912
0
                ret = -1;
913
0
            }
914
3
        }
915
916
3
        if (ret != 0) {
917
0
            LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
918
0
            return ret;
919
0
        }
920
3
    }
921
922
    // Check successor instance, if exists, skip deleting kv because successor instance may still need the data in kv
923
4
    if (instance_info_.has_successor_instance_id() &&
924
4
        !instance_info_.successor_instance_id().empty()) {
925
0
        std::string key = instance_key(instance_info_.successor_instance_id());
926
0
        std::unique_ptr<Transaction> txn;
927
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
928
0
        if (err != TxnErrorCode::TXN_OK) {
929
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_
930
0
                         << " successor_instance_id=" << instance_info_.successor_instance_id()
931
0
                         << " err=" << err;
932
0
            ret = -1;
933
0
            return -1;
934
0
        }
935
936
0
        std::string value;
937
0
        err = txn->get(key, &value);
938
0
        if (err == TxnErrorCode::TXN_OK) {
939
0
            LOG(INFO) << "instance successor instance is still exist, skip deleting kv,"
940
0
                      << " instance_id=" << instance_id_
941
0
                      << " successor_instance_id=" << instance_info_.successor_instance_id();
942
0
            return 0;
943
0
        } else if (err != TxnErrorCode::TXN_KEY_NOT_FOUND) {
944
0
            LOG(WARNING) << "failed to get successor instance, instance_id=" << instance_id_
945
0
                         << " successor_instance_id=" << instance_info_.successor_instance_id()
946
0
                         << " err=" << err;
947
0
            ret = -1;
948
0
            return -1;
949
0
        }
950
0
    }
951
952
    // delete all kv
953
4
    std::unique_ptr<Transaction> txn;
954
4
    TxnErrorCode err = txn_kv_->create_txn(&txn);
955
4
    if (err != TxnErrorCode::TXN_OK) {
956
0
        LOG(WARNING) << "failed to create txn";
957
0
        ret = -1;
958
0
        return -1;
959
0
    }
960
4
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
961
    // delete kv before deleting objects to prevent the checker from misjudging data loss
962
4
    std::string start_txn_key = txn_key_prefix(instance_id_);
963
4
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
964
4
    txn->remove(start_txn_key, end_txn_key);
965
4
    std::string start_version_key = version_key_prefix(instance_id_);
966
4
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
967
4
    txn->remove(start_version_key, end_version_key);
968
4
    std::string start_meta_key = meta_key_prefix(instance_id_);
969
4
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
970
4
    txn->remove(start_meta_key, end_meta_key);
971
4
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
972
4
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
973
4
    txn->remove(start_recycle_key, end_recycle_key);
974
4
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
975
4
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
976
4
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
977
4
    std::string start_copy_key = copy_key_prefix(instance_id_);
978
4
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
979
4
    txn->remove(start_copy_key, end_copy_key);
980
    // should not remove job key range, because we need to reserve job recycle kv
981
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
982
4
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
983
4
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
984
4
    txn->remove(start_job_tablet_key, end_job_tablet_key);
985
4
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
986
4
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
987
4
    std::string start_vault_key = storage_vault_key(key_info0);
988
4
    std::string end_vault_key = storage_vault_key(key_info1);
989
4
    txn->remove(start_vault_key, end_vault_key);
990
4
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
991
4
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
992
4
    txn->remove(versioned_version_key_start, versioned_version_key_end);
993
4
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
994
4
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
995
4
    txn->remove(versioned_index_key_start, versioned_index_key_end);
996
4
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
997
4
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
998
4
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
999
4
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
1000
4
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
1001
4
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
1002
4
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
1003
4
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
1004
4
    txn->remove(versioned_data_key_start, versioned_data_key_end);
1005
4
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
1006
4
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
1007
4
    txn->remove(versioned_log_key_start, versioned_log_key_end);
1008
4
    err = txn->commit();
1009
4
    if (err != TxnErrorCode::TXN_OK) {
1010
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
1011
0
        ret = -1;
1012
0
    }
1013
1014
4
    if (ret == 0) {
1015
        // remove instance kv
1016
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
1017
4
        err = txn_kv_->create_txn(&txn);
1018
4
        if (err != TxnErrorCode::TXN_OK) {
1019
0
            LOG(WARNING) << "failed to create txn";
1020
0
            ret = -1;
1021
0
            return ret;
1022
0
        }
1023
4
        std::string key;
1024
4
        instance_key({instance_id_}, &key);
1025
4
        txn->atomic_add(system_meta_service_instance_update_key(), 1);
1026
4
        txn->remove(key);
1027
4
        err = txn->commit();
1028
4
        if (err != TxnErrorCode::TXN_OK) {
1029
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
1030
0
                         << " err=" << err;
1031
0
            ret = -1;
1032
0
        }
1033
4
    }
1034
4
    return ret;
1035
4
}
1036
1037
int InstanceRecycler::check_rowset_exists(int64_t tablet_id, const std::string& rowset_id,
1038
9
                                          bool* exists, PackedFileRecycleStats* stats) {
1039
9
    if (exists == nullptr) {
1040
0
        return -1;
1041
0
    }
1042
9
    *exists = false;
1043
1044
9
    std::string begin = meta_rowset_key({instance_id_, tablet_id, 0});
1045
9
    std::string end = meta_rowset_key({instance_id_, tablet_id + 1, 0});
1046
9
    std::string scan_begin = begin;
1047
1048
9
    while (true) {
1049
9
        std::unique_ptr<RangeGetIterator> it_range;
1050
9
        int get_ret = txn_get(txn_kv_.get(), scan_begin, end, it_range);
1051
9
        if (get_ret < 0) {
1052
0
            LOG_WARNING("failed to scan rowset metas when recycling packed file")
1053
0
                    .tag("instance_id", instance_id_)
1054
0
                    .tag("tablet_id", tablet_id)
1055
0
                    .tag("ret", get_ret);
1056
0
            return -1;
1057
0
        }
1058
9
        if (get_ret == 1 || it_range == nullptr || !it_range->has_next()) {
1059
6
            return 0;
1060
6
        }
1061
1062
3
        std::string last_key;
1063
3
        while (it_range->has_next()) {
1064
3
            auto [k, v] = it_range->next();
1065
3
            last_key.assign(k.data(), k.size());
1066
3
            doris::RowsetMetaCloudPB rowset_meta;
1067
3
            if (!rowset_meta.ParseFromArray(v.data(), v.size())) {
1068
0
                LOG_WARNING("malformed rowset meta when checking packed file rowset existence")
1069
0
                        .tag("instance_id", instance_id_)
1070
0
                        .tag("tablet_id", tablet_id)
1071
0
                        .tag("key", hex(k));
1072
0
                continue;
1073
0
            }
1074
3
            if (stats) {
1075
3
                ++stats->rowset_scan_count;
1076
3
            }
1077
3
            if (rowset_meta.rowset_id_v2() == rowset_id) {
1078
3
                *exists = true;
1079
3
                return 0;
1080
3
            }
1081
3
        }
1082
1083
0
        if (!it_range->more()) {
1084
0
            return 0;
1085
0
        }
1086
1087
        // Continue scanning from the next key to keep each transaction short.
1088
0
        scan_begin = std::move(last_key);
1089
0
        scan_begin.push_back('\x00');
1090
0
    }
1091
9
}
1092
1093
int InstanceRecycler::check_recycle_and_tmp_rowset_exists(int64_t tablet_id,
1094
                                                          const std::string& rowset_id,
1095
                                                          int64_t txn_id, bool* recycle_exists,
1096
11
                                                          bool* tmp_exists) {
1097
11
    if (recycle_exists == nullptr || tmp_exists == nullptr) {
1098
0
        return -1;
1099
0
    }
1100
11
    *recycle_exists = false;
1101
11
    *tmp_exists = false;
1102
1103
11
    if (txn_id <= 0) {
1104
0
        LOG_WARNING("invalid txn id when checking recycle/tmp rowset existence")
1105
0
                .tag("instance_id", instance_id_)
1106
0
                .tag("tablet_id", tablet_id)
1107
0
                .tag("rowset_id", rowset_id)
1108
0
                .tag("txn_id", txn_id);
1109
0
        return -1;
1110
0
    }
1111
1112
11
    std::unique_ptr<Transaction> txn;
1113
11
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1114
11
    if (err != TxnErrorCode::TXN_OK) {
1115
0
        LOG_WARNING("failed to create txn when checking recycle/tmp rowset existence")
1116
0
                .tag("instance_id", instance_id_)
1117
0
                .tag("tablet_id", tablet_id)
1118
0
                .tag("rowset_id", rowset_id)
1119
0
                .tag("txn_id", txn_id)
1120
0
                .tag("err", err);
1121
0
        return -1;
1122
0
    }
1123
1124
11
    std::string recycle_key = recycle_rowset_key({instance_id_, tablet_id, rowset_id});
1125
11
    auto ret = key_exists(txn.get(), recycle_key, true);
1126
11
    if (ret == TxnErrorCode::TXN_OK) {
1127
1
        *recycle_exists = true;
1128
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1129
0
        LOG_WARNING("failed to check recycle rowset existence")
1130
0
                .tag("instance_id", instance_id_)
1131
0
                .tag("tablet_id", tablet_id)
1132
0
                .tag("rowset_id", rowset_id)
1133
0
                .tag("key", hex(recycle_key))
1134
0
                .tag("err", ret);
1135
0
        return -1;
1136
0
    }
1137
1138
11
    std::string tmp_key = meta_rowset_tmp_key({instance_id_, txn_id, tablet_id});
1139
11
    ret = key_exists(txn.get(), tmp_key, true);
1140
11
    if (ret == TxnErrorCode::TXN_OK) {
1141
1
        *tmp_exists = true;
1142
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1143
0
        LOG_WARNING("failed to check tmp rowset existence")
1144
0
                .tag("instance_id", instance_id_)
1145
0
                .tag("tablet_id", tablet_id)
1146
0
                .tag("txn_id", txn_id)
1147
0
                .tag("key", hex(tmp_key))
1148
0
                .tag("err", ret);
1149
0
        return -1;
1150
0
    }
1151
1152
11
    return 0;
1153
11
}
1154
1155
std::pair<std::string, std::shared_ptr<StorageVaultAccessor>>
1156
8
InstanceRecycler::resolve_packed_file_accessor(const std::string& hint) {
1157
8
    if (!hint.empty()) {
1158
8
        if (auto it = accessor_map_.find(hint); it != accessor_map_.end()) {
1159
8
            return {hint, it->second};
1160
8
        }
1161
8
    }
1162
1163
0
    return {"", nullptr};
1164
8
}
1165
1166
int InstanceRecycler::correct_packed_file_info(cloud::PackedFileInfoPB* packed_info, bool* changed,
1167
                                               const std::string& packed_file_path,
1168
3
                                               PackedFileRecycleStats* stats) {
1169
3
    bool local_changed = false;
1170
3
    int64_t left_num = 0;
1171
3
    int64_t left_bytes = 0;
1172
3
    bool all_small_files_confirmed = true;
1173
3
    LOG(INFO) << "begin to correct file: " << packed_file_path;
1174
1175
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1176
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1177
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1178
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1179
14
        LOG_INFO("packed slice correction status")
1180
14
                .tag("instance_id", instance_id_)
1181
14
                .tag("packed_file_path", packed_file_path)
1182
14
                .tag("small_file_path", file.path())
1183
14
                .tag("tablet_id", tablet_id)
1184
14
                .tag("rowset_id", rowset_id)
1185
14
                .tag("txn_id", txn_id)
1186
14
                .tag("size", file.size())
1187
14
                .tag("deleted", file.deleted())
1188
14
                .tag("corrected", file.corrected())
1189
14
                .tag("confirmed_this_round", confirmed_this_round);
1190
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
Line
Count
Source
1175
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1176
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1177
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1178
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1179
14
        LOG_INFO("packed slice correction status")
1180
14
                .tag("instance_id", instance_id_)
1181
14
                .tag("packed_file_path", packed_file_path)
1182
14
                .tag("small_file_path", file.path())
1183
14
                .tag("tablet_id", tablet_id)
1184
14
                .tag("rowset_id", rowset_id)
1185
14
                .tag("txn_id", txn_id)
1186
14
                .tag("size", file.size())
1187
14
                .tag("deleted", file.deleted())
1188
14
                .tag("corrected", file.corrected())
1189
14
                .tag("confirmed_this_round", confirmed_this_round);
1190
14
    };
1191
1192
17
    for (int i = 0; i < packed_info->slices_size(); ++i) {
1193
14
        auto* small_file = packed_info->mutable_slices(i);
1194
14
        if (small_file->deleted()) {
1195
3
            log_small_file_status(*small_file, small_file->corrected());
1196
3
            continue;
1197
3
        }
1198
1199
11
        if (small_file->corrected()) {
1200
0
            left_num++;
1201
0
            left_bytes += small_file->size();
1202
0
            log_small_file_status(*small_file, true);
1203
0
            continue;
1204
0
        }
1205
1206
11
        if (!small_file->has_tablet_id() || !small_file->has_rowset_id()) {
1207
0
            LOG_WARNING("packed file small file missing identifiers during correction")
1208
0
                    .tag("instance_id", instance_id_)
1209
0
                    .tag("small_file_path", small_file->path())
1210
0
                    .tag("index", i);
1211
0
            return -1;
1212
0
        }
1213
1214
11
        int64_t tablet_id = small_file->tablet_id();
1215
11
        const std::string& rowset_id = small_file->rowset_id();
1216
11
        if (!small_file->has_txn_id() || small_file->txn_id() <= 0) {
1217
0
            LOG_WARNING("packed file small file missing valid txn id during correction")
1218
0
                    .tag("instance_id", instance_id_)
1219
0
                    .tag("small_file_path", small_file->path())
1220
0
                    .tag("index", i)
1221
0
                    .tag("tablet_id", tablet_id)
1222
0
                    .tag("rowset_id", rowset_id)
1223
0
                    .tag("has_txn_id", small_file->has_txn_id())
1224
0
                    .tag("txn_id", small_file->has_txn_id() ? small_file->txn_id() : 0);
1225
0
            return -1;
1226
0
        }
1227
11
        int64_t txn_id = small_file->txn_id();
1228
11
        bool recycle_exists = false;
1229
11
        bool tmp_exists = false;
1230
11
        if (check_recycle_and_tmp_rowset_exists(tablet_id, rowset_id, txn_id, &recycle_exists,
1231
11
                                                &tmp_exists) != 0) {
1232
0
            return -1;
1233
0
        }
1234
1235
11
        bool small_file_confirmed = false;
1236
11
        if (tmp_exists) {
1237
1
            left_num++;
1238
1
            left_bytes += small_file->size();
1239
1
            small_file_confirmed = true;
1240
10
        } else if (recycle_exists) {
1241
1
            left_num++;
1242
1
            left_bytes += small_file->size();
1243
            // keep small_file_confirmed=false so the packed file remains uncorrected
1244
9
        } else {
1245
9
            bool rowset_exists = false;
1246
9
            if (check_rowset_exists(tablet_id, rowset_id, &rowset_exists, stats) != 0) {
1247
0
                return -1;
1248
0
            }
1249
1250
9
            if (!rowset_exists) {
1251
6
                if (!small_file->deleted()) {
1252
6
                    small_file->set_deleted(true);
1253
6
                    local_changed = true;
1254
6
                }
1255
6
                if (!small_file->corrected()) {
1256
6
                    small_file->set_corrected(true);
1257
6
                    local_changed = true;
1258
6
                }
1259
6
                small_file_confirmed = true;
1260
6
            } else {
1261
3
                left_num++;
1262
3
                left_bytes += small_file->size();
1263
3
                small_file_confirmed = true;
1264
3
            }
1265
9
        }
1266
1267
11
        if (!small_file_confirmed) {
1268
1
            all_small_files_confirmed = false;
1269
1
        }
1270
1271
11
        if (small_file->corrected() != small_file_confirmed) {
1272
4
            small_file->set_corrected(small_file_confirmed);
1273
4
            local_changed = true;
1274
4
        }
1275
1276
11
        log_small_file_status(*small_file, small_file_confirmed);
1277
11
    }
1278
1279
3
    if (packed_info->remaining_slice_bytes() != left_bytes) {
1280
3
        packed_info->set_remaining_slice_bytes(left_bytes);
1281
3
        local_changed = true;
1282
3
    }
1283
3
    if (packed_info->ref_cnt() != left_num) {
1284
3
        auto old_ref_cnt = packed_info->ref_cnt();
1285
3
        packed_info->set_ref_cnt(left_num);
1286
3
        LOG_INFO("corrected packed file ref count")
1287
3
                .tag("instance_id", instance_id_)
1288
3
                .tag("resource_id", packed_info->resource_id())
1289
3
                .tag("packed_file_path", packed_file_path)
1290
3
                .tag("old_ref_cnt", old_ref_cnt)
1291
3
                .tag("new_ref_cnt", left_num);
1292
3
        local_changed = true;
1293
3
    }
1294
3
    if (packed_info->corrected() != all_small_files_confirmed) {
1295
2
        packed_info->set_corrected(all_small_files_confirmed);
1296
2
        local_changed = true;
1297
2
    }
1298
3
    if (left_num == 0 && packed_info->state() != cloud::PackedFileInfoPB::RECYCLING) {
1299
1
        packed_info->set_state(cloud::PackedFileInfoPB::RECYCLING);
1300
1
        local_changed = true;
1301
1
    }
1302
1303
3
    if (changed != nullptr) {
1304
3
        *changed = local_changed;
1305
3
    }
1306
3
    return 0;
1307
3
}
1308
1309
int InstanceRecycler::process_single_packed_file(const std::string& packed_key,
1310
                                                 const std::string& packed_file_path,
1311
4
                                                 PackedFileRecycleStats* stats) {
1312
4
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
1313
4
    bool correction_ok = false;
1314
4
    cloud::PackedFileInfoPB packed_info;
1315
1316
4
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
1317
4
        if (stopped()) {
1318
0
            LOG_WARNING("recycler stopped before processing packed file")
1319
0
                    .tag("instance_id", instance_id_)
1320
0
                    .tag("packed_file_path", packed_file_path)
1321
0
                    .tag("attempt", attempt);
1322
0
            return -1;
1323
0
        }
1324
1325
4
        std::unique_ptr<Transaction> txn;
1326
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1327
4
        if (err != TxnErrorCode::TXN_OK) {
1328
0
            LOG_WARNING("failed to create txn when processing packed file")
1329
0
                    .tag("instance_id", instance_id_)
1330
0
                    .tag("packed_file_path", packed_file_path)
1331
0
                    .tag("attempt", attempt)
1332
0
                    .tag("err", err);
1333
0
            return -1;
1334
0
        }
1335
1336
4
        std::string packed_val;
1337
4
        err = txn->get(packed_key, &packed_val);
1338
4
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1339
0
            return 0;
1340
0
        }
1341
4
        if (err != TxnErrorCode::TXN_OK) {
1342
0
            LOG_WARNING("failed to get packed file kv")
1343
0
                    .tag("instance_id", instance_id_)
1344
0
                    .tag("packed_file_path", packed_file_path)
1345
0
                    .tag("attempt", attempt)
1346
0
                    .tag("err", err);
1347
0
            return -1;
1348
0
        }
1349
1350
4
        if (!packed_info.ParseFromString(packed_val)) {
1351
0
            LOG_WARNING("failed to parse packed file info")
1352
0
                    .tag("instance_id", instance_id_)
1353
0
                    .tag("packed_file_path", packed_file_path)
1354
0
                    .tag("attempt", attempt);
1355
0
            return -1;
1356
0
        }
1357
1358
4
        int64_t now_sec = ::time(nullptr);
1359
4
        bool corrected = packed_info.corrected();
1360
4
        bool due = config::force_immediate_recycle ||
1361
4
                   now_sec - packed_info.created_at_sec() >=
1362
4
                           config::packed_file_correction_delay_seconds;
1363
1364
4
        if (!corrected && due) {
1365
3
            bool changed = false;
1366
3
            if (correct_packed_file_info(&packed_info, &changed, packed_file_path, stats) != 0) {
1367
0
                LOG_WARNING("correct_packed_file_info failed")
1368
0
                        .tag("instance_id", instance_id_)
1369
0
                        .tag("packed_file_path", packed_file_path)
1370
0
                        .tag("attempt", attempt);
1371
0
                return -1;
1372
0
            }
1373
3
            if (changed) {
1374
3
                std::string updated;
1375
3
                if (!packed_info.SerializeToString(&updated)) {
1376
0
                    LOG_WARNING("failed to serialize packed file info after correction")
1377
0
                            .tag("instance_id", instance_id_)
1378
0
                            .tag("packed_file_path", packed_file_path)
1379
0
                            .tag("attempt", attempt);
1380
0
                    return -1;
1381
0
                }
1382
3
                txn->put(packed_key, updated);
1383
3
                err = txn->commit();
1384
3
                if (err == TxnErrorCode::TXN_OK) {
1385
3
                    if (stats) {
1386
3
                        ++stats->num_corrected;
1387
3
                    }
1388
3
                } else {
1389
0
                    if (err == TxnErrorCode::TXN_CONFLICT && attempt < max_retry_times) {
1390
0
                        LOG_WARNING(
1391
0
                                "failed to commit correction for packed file due to conflict, "
1392
0
                                "retrying")
1393
0
                                .tag("instance_id", instance_id_)
1394
0
                                .tag("packed_file_path", packed_file_path)
1395
0
                                .tag("attempt", attempt);
1396
0
                        sleep_for_packed_file_retry();
1397
0
                        packed_info.Clear();
1398
0
                        continue;
1399
0
                    }
1400
0
                    LOG_WARNING("failed to commit correction for packed file")
1401
0
                            .tag("instance_id", instance_id_)
1402
0
                            .tag("packed_file_path", packed_file_path)
1403
0
                            .tag("attempt", attempt)
1404
0
                            .tag("err", err);
1405
0
                    return -1;
1406
0
                }
1407
3
            }
1408
3
        }
1409
1410
4
        correction_ok = true;
1411
4
        break;
1412
4
    }
1413
1414
4
    if (!correction_ok) {
1415
0
        return -1;
1416
0
    }
1417
1418
4
    if (!(packed_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1419
4
          packed_info.ref_cnt() == 0)) {
1420
3
        return 0;
1421
3
    }
1422
1423
1
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
1424
0
        LOG_WARNING("packed file missing resource id when recycling")
1425
0
                .tag("instance_id", instance_id_)
1426
0
                .tag("packed_file_path", packed_file_path);
1427
0
        return -1;
1428
0
    }
1429
1
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
1430
1
    if (!accessor) {
1431
0
        LOG_WARNING("no accessor available to delete packed file")
1432
0
                .tag("instance_id", instance_id_)
1433
0
                .tag("packed_file_path", packed_file_path)
1434
0
                .tag("resource_id", packed_info.resource_id());
1435
0
        return -1;
1436
0
    }
1437
1
    int del_ret = accessor->delete_file(packed_file_path);
1438
1
    if (del_ret != 0 && del_ret != 1) {
1439
0
        LOG_WARNING("failed to delete packed file")
1440
0
                .tag("instance_id", instance_id_)
1441
0
                .tag("packed_file_path", packed_file_path)
1442
0
                .tag("resource_id", resource_id)
1443
0
                .tag("ret", del_ret);
1444
0
        return -1;
1445
0
    }
1446
1
    if (del_ret == 1) {
1447
0
        LOG_INFO("packed file already removed")
1448
0
                .tag("instance_id", instance_id_)
1449
0
                .tag("packed_file_path", packed_file_path)
1450
0
                .tag("resource_id", resource_id);
1451
1
    } else {
1452
1
        LOG_INFO("deleted packed file")
1453
1
                .tag("instance_id", instance_id_)
1454
1
                .tag("packed_file_path", packed_file_path)
1455
1
                .tag("resource_id", resource_id);
1456
1
    }
1457
1458
1
    for (int del_attempt = 1; del_attempt <= max_retry_times; ++del_attempt) {
1459
1
        std::unique_ptr<Transaction> del_txn;
1460
1
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
1461
1
        if (err != TxnErrorCode::TXN_OK) {
1462
0
            LOG_WARNING("failed to create txn when removing packed file kv")
1463
0
                    .tag("instance_id", instance_id_)
1464
0
                    .tag("packed_file_path", packed_file_path)
1465
0
                    .tag("del_attempt", del_attempt)
1466
0
                    .tag("err", err);
1467
0
            return -1;
1468
0
        }
1469
1470
1
        std::string latest_val;
1471
1
        err = del_txn->get(packed_key, &latest_val);
1472
1
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1473
0
            return 0;
1474
0
        }
1475
1
        if (err != TxnErrorCode::TXN_OK) {
1476
0
            LOG_WARNING("failed to re-read packed file kv before removal")
1477
0
                    .tag("instance_id", instance_id_)
1478
0
                    .tag("packed_file_path", packed_file_path)
1479
0
                    .tag("del_attempt", del_attempt)
1480
0
                    .tag("err", err);
1481
0
            return -1;
1482
0
        }
1483
1484
1
        cloud::PackedFileInfoPB latest_info;
1485
1
        if (!latest_info.ParseFromString(latest_val)) {
1486
0
            LOG_WARNING("failed to parse packed file info before removal")
1487
0
                    .tag("instance_id", instance_id_)
1488
0
                    .tag("packed_file_path", packed_file_path)
1489
0
                    .tag("del_attempt", del_attempt);
1490
0
            return -1;
1491
0
        }
1492
1493
1
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1494
1
              latest_info.ref_cnt() == 0)) {
1495
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
1496
0
                    .tag("instance_id", instance_id_)
1497
0
                    .tag("packed_file_path", packed_file_path)
1498
0
                    .tag("del_attempt", del_attempt);
1499
0
            return 0;
1500
0
        }
1501
1502
1
        del_txn->remove(packed_key);
1503
1
        err = del_txn->commit();
1504
1
        if (err == TxnErrorCode::TXN_OK) {
1505
1
            if (stats) {
1506
1
                ++stats->num_deleted;
1507
1
                stats->bytes_deleted += static_cast<int64_t>(packed_key.size()) +
1508
1
                                        static_cast<int64_t>(latest_val.size());
1509
1
                if (del_ret == 0 || del_ret == 1) {
1510
1
                    ++stats->num_object_deleted;
1511
1
                    int64_t object_size = latest_info.total_slice_bytes();
1512
1
                    if (object_size <= 0) {
1513
0
                        object_size = packed_info.total_slice_bytes();
1514
0
                    }
1515
1
                    stats->bytes_object_deleted += object_size;
1516
1
                }
1517
1
            }
1518
1
            LOG_INFO("removed packed file metadata")
1519
1
                    .tag("instance_id", instance_id_)
1520
1
                    .tag("packed_file_path", packed_file_path);
1521
1
            return 0;
1522
1
        }
1523
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
1524
0
            if (del_attempt >= max_retry_times) {
1525
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
1526
0
                        .tag("instance_id", instance_id_)
1527
0
                        .tag("packed_file_path", packed_file_path)
1528
0
                        .tag("del_attempt", del_attempt);
1529
0
                return -1;
1530
0
            }
1531
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
1532
0
                    .tag("instance_id", instance_id_)
1533
0
                    .tag("packed_file_path", packed_file_path)
1534
0
                    .tag("del_attempt", del_attempt);
1535
0
            sleep_for_packed_file_retry();
1536
0
            continue;
1537
0
        }
1538
0
        LOG_WARNING("failed to remove packed file kv")
1539
0
                .tag("instance_id", instance_id_)
1540
0
                .tag("packed_file_path", packed_file_path)
1541
0
                .tag("del_attempt", del_attempt)
1542
0
                .tag("err", err);
1543
0
        return -1;
1544
0
    }
1545
1546
0
    return -1;
1547
1
}
1548
1549
int InstanceRecycler::handle_packed_file_kv(std::string_view key, std::string_view /*value*/,
1550
4
                                            PackedFileRecycleStats* stats, int* ret) {
1551
4
    if (stats) {
1552
4
        ++stats->num_scanned;
1553
4
    }
1554
4
    std::string packed_file_path;
1555
4
    if (!decode_packed_file_key(key, &packed_file_path)) {
1556
0
        LOG_WARNING("failed to decode packed file key")
1557
0
                .tag("instance_id", instance_id_)
1558
0
                .tag("key", hex(key));
1559
0
        if (stats) {
1560
0
            ++stats->num_failed;
1561
0
        }
1562
0
        if (ret) {
1563
0
            *ret = -1;
1564
0
        }
1565
0
        return 0;
1566
0
    }
1567
1568
4
    std::string packed_key(key);
1569
4
    int process_ret = process_single_packed_file(packed_key, packed_file_path, stats);
1570
4
    if (process_ret != 0) {
1571
0
        if (stats) {
1572
0
            ++stats->num_failed;
1573
0
        }
1574
0
        if (ret) {
1575
0
            *ret = -1;
1576
0
        }
1577
0
    }
1578
4
    return 0;
1579
4
}
1580
1581
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
1582
9.77k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
1583
9.77k
    if (config::force_immediate_recycle) {
1584
15
        return 0L;
1585
15
    }
1586
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1587
9.75k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1588
9.75k
    int64_t retention_seconds = config::retention_seconds;
1589
9.75k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1590
7.80k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1591
7.80k
    }
1592
9.75k
    int64_t final_expiration = expiration + retention_seconds;
1593
9.75k
    if (*earlest_ts > final_expiration) {
1594
7
        *earlest_ts = final_expiration;
1595
7
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1596
7
    }
1597
9.75k
    return final_expiration;
1598
9.77k
}
1599
1600
int64_t calculate_partition_expired_time(
1601
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
1602
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
1603
9
    if (config::force_immediate_recycle) {
1604
3
        return 0L;
1605
3
    }
1606
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
1607
6
                                                            : partition_meta_pb.creation_time();
1608
6
    int64_t retention_seconds = config::retention_seconds;
1609
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
1610
6
        retention_seconds =
1611
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1612
6
    }
1613
6
    int64_t final_expiration = expiration + retention_seconds;
1614
6
    if (*earlest_ts > final_expiration) {
1615
2
        *earlest_ts = final_expiration;
1616
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1617
2
    }
1618
6
    return final_expiration;
1619
9
}
1620
1621
int64_t calculate_index_expired_time(const std::string& instance_id_,
1622
                                     const RecycleIndexPB& index_meta_pb,
1623
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1624
10
    if (config::force_immediate_recycle) {
1625
4
        return 0L;
1626
4
    }
1627
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1628
6
                                                        : index_meta_pb.creation_time();
1629
6
    int64_t retention_seconds = config::retention_seconds;
1630
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1631
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1632
6
    }
1633
6
    int64_t final_expiration = expiration + retention_seconds;
1634
6
    if (*earlest_ts > final_expiration) {
1635
2
        *earlest_ts = final_expiration;
1636
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1637
2
    }
1638
6
    return final_expiration;
1639
10
}
1640
1641
int64_t calculate_tmp_rowset_expired_time(
1642
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1643
106k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1644
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1645
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1646
    //  duration or timeout always < `retention_time` in practice.
1647
106k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1648
106k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1649
106k
                                 : tmp_rowset_meta_pb.creation_time();
1650
106k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1651
106k
    int64_t final_expiration = expiration + config::retention_seconds;
1652
106k
    if (*earlest_ts > final_expiration) {
1653
24
        *earlest_ts = final_expiration;
1654
24
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1655
24
    }
1656
106k
    return final_expiration;
1657
106k
}
1658
1659
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1660
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1661
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1662
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1663
8
        *earlest_ts = final_expiration / 1000;
1664
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1665
8
    }
1666
30.0k
    return final_expiration;
1667
30.0k
}
1668
1669
int64_t calculate_restore_job_expired_time(
1670
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1671
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1672
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1673
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1674
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1675
        // final state, recycle immediately
1676
41
        return 0L;
1677
41
    }
1678
    // not final state, wait much longer than the FE's timeout(1 day)
1679
0
    int64_t last_modified_s =
1680
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1681
0
    int64_t expiration = restore_job.expired_at_s() > 0
1682
0
                                 ? last_modified_s + restore_job.expired_at_s()
1683
0
                                 : last_modified_s;
1684
0
    int64_t final_expiration = expiration + config::retention_seconds;
1685
0
    if (*earlest_ts > final_expiration) {
1686
0
        *earlest_ts = final_expiration;
1687
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1688
0
    }
1689
0
    return final_expiration;
1690
41
}
1691
1692
2
int InstanceRecycler::abort_txn_for_related_rowset(int64_t txn_id) {
1693
2
    AbortTxnRequest req;
1694
2
    TxnInfoPB txn_info;
1695
2
    MetaServiceCode code = MetaServiceCode::OK;
1696
2
    std::string msg;
1697
2
    std::stringstream ss;
1698
2
    std::unique_ptr<Transaction> txn;
1699
2
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1700
2
    if (err != TxnErrorCode::TXN_OK) {
1701
0
        LOG_WARNING("failed to create txn").tag("err", err);
1702
0
        return -1;
1703
0
    }
1704
1705
    // get txn index
1706
2
    TxnIndexPB txn_idx_pb;
1707
2
    auto index_key = txn_index_key({instance_id_, txn_id});
1708
2
    std::string index_val;
1709
2
    err = txn->get(index_key, &index_val);
1710
2
    if (err != TxnErrorCode::TXN_OK) {
1711
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1712
            // maybe recycled
1713
0
            LOG_INFO("txn index not found, txn_id={} instance_id={}", txn_id, instance_id_)
1714
0
                    .tag("key", hex(index_key))
1715
0
                    .tag("txn_id", txn_id);
1716
0
            return 0;
1717
0
        }
1718
0
        LOG_WARNING("failed to get txn index")
1719
0
                .tag("err", err)
1720
0
                .tag("key", hex(index_key))
1721
0
                .tag("txn_id", txn_id);
1722
0
        return -1;
1723
0
    }
1724
2
    if (!txn_idx_pb.ParseFromString(index_val)) {
1725
0
        LOG_WARNING("failed to parse txn index")
1726
0
                .tag("err", err)
1727
0
                .tag("key", hex(index_key))
1728
0
                .tag("txn_id", txn_id);
1729
0
        return -1;
1730
0
    }
1731
1732
2
    auto info_key = txn_info_key({instance_id_, txn_idx_pb.tablet_index().db_id(), txn_id});
1733
2
    std::string info_val;
1734
2
    err = txn->get(info_key, &info_val);
1735
2
    if (err != TxnErrorCode::TXN_OK) {
1736
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1737
            // maybe recycled
1738
0
            LOG_INFO("txn info not found, txn_id={} instance_id={}", txn_id, instance_id_)
1739
0
                    .tag("key", hex(info_key))
1740
0
                    .tag("txn_id", txn_id);
1741
0
            return 0;
1742
0
        }
1743
0
        LOG_WARNING("failed to get txn info")
1744
0
                .tag("err", err)
1745
0
                .tag("key", hex(info_key))
1746
0
                .tag("txn_id", txn_id);
1747
0
        return -1;
1748
0
    }
1749
2
    if (!txn_info.ParseFromString(info_val)) {
1750
0
        LOG_WARNING("failed to parse txn info")
1751
0
                .tag("err", err)
1752
0
                .tag("key", hex(info_key))
1753
0
                .tag("txn_id", txn_id);
1754
0
        return -1;
1755
0
    }
1756
1757
2
    if (txn_info.status() != TxnStatusPB::TXN_STATUS_PREPARED) {
1758
0
        LOG_INFO("txn is not prepared status, txn_id={} status={}", txn_id, txn_info.status())
1759
0
                .tag("key", hex(info_key))
1760
0
                .tag("txn_id", txn_id);
1761
0
        return 0;
1762
0
    }
1763
1764
2
    req.set_txn_id(txn_id);
1765
1766
2
    LOG(INFO) << "begin abort txn for related rowset, txn_id=" << txn_id
1767
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString();
1768
1769
2
    _abort_txn(instance_id_, &req, txn.get(), txn_info, ss, code, msg);
1770
2
    err = txn->commit();
1771
2
    if (err != TxnErrorCode::TXN_OK) {
1772
0
        code = cast_as<ErrCategory::COMMIT>(err);
1773
0
        ss << "failed to commit kv txn, txn_id=" << txn_info.txn_id() << " err=" << err;
1774
0
        msg = ss.str();
1775
0
        return -1;
1776
0
    }
1777
1778
2
    LOG(INFO) << "finish abort txn for related rowset, txn_id=" << txn_id
1779
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString()
1780
2
              << " code=" << code << " msg=" << msg;
1781
1782
2
    return 0;
1783
2
}
1784
1785
4
int InstanceRecycler::abort_job_for_related_rowset(const RowsetMetaCloudPB& rowset_meta) {
1786
4
    FinishTabletJobRequest req;
1787
4
    FinishTabletJobResponse res;
1788
4
    req.set_action(FinishTabletJobRequest::ABORT);
1789
4
    MetaServiceCode code = MetaServiceCode::OK;
1790
4
    std::string msg;
1791
4
    std::stringstream ss;
1792
1793
4
    TabletIndexPB tablet_idx;
1794
4
    int ret = get_tablet_idx(txn_kv_.get(), instance_id_, rowset_meta.tablet_id(), tablet_idx);
1795
4
    if (ret == 1) {
1796
        // tablet maybe recycled, directly return 0
1797
1
        return 0;
1798
3
    } else if (ret != 0) {
1799
0
        LOG(WARNING) << "failed to get tablet index, tablet_id=" << rowset_meta.tablet_id()
1800
0
                     << " instance_id=" << instance_id_ << " ret=" << ret;
1801
0
        return ret;
1802
0
    }
1803
1804
3
    std::unique_ptr<Transaction> txn;
1805
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1806
3
    if (err != TxnErrorCode::TXN_OK) {
1807
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_ << " err=" << err;
1808
0
        return -1;
1809
0
    }
1810
1811
3
    std::string job_key =
1812
3
            job_tablet_key({instance_id_, tablet_idx.table_id(), tablet_idx.index_id(),
1813
3
                            tablet_idx.partition_id(), tablet_idx.tablet_id()});
1814
3
    std::string job_val;
1815
3
    err = txn->get(job_key, &job_val);
1816
3
    if (err != TxnErrorCode::TXN_OK) {
1817
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1818
0
            LOG(INFO) << "job not exists, instance_id=" << instance_id_
1819
0
                      << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1820
0
            return 0;
1821
0
        }
1822
0
        LOG(WARNING) << "failed to get job, instance_id=" << instance_id_
1823
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " err=" << err
1824
0
                     << " key=" << hex(job_key);
1825
0
        return -1;
1826
0
    }
1827
1828
3
    TabletJobInfoPB job_pb;
1829
3
    if (!job_pb.ParseFromString(job_val)) {
1830
0
        LOG(WARNING) << "failed to parse job, instance_id=" << instance_id_
1831
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1832
0
        return -1;
1833
0
    }
1834
1835
3
    std::string job_id {};
1836
3
    if (!job_pb.compaction().empty()) {
1837
2
        for (const auto& c : job_pb.compaction()) {
1838
2
            if (c.id() == rowset_meta.job_id()) {
1839
2
                job_id = c.id();
1840
2
                break;
1841
2
            }
1842
2
        }
1843
2
    } else if (job_pb.has_schema_change()) {
1844
1
        job_id = job_pb.schema_change().id();
1845
1
    }
1846
1847
3
    if (!job_id.empty() && rowset_meta.job_id() == job_id) {
1848
3
        LOG(INFO) << "begin to abort job for related rowset, job_id=" << rowset_meta.job_id()
1849
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id();
1850
3
        req.mutable_job()->CopyFrom(job_pb);
1851
3
        req.set_action(FinishTabletJobRequest::ABORT);
1852
3
        _finish_tablet_job(&req, &res, instance_id_, txn, txn_kv_.get(),
1853
3
                           delete_bitmap_lock_white_list_.get(), resource_mgr_.get(), code, msg,
1854
3
                           ss);
1855
3
        if (code != MetaServiceCode::OK) {
1856
0
            LOG(WARNING) << "failed to abort job, instance_id=" << instance_id_
1857
0
                         << " tablet_id=" << tablet_idx.tablet_id() << " code=" << code
1858
0
                         << " msg=" << msg;
1859
0
            return -1;
1860
0
        }
1861
3
        LOG(INFO) << "finish abort job for related rowset, job_id=" << rowset_meta.job_id()
1862
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id()
1863
3
                  << " code=" << code << " msg=" << msg;
1864
3
    } else {
1865
        // clang-format off
1866
0
        LOG(INFO) << "there is no job for related rowset, directly recycle rowset data"
1867
0
                  << ", instance_id=" << instance_id_ 
1868
0
                  << ", tablet_id=" << tablet_idx.tablet_id() 
1869
0
                  << ", job_id=" << job_id
1870
0
                  << ", rowset_id=" << rowset_meta.rowset_id_v2();
1871
        // clang-format on
1872
0
    }
1873
1874
3
    return 0;
1875
3
}
1876
1877
template <typename T>
1878
55.7k
RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) {
1879
55.7k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1880
51.9k
        return rowset_meta_pb.mutable_rowset_meta();
1881
51.9k
    } else {
1882
51.9k
        return &rowset_meta_pb;
1883
51.9k
    }
1884
55.7k
}
_ZN5doris5cloud19mutable_rowset_metaINS0_15RecycleRowsetPBEEEPNS_17RowsetMetaCloudPBERT_
Line
Count
Source
1878
3.75k
RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) {
1879
3.75k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1880
3.75k
        return rowset_meta_pb.mutable_rowset_meta();
1881
3.75k
    } else {
1882
3.75k
        return &rowset_meta_pb;
1883
3.75k
    }
1884
3.75k
}
_ZN5doris5cloud19mutable_rowset_metaINS_17RowsetMetaCloudPBEEEPS2_RT_
Line
Count
Source
1878
51.9k
RowsetMetaCloudPB* mutable_rowset_meta(T& rowset_meta_pb) {
1879
51.9k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1880
51.9k
        return rowset_meta_pb.mutable_rowset_meta();
1881
51.9k
    } else {
1882
51.9k
        return &rowset_meta_pb;
1883
51.9k
    }
1884
51.9k
}
1885
1886
template <typename T>
1887
223k
const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) {
1888
223k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1889
212k
        return rowset_meta_pb.rowset_meta();
1890
212k
    } else {
1891
212k
        return rowset_meta_pb;
1892
212k
    }
1893
223k
}
_ZN5doris5cloud11rowset_metaINS0_15RecycleRowsetPBEEERKNS_17RowsetMetaCloudPBERKT_
Line
Count
Source
1887
11.9k
const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) {
1888
11.9k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1889
11.9k
        return rowset_meta_pb.rowset_meta();
1890
11.9k
    } else {
1891
11.9k
        return rowset_meta_pb;
1892
11.9k
    }
1893
11.9k
}
_ZN5doris5cloud11rowset_metaINS_17RowsetMetaCloudPBEEERKS2_RKT_
Line
Count
Source
1887
212k
const RowsetMetaCloudPB& rowset_meta(const T& rowset_meta_pb) {
1888
212k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1889
212k
        return rowset_meta_pb.rowset_meta();
1890
212k
    } else {
1891
212k
        return rowset_meta_pb;
1892
212k
    }
1893
212k
}
1894
1895
/// Plain description of one pending abort (of a load txn or a tablet job) tied to a rowset.
/// All numeric fields default to 0, the strings to empty and the type to TXN.
struct DeferredRecycleAbortTask {
    /// What has to be aborted.
    enum class Type : uint8_t { TXN, JOB };

    Type type {Type::TXN};
    int64_t txn_id {0};        // txn to abort
    int64_t tablet_id {0};     // tablet the rowset belongs to
    int64_t start_version {0}; // rowset version range, begin
    int64_t end_version {0};   // rowset version range, end
    std::string rowset_id;     // rowset the task was derived from
    std::string job_id;        // job to abort
};
1909
1910
/// Plain record describing one rowset deletion that has been queued for later.
/// Strings default to empty and tablet_id to 0.
struct DeferredRecyclePrepareDeleteTask {
    std::string key;         // KV key of the record this task was created from
    std::string resource_id; // resource id associated with the rowset
    std::string rowset_id;   // rowset to delete
    int64_t tablet_id {0};   // tablet the rowset belongs to
};
1916
1917
template <typename T>
1918
57.7k
std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) {
1919
57.7k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1920
3.75k
        if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) {
1921
3.10k
            return std::nullopt;
1922
3.10k
        }
1923
3.75k
    }
1924
1925
654
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1926
654
    DeferredRecycleAbortTask task;
1927
654
    task.tablet_id = rs_meta.tablet_id();
1928
654
    task.start_version = rs_meta.start_version();
1929
654
    task.end_version = rs_meta.end_version();
1930
54.6k
    if (rs_meta.has_load_id()) {
1931
4
        task.type = DeferredRecycleAbortTask::Type::TXN;
1932
4
        task.txn_id = rs_meta.txn_id();
1933
4
        return task;
1934
4
    }
1935
54.6k
    if (rs_meta.has_job_id()) {
1936
6
        task.type = DeferredRecycleAbortTask::Type::JOB;
1937
6
        task.rowset_id = rs_meta.rowset_id_v2();
1938
6
        task.job_id = rs_meta.job_id();
1939
6
        return task;
1940
6
    }
1941
54.6k
    return std::nullopt;
1942
54.6k
}
_ZN5doris5cloud24make_deferred_abort_taskINS0_15RecycleRowsetPBEEESt8optionalINS0_24DeferredRecycleAbortTaskEERKT_
Line
Count
Source
1918
3.75k
std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) {
1919
3.75k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1920
3.75k
        if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) {
1921
3.10k
            return std::nullopt;
1922
3.10k
        }
1923
3.75k
    }
1924
1925
654
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1926
654
    DeferredRecycleAbortTask task;
1927
654
    task.tablet_id = rs_meta.tablet_id();
1928
654
    task.start_version = rs_meta.start_version();
1929
654
    task.end_version = rs_meta.end_version();
1930
654
    if (rs_meta.has_load_id()) {
1931
2
        task.type = DeferredRecycleAbortTask::Type::TXN;
1932
2
        task.txn_id = rs_meta.txn_id();
1933
2
        return task;
1934
2
    }
1935
652
    if (rs_meta.has_job_id()) {
1936
2
        task.type = DeferredRecycleAbortTask::Type::JOB;
1937
2
        task.rowset_id = rs_meta.rowset_id_v2();
1938
2
        task.job_id = rs_meta.job_id();
1939
2
        return task;
1940
2
    }
1941
650
    return std::nullopt;
1942
652
}
_ZN5doris5cloud24make_deferred_abort_taskINS_17RowsetMetaCloudPBEEESt8optionalINS0_24DeferredRecycleAbortTaskEERKT_
Line
Count
Source
1918
54.0k
std::optional<DeferredRecycleAbortTask> make_deferred_abort_task(const T& rowset_meta_pb) {
1919
54.0k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1920
54.0k
        if (rowset_meta_pb.type() != RecycleRowsetPB::PREPARE) {
1921
54.0k
            return std::nullopt;
1922
54.0k
        }
1923
54.0k
    }
1924
1925
54.0k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1926
54.0k
    DeferredRecycleAbortTask task;
1927
54.0k
    task.tablet_id = rs_meta.tablet_id();
1928
54.0k
    task.start_version = rs_meta.start_version();
1929
54.0k
    task.end_version = rs_meta.end_version();
1930
54.0k
    if (rs_meta.has_load_id()) {
1931
2
        task.type = DeferredRecycleAbortTask::Type::TXN;
1932
2
        task.txn_id = rs_meta.txn_id();
1933
2
        return task;
1934
2
    }
1935
54.0k
    if (rs_meta.has_job_id()) {
1936
4
        task.type = DeferredRecycleAbortTask::Type::JOB;
1937
4
        task.rowset_id = rs_meta.rowset_id_v2();
1938
4
        task.job_id = rs_meta.job_id();
1939
4
        return task;
1940
4
    }
1941
54.0k
    return std::nullopt;
1942
54.0k
}
1943
1944
/// Tells whether the rowset still has to be flagged as recycled, i.e. its is_recycled field is
/// either absent or false.
template <typename T>
bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) {
    const auto& meta = rowset_meta(rowset_meta_pb);
    const bool already_marked = meta.has_is_recycled() && meta.is_recycled();
    return !already_marked;
}
_ZN5doris5cloud28need_mark_rowset_as_recycledINS0_15RecycleRowsetPBEEEbRKT_
Line
Count
Source
1945
11.2k
bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) {
1946
11.2k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1947
11.2k
    return !rs_meta.has_is_recycled() || !rs_meta.is_recycled();
1948
11.2k
}
_ZN5doris5cloud28need_mark_rowset_as_recycledINS_17RowsetMetaCloudPBEEEbRKT_
Line
Count
Source
1945
158k
bool need_mark_rowset_as_recycled(const T& rowset_meta_pb) {
1946
158k
    const auto& rs_meta = rowset_meta(rowset_meta_pb);
1947
158k
    return !rs_meta.has_is_recycled() || !rs_meta.is_recycled();
1948
158k
}
1949
1950
template <typename T>
1951
int batch_mark_rowsets_as_recycled(TxnKv* txn_kv, const std::string& instance_id,
1952
42
                                   const std::vector<std::string>& keys) {
1953
42
    std::unique_ptr<Transaction> txn;
1954
42
    TxnErrorCode err = txn_kv->create_txn(&txn);
1955
42
    if (err != TxnErrorCode::TXN_OK) {
1956
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1957
0
        return -1;
1958
0
    }
1959
42
    std::vector<std::optional<std::string>> values;
1960
42
    err = txn->batch_get(&values, keys);
1961
42
    if (err != TxnErrorCode::TXN_OK) {
1962
0
        LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' '
1963
0
                     << "keys size=" << keys.size() << ' ' << "err=" << err;
1964
0
        return -1;
1965
0
    }
1966
42
    size_t total_keys = keys.size();
1967
55.8k
    for (size_t i = 0; i < total_keys; i++) {
1968
55.7k
        if (!values[i].has_value()) {
1969
            // has already been removed by commit_rowset
1970
0
            continue;
1971
0
        }
1972
55.7k
        auto key = keys[i];
1973
55.7k
        auto val = values[i].value();
1974
55.7k
        T rowset_meta_pb;
1975
55.7k
        if (!rowset_meta_pb.ParseFromString(val)) {
1976
0
            LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
1977
0
                         << " key=" << hex(key);
1978
0
            return -1;
1979
0
        }
1980
55.7k
        if (!need_mark_rowset_as_recycled(rowset_meta_pb)) {
1981
0
            continue;
1982
0
        }
1983
55.7k
        mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true);
1984
55.7k
        val.clear();
1985
55.7k
        rowset_meta_pb.SerializeToString(&val);
1986
55.7k
        txn->put(key, val);
1987
55.7k
    }
1988
42
    err = txn->commit();
1989
42
    if (err != TxnErrorCode::TXN_OK) {
1990
0
        LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1991
0
        return -1;
1992
0
    }
1993
1994
42
    return 0;
1995
42
}
_ZN5doris5cloud30batch_mark_rowsets_as_recycledINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EE
Line
Count
Source
1952
26
                                   const std::vector<std::string>& keys) {
1953
26
    std::unique_ptr<Transaction> txn;
1954
26
    TxnErrorCode err = txn_kv->create_txn(&txn);
1955
26
    if (err != TxnErrorCode::TXN_OK) {
1956
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1957
0
        return -1;
1958
0
    }
1959
26
    std::vector<std::optional<std::string>> values;
1960
26
    err = txn->batch_get(&values, keys);
1961
26
    if (err != TxnErrorCode::TXN_OK) {
1962
0
        LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' '
1963
0
                     << "keys size=" << keys.size() << ' ' << "err=" << err;
1964
0
        return -1;
1965
0
    }
1966
26
    size_t total_keys = keys.size();
1967
3.78k
    for (size_t i = 0; i < total_keys; i++) {
1968
3.75k
        if (!values[i].has_value()) {
1969
            // has already been removed by commit_rowset
1970
0
            continue;
1971
0
        }
1972
3.75k
        auto key = keys[i];
1973
3.75k
        auto val = values[i].value();
1974
3.75k
        T rowset_meta_pb;
1975
3.75k
        if (!rowset_meta_pb.ParseFromString(val)) {
1976
0
            LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
1977
0
                         << " key=" << hex(key);
1978
0
            return -1;
1979
0
        }
1980
3.75k
        if (!need_mark_rowset_as_recycled(rowset_meta_pb)) {
1981
0
            continue;
1982
0
        }
1983
3.75k
        mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true);
1984
3.75k
        val.clear();
1985
3.75k
        rowset_meta_pb.SerializeToString(&val);
1986
3.75k
        txn->put(key, val);
1987
3.75k
    }
1988
26
    err = txn->commit();
1989
26
    if (err != TxnErrorCode::TXN_OK) {
1990
0
        LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1991
0
        return -1;
1992
0
    }
1993
1994
26
    return 0;
1995
26
}
_ZN5doris5cloud30batch_mark_rowsets_as_recycledINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EE
Line
Count
Source
1952
16
                                   const std::vector<std::string>& keys) {
1953
16
    std::unique_ptr<Transaction> txn;
1954
16
    TxnErrorCode err = txn_kv->create_txn(&txn);
1955
16
    if (err != TxnErrorCode::TXN_OK) {
1956
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1957
0
        return -1;
1958
0
    }
1959
16
    std::vector<std::optional<std::string>> values;
1960
16
    err = txn->batch_get(&values, keys);
1961
16
    if (err != TxnErrorCode::TXN_OK) {
1962
0
        LOG(WARNING) << "failed to batch get rowset meta, instance_id=" << instance_id << ' '
1963
0
                     << "keys size=" << keys.size() << ' ' << "err=" << err;
1964
0
        return -1;
1965
0
    }
1966
16
    size_t total_keys = keys.size();
1967
52.0k
    for (size_t i = 0; i < total_keys; i++) {
1968
52.0k
        if (!values[i].has_value()) {
1969
            // has already been removed by commit_rowset
1970
0
            continue;
1971
0
        }
1972
52.0k
        auto key = keys[i];
1973
52.0k
        auto val = values[i].value();
1974
52.0k
        T rowset_meta_pb;
1975
52.0k
        if (!rowset_meta_pb.ParseFromString(val)) {
1976
0
            LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
1977
0
                         << " key=" << hex(key);
1978
0
            return -1;
1979
0
        }
1980
52.0k
        if (!need_mark_rowset_as_recycled(rowset_meta_pb)) {
1981
0
            continue;
1982
0
        }
1983
52.0k
        mutable_rowset_meta(rowset_meta_pb)->set_is_recycled(true);
1984
52.0k
        val.clear();
1985
52.0k
        rowset_meta_pb.SerializeToString(&val);
1986
52.0k
        txn->put(key, val);
1987
52.0k
    }
1988
16
    err = txn->commit();
1989
16
    if (err != TxnErrorCode::TXN_OK) {
1990
0
        LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1991
0
        return -1;
1992
0
    }
1993
1994
16
    return 0;
1995
16
}
1996
1997
template <typename T>
1998
int collect_deferred_abort_tasks(TxnKv* txn_kv, const std::string& instance_id,
1999
                                 const std::vector<std::string>& keys,
2000
                                 std::vector<DeferredRecycleAbortTask>* abort_tasks,
2001
5
                                 bool skip_base_version) {
2002
5
    constexpr size_t kAbortCheckBatchSize = 256;
2003
10
    for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) {
2004
5
        size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize);
2005
5
        std::unique_ptr<Transaction> txn;
2006
5
        TxnErrorCode err = txn_kv->create_txn(&txn);
2007
5
        if (err != TxnErrorCode::TXN_OK) {
2008
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
2009
0
            return -1;
2010
0
        }
2011
10
        for (size_t idx = offset; idx < limit; ++idx) {
2012
5
            const std::string& key = keys[idx];
2013
5
            std::string val;
2014
5
            err = txn->get(key, &val);
2015
5
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2016
                // has already been removed
2017
0
                continue;
2018
0
            }
2019
5
            if (err != TxnErrorCode::TXN_OK) {
2020
0
                LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id
2021
0
                             << " key=" << hex(key);
2022
0
                return -1;
2023
0
            }
2024
5
            T rowset_meta_pb;
2025
5
            if (!rowset_meta_pb.ParseFromString(val)) {
2026
0
                LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2027
0
                             << " key=" << hex(key);
2028
0
                return -1;
2029
0
            }
2030
5
            if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) {
2031
0
                continue;
2032
0
            }
2033
5
            if (auto abort_task = make_deferred_abort_task(rowset_meta_pb);
2034
5
                abort_task.has_value()) {
2035
5
                abort_tasks->emplace_back(std::move(*abort_task));
2036
5
            }
2037
5
        }
2038
5
    }
2039
5
    return 0;
2040
5
}
_ZN5doris5cloud28collect_deferred_abort_tasksINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EEPSD_INS0_24DeferredRecycleAbortTaskESaISI_EEb
Line
Count
Source
2001
2
                                 bool skip_base_version) {
2002
2
    constexpr size_t kAbortCheckBatchSize = 256;
2003
4
    for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) {
2004
2
        size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize);
2005
2
        std::unique_ptr<Transaction> txn;
2006
2
        TxnErrorCode err = txn_kv->create_txn(&txn);
2007
2
        if (err != TxnErrorCode::TXN_OK) {
2008
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
2009
0
            return -1;
2010
0
        }
2011
4
        for (size_t idx = offset; idx < limit; ++idx) {
2012
2
            const std::string& key = keys[idx];
2013
2
            std::string val;
2014
2
            err = txn->get(key, &val);
2015
2
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2016
                // has already been removed
2017
0
                continue;
2018
0
            }
2019
2
            if (err != TxnErrorCode::TXN_OK) {
2020
0
                LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id
2021
0
                             << " key=" << hex(key);
2022
0
                return -1;
2023
0
            }
2024
2
            T rowset_meta_pb;
2025
2
            if (!rowset_meta_pb.ParseFromString(val)) {
2026
0
                LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2027
0
                             << " key=" << hex(key);
2028
0
                return -1;
2029
0
            }
2030
2
            if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) {
2031
0
                continue;
2032
0
            }
2033
2
            if (auto abort_task = make_deferred_abort_task(rowset_meta_pb);
2034
2
                abort_task.has_value()) {
2035
2
                abort_tasks->emplace_back(std::move(*abort_task));
2036
2
            }
2037
2
        }
2038
2
    }
2039
2
    return 0;
2040
2
}
_ZN5doris5cloud28collect_deferred_abort_tasksINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKSt6vectorISA_SaISA_EEPSD_INS0_24DeferredRecycleAbortTaskESaISI_EEb
Line
Count
Source
2001
3
                                 bool skip_base_version) {
2002
3
    constexpr size_t kAbortCheckBatchSize = 256;
2003
6
    for (size_t offset = 0; offset < keys.size(); offset += kAbortCheckBatchSize) {
2004
3
        size_t limit = std::min(keys.size(), offset + kAbortCheckBatchSize);
2005
3
        std::unique_ptr<Transaction> txn;
2006
3
        TxnErrorCode err = txn_kv->create_txn(&txn);
2007
3
        if (err != TxnErrorCode::TXN_OK) {
2008
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
2009
0
            return -1;
2010
0
        }
2011
6
        for (size_t idx = offset; idx < limit; ++idx) {
2012
3
            const std::string& key = keys[idx];
2013
3
            std::string val;
2014
3
            err = txn->get(key, &val);
2015
3
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2016
                // has already been removed
2017
0
                continue;
2018
0
            }
2019
3
            if (err != TxnErrorCode::TXN_OK) {
2020
0
                LOG(WARNING) << "failed to get rowset meta, instance_id=" << instance_id
2021
0
                             << " key=" << hex(key);
2022
0
                return -1;
2023
0
            }
2024
3
            T rowset_meta_pb;
2025
3
            if (!rowset_meta_pb.ParseFromString(val)) {
2026
0
                LOG(WARNING) << "failed to parse rowset meta, instance_id=" << instance_id
2027
0
                             << " key=" << hex(key);
2028
0
                return -1;
2029
0
            }
2030
3
            if (skip_base_version && rowset_meta(rowset_meta_pb).end_version() == 1) {
2031
0
                continue;
2032
0
            }
2033
3
            if (auto abort_task = make_deferred_abort_task(rowset_meta_pb);
2034
3
                abort_task.has_value()) {
2035
3
                abort_tasks->emplace_back(std::move(*abort_task));
2036
3
            }
2037
3
        }
2038
3
    }
2039
3
    return 0;
2040
3
}
2041
2042
template <typename T>
2043
int InstanceRecycler::batch_abort_txn_or_job_for_recycle(const std::vector<std::string>& keys,
2044
5
                                                         bool skip_base_version) {
2045
5
    std::vector<DeferredRecycleAbortTask> abort_tasks;
2046
5
    if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks,
2047
5
                                        skip_base_version) != 0) {
2048
0
        LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_;
2049
0
        return -1;
2050
0
    }
2051
5
    for (const auto& abort_task : abort_tasks) {
2052
5
        LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_
2053
5
                  << " tablet_id=" << abort_task.tablet_id << " version=["
2054
5
                  << abort_task.start_version << '-' << abort_task.end_version << "]";
2055
5
        int abort_ret = 0;
2056
5
        if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) {
2057
2
            abort_ret = abort_txn_for_related_rowset(abort_task.txn_id);
2058
3
        } else {
2059
3
            RowsetMetaCloudPB rowset_meta;
2060
3
            rowset_meta.set_tablet_id(abort_task.tablet_id);
2061
3
            rowset_meta.set_rowset_id_v2(abort_task.rowset_id);
2062
3
            rowset_meta.set_job_id(abort_task.job_id);
2063
3
            abort_ret = abort_job_for_related_rowset(rowset_meta);
2064
3
        }
2065
5
        if (abort_ret != 0) {
2066
0
            LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
2067
0
                         << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=["
2068
0
                         << abort_task.start_version << '-' << abort_task.end_version << "]";
2069
0
            return abort_ret;
2070
0
        }
2071
5
    }
2072
5
    return 0;
2073
5
}
_ZN5doris5cloud16InstanceRecycler34batch_abort_txn_or_job_for_recycleINS0_15RecycleRowsetPBEEEiRKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISA_EEb
Line
Count
Source
2044
2
                                                         bool skip_base_version) {
2045
2
    std::vector<DeferredRecycleAbortTask> abort_tasks;
2046
2
    if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks,
2047
2
                                        skip_base_version) != 0) {
2048
0
        LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_;
2049
0
        return -1;
2050
0
    }
2051
2
    for (const auto& abort_task : abort_tasks) {
2052
2
        LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_
2053
2
                  << " tablet_id=" << abort_task.tablet_id << " version=["
2054
2
                  << abort_task.start_version << '-' << abort_task.end_version << "]";
2055
2
        int abort_ret = 0;
2056
2
        if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) {
2057
1
            abort_ret = abort_txn_for_related_rowset(abort_task.txn_id);
2058
1
        } else {
2059
1
            RowsetMetaCloudPB rowset_meta;
2060
1
            rowset_meta.set_tablet_id(abort_task.tablet_id);
2061
1
            rowset_meta.set_rowset_id_v2(abort_task.rowset_id);
2062
1
            rowset_meta.set_job_id(abort_task.job_id);
2063
1
            abort_ret = abort_job_for_related_rowset(rowset_meta);
2064
1
        }
2065
2
        if (abort_ret != 0) {
2066
0
            LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
2067
0
                         << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=["
2068
0
                         << abort_task.start_version << '-' << abort_task.end_version << "]";
2069
0
            return abort_ret;
2070
0
        }
2071
2
    }
2072
2
    return 0;
2073
2
}
_ZN5doris5cloud16InstanceRecycler34batch_abort_txn_or_job_for_recycleINS_17RowsetMetaCloudPBEEEiRKSt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaISA_EEb
Line
Count
Source
2044
3
                                                         bool skip_base_version) {
2045
3
    std::vector<DeferredRecycleAbortTask> abort_tasks;
2046
3
    if (collect_deferred_abort_tasks<T>(txn_kv_.get(), instance_id_, keys, &abort_tasks,
2047
3
                                        skip_base_version) != 0) {
2048
0
        LOG(WARNING) << "failed to collect rowset abort tasks, instance_id=" << instance_id_;
2049
0
        return -1;
2050
0
    }
2051
3
    for (const auto& abort_task : abort_tasks) {
2052
3
        LOG(INFO) << "begin to abort txn or job for related rowset, instance_id=" << instance_id_
2053
3
                  << " tablet_id=" << abort_task.tablet_id << " version=["
2054
3
                  << abort_task.start_version << '-' << abort_task.end_version << "]";
2055
3
        int abort_ret = 0;
2056
3
        if (abort_task.type == DeferredRecycleAbortTask::Type::TXN) {
2057
1
            abort_ret = abort_txn_for_related_rowset(abort_task.txn_id);
2058
2
        } else {
2059
2
            RowsetMetaCloudPB rowset_meta;
2060
2
            rowset_meta.set_tablet_id(abort_task.tablet_id);
2061
2
            rowset_meta.set_rowset_id_v2(abort_task.rowset_id);
2062
2
            rowset_meta.set_job_id(abort_task.job_id);
2063
2
            abort_ret = abort_job_for_related_rowset(rowset_meta);
2064
2
        }
2065
3
        if (abort_ret != 0) {
2066
0
            LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
2067
0
                         << instance_id_ << " tablet_id=" << abort_task.tablet_id << " version=["
2068
0
                         << abort_task.start_version << '-' << abort_task.end_version << "]";
2069
0
            return abort_ret;
2070
0
        }
2071
3
    }
2072
3
    return 0;
2073
3
}
2074
2075
int collect_prepare_delete_tasks(TxnKv* txn_kv, const std::string& instance_id,
2076
                                 const std::vector<std::string>& keys,
2077
23
                                 std::vector<DeferredRecyclePrepareDeleteTask>* delete_tasks) {
2078
23
    constexpr size_t kPrepareCheckBatchSize = 256;
2079
46
    for (size_t offset = 0; offset < keys.size(); offset += kPrepareCheckBatchSize) {
2080
23
        size_t limit = std::min(keys.size(), offset + kPrepareCheckBatchSize);
2081
23
        std::unique_ptr<Transaction> txn;
2082
23
        TxnErrorCode err = txn_kv->create_txn(&txn);
2083
23
        if (err != TxnErrorCode::TXN_OK) {
2084
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
2085
0
            return -1;
2086
0
        }
2087
675
        for (size_t idx = offset; idx < limit; ++idx) {
2088
652
            const std::string& key = keys[idx];
2089
652
            std::string val;
2090
652
            err = txn->get(key, &val);
2091
652
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
2092
                // has already been removed
2093
0
                continue;
2094
0
            }
2095
652
            if (err != TxnErrorCode::TXN_OK) {
2096
0
                LOG(WARNING) << "failed to get recycle rowset, instance_id=" << instance_id
2097
0
                             << " key=" << hex(key);
2098
0
                return -1;
2099
0
            }
2100
652
            RecycleRowsetPB rowset;
2101
652
            if (!rowset.ParseFromString(val)) {
2102
0
                LOG(WARNING) << "failed to parse recycle rowset, instance_id=" << instance_id
2103
0
                             << " key=" << hex(key);
2104
0
                return -1;
2105
0
            }
2106
652
            if (rowset.type() != RecycleRowsetPB::PREPARE) {
2107
0
                continue;
2108
0
            }
2109
652
            const auto& rs_meta = rowset.rowset_meta();
2110
652
            delete_tasks->push_back(
2111
652
                    {key, rs_meta.resource_id(), rs_meta.rowset_id_v2(), rs_meta.tablet_id()});
2112
652
        }
2113
23
    }
2114
23
    return 0;
2115
23
}
2116
2117
1
int InstanceRecycler::recycle_ref_rowsets(bool* has_unrecycled_rowsets) {
2118
1
    const std::string task_name = "recycle_ref_rowsets";
2119
1
    *has_unrecycled_rowsets = false;
2120
2121
1
    std::string data_rowset_ref_count_key_start =
2122
1
            versioned::data_rowset_ref_count_key({instance_id_, 0, ""});
2123
1
    std::string data_rowset_ref_count_key_end =
2124
1
            versioned::data_rowset_ref_count_key({instance_id_, INT64_MAX, ""});
2125
2126
1
    LOG_WARNING("begin to recycle ref rowsets").tag("instance_id", instance_id_);
2127
2128
1
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2129
1
    register_recycle_task(task_name, start_time);
2130
2131
1
    DORIS_CLOUD_DEFER {
2132
1
        unregister_recycle_task(task_name);
2133
1
        int64_t cost =
2134
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2135
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
2136
1
                .tag("instance_id", instance_id_);
2137
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
Line
Count
Source
2131
1
    DORIS_CLOUD_DEFER {
2132
1
        unregister_recycle_task(task_name);
2133
1
        int64_t cost =
2134
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2135
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
2136
1
                .tag("instance_id", instance_id_);
2137
1
    };
2138
2139
    // Phase 1: Scan to collect all tablet_ids that have rowset ref counts
2140
1
    std::set<int64_t> tablets_with_refs;
2141
1
    int64_t num_scanned = 0;
2142
2143
1
    auto scan_func = [&](std::string_view k, std::string_view v) -> int {
2144
0
        ++num_scanned;
2145
0
        int64_t tablet_id;
2146
0
        std::string rowset_id;
2147
0
        std::string_view key(k);
2148
0
        if (!versioned::decode_data_rowset_ref_count_key(&key, &tablet_id, &rowset_id)) {
2149
0
            LOG_WARNING("failed to decode data rowset ref count key").tag("key", hex(k));
2150
0
            return 0; // Continue scanning
2151
0
        }
2152
2153
0
        tablets_with_refs.insert(tablet_id);
2154
0
        return 0;
2155
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
2156
2157
1
    if (scan_and_recycle(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end,
2158
1
                         std::move(scan_func)) != 0) {
2159
0
        LOG_WARNING("failed to scan data rowset ref count keys");
2160
0
        return -1;
2161
0
    }
2162
2163
1
    LOG_INFO("collected {} tablets with rowset refs, scanned {} ref count keys",
2164
1
             tablets_with_refs.size(), num_scanned)
2165
1
            .tag("instance_id", instance_id_);
2166
2167
    // Phase 2: Recycle each tablet
2168
1
    int64_t num_recycled_tablets = 0;
2169
1
    for (int64_t tablet_id : tablets_with_refs) {
2170
0
        if (stopped()) {
2171
0
            LOG_INFO("recycler stopped, skip remaining tablets")
2172
0
                    .tag("instance_id", instance_id_)
2173
0
                    .tag("tablets_processed", num_recycled_tablets)
2174
0
                    .tag("tablets_remaining", tablets_with_refs.size() - num_recycled_tablets);
2175
0
            break;
2176
0
        }
2177
2178
0
        RecyclerMetricsContext metrics_context(instance_id_, task_name);
2179
0
        if (recycle_versioned_tablet(tablet_id, metrics_context) != 0) {
2180
0
            LOG_WARNING("failed to recycle tablet")
2181
0
                    .tag("instance_id", instance_id_)
2182
0
                    .tag("tablet_id", tablet_id);
2183
0
            return -1;
2184
0
        }
2185
0
        ++num_recycled_tablets;
2186
0
    }
2187
2188
1
    LOG_INFO("recycled {} tablets", num_recycled_tablets)
2189
1
            .tag("instance_id", instance_id_)
2190
1
            .tag("total_tablets", tablets_with_refs.size());
2191
2192
    // Phase 3: Scan again to check if any ref count keys still exist
2193
1
    std::unique_ptr<Transaction> txn;
2194
1
    TxnErrorCode err = txn_kv_->create_txn(&txn);
2195
1
    if (err != TxnErrorCode::TXN_OK) {
2196
0
        LOG_WARNING("failed to create txn for final check")
2197
0
                .tag("instance_id", instance_id_)
2198
0
                .tag("err", err);
2199
0
        return -1;
2200
0
    }
2201
2202
1
    std::unique_ptr<RangeGetIterator> iter;
2203
1
    err = txn->get(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end, &iter, true);
2204
1
    if (err != TxnErrorCode::TXN_OK) {
2205
0
        LOG_WARNING("failed to create range iterator for final check")
2206
0
                .tag("instance_id", instance_id_)
2207
0
                .tag("err", err);
2208
0
        return -1;
2209
0
    }
2210
2211
1
    *has_unrecycled_rowsets = iter->has_next();
2212
1
    if (*has_unrecycled_rowsets) {
2213
0
        LOG_INFO("still has unrecycled rowsets after recycle_ref_rowsets")
2214
0
                .tag("instance_id", instance_id_);
2215
0
    }
2216
2217
1
    return 0;
2218
1
}
2219
2220
17
int InstanceRecycler::recycle_indexes() {
2221
17
    const std::string task_name = "recycle_indexes";
2222
17
    int64_t num_scanned = 0;
2223
17
    int64_t num_expired = 0;
2224
17
    int64_t num_recycled = 0;
2225
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2226
2227
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
2228
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
2229
17
    std::string index_key0;
2230
17
    std::string index_key1;
2231
17
    recycle_index_key(index_key_info0, &index_key0);
2232
17
    recycle_index_key(index_key_info1, &index_key1);
2233
2234
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
2235
2236
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2237
17
    register_recycle_task(task_name, start_time);
2238
2239
17
    DORIS_CLOUD_DEFER {
2240
17
        unregister_recycle_task(task_name);
2241
17
        int64_t cost =
2242
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2243
17
        metrics_context.finish_report();
2244
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2245
17
                .tag("instance_id", instance_id_)
2246
17
                .tag("num_scanned", num_scanned)
2247
17
                .tag("num_expired", num_expired)
2248
17
                .tag("num_recycled", num_recycled);
2249
17
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2239
2
    DORIS_CLOUD_DEFER {
2240
2
        unregister_recycle_task(task_name);
2241
2
        int64_t cost =
2242
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2243
2
        metrics_context.finish_report();
2244
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2245
2
                .tag("instance_id", instance_id_)
2246
2
                .tag("num_scanned", num_scanned)
2247
2
                .tag("num_expired", num_expired)
2248
2
                .tag("num_recycled", num_recycled);
2249
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2239
15
    DORIS_CLOUD_DEFER {
2240
15
        unregister_recycle_task(task_name);
2241
15
        int64_t cost =
2242
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2243
15
        metrics_context.finish_report();
2244
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2245
15
                .tag("instance_id", instance_id_)
2246
15
                .tag("num_scanned", num_scanned)
2247
15
                .tag("num_expired", num_expired)
2248
15
                .tag("num_recycled", num_recycled);
2249
15
    };
2250
2251
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2252
2253
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
2254
17
    std::vector<std::string_view> index_keys;
2255
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2256
10
        ++num_scanned;
2257
10
        RecycleIndexPB index_pb;
2258
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2259
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2260
0
            return -1;
2261
0
        }
2262
10
        int64_t current_time = ::time(nullptr);
2263
10
        if (current_time <
2264
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2265
0
            return 0;
2266
0
        }
2267
10
        ++num_expired;
2268
        // decode index_id
2269
10
        auto k1 = k;
2270
10
        k1.remove_prefix(1);
2271
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2272
10
        decode_key(&k1, &out);
2273
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2274
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2275
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2276
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2277
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2278
        // Change state to RECYCLING
2279
10
        std::unique_ptr<Transaction> txn;
2280
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2281
10
        if (err != TxnErrorCode::TXN_OK) {
2282
0
            LOG_WARNING("failed to create txn").tag("err", err);
2283
0
            return -1;
2284
0
        }
2285
10
        std::string val;
2286
10
        err = txn->get(k, &val);
2287
10
        if (err ==
2288
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2289
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2290
0
            return 0;
2291
0
        }
2292
10
        if (err != TxnErrorCode::TXN_OK) {
2293
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2294
0
            return -1;
2295
0
        }
2296
10
        index_pb.Clear();
2297
10
        if (!index_pb.ParseFromString(val)) {
2298
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2299
0
            return -1;
2300
0
        }
2301
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2302
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2303
9
            txn->put(k, index_pb.SerializeAsString());
2304
9
            err = txn->commit();
2305
9
            if (err != TxnErrorCode::TXN_OK) {
2306
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2307
0
                return -1;
2308
0
            }
2309
9
        }
2310
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2311
1
            LOG_WARNING("failed to recycle tablets under index")
2312
1
                    .tag("table_id", index_pb.table_id())
2313
1
                    .tag("instance_id", instance_id_)
2314
1
                    .tag("index_id", index_id);
2315
1
            return -1;
2316
1
        }
2317
2318
9
        if (index_pb.has_db_id()) {
2319
            // Recycle the versioned keys
2320
3
            std::unique_ptr<Transaction> txn;
2321
3
            err = txn_kv_->create_txn(&txn);
2322
3
            if (err != TxnErrorCode::TXN_OK) {
2323
0
                LOG_WARNING("failed to create txn").tag("err", err);
2324
0
                return -1;
2325
0
            }
2326
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2327
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2328
3
            std::string index_inverted_key = versioned::index_inverted_key(
2329
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2330
3
            versioned_remove_all(txn.get(), meta_key);
2331
3
            txn->remove(index_key);
2332
3
            txn->remove(index_inverted_key);
2333
3
            err = txn->commit();
2334
3
            if (err != TxnErrorCode::TXN_OK) {
2335
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2336
0
                return -1;
2337
0
            }
2338
3
        }
2339
2340
9
        metrics_context.total_recycled_num = ++num_recycled;
2341
9
        metrics_context.report();
2342
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2343
9
        index_keys.push_back(k);
2344
9
        return 0;
2345
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2255
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2256
2
        ++num_scanned;
2257
2
        RecycleIndexPB index_pb;
2258
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2259
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2260
0
            return -1;
2261
0
        }
2262
2
        int64_t current_time = ::time(nullptr);
2263
2
        if (current_time <
2264
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2265
0
            return 0;
2266
0
        }
2267
2
        ++num_expired;
2268
        // decode index_id
2269
2
        auto k1 = k;
2270
2
        k1.remove_prefix(1);
2271
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2272
2
        decode_key(&k1, &out);
2273
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2274
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2275
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2276
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2277
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2278
        // Change state to RECYCLING
2279
2
        std::unique_ptr<Transaction> txn;
2280
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2281
2
        if (err != TxnErrorCode::TXN_OK) {
2282
0
            LOG_WARNING("failed to create txn").tag("err", err);
2283
0
            return -1;
2284
0
        }
2285
2
        std::string val;
2286
2
        err = txn->get(k, &val);
2287
2
        if (err ==
2288
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2289
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2290
0
            return 0;
2291
0
        }
2292
2
        if (err != TxnErrorCode::TXN_OK) {
2293
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2294
0
            return -1;
2295
0
        }
2296
2
        index_pb.Clear();
2297
2
        if (!index_pb.ParseFromString(val)) {
2298
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2299
0
            return -1;
2300
0
        }
2301
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2302
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2303
1
            txn->put(k, index_pb.SerializeAsString());
2304
1
            err = txn->commit();
2305
1
            if (err != TxnErrorCode::TXN_OK) {
2306
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2307
0
                return -1;
2308
0
            }
2309
1
        }
2310
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2311
1
            LOG_WARNING("failed to recycle tablets under index")
2312
1
                    .tag("table_id", index_pb.table_id())
2313
1
                    .tag("instance_id", instance_id_)
2314
1
                    .tag("index_id", index_id);
2315
1
            return -1;
2316
1
        }
2317
2318
1
        if (index_pb.has_db_id()) {
2319
            // Recycle the versioned keys
2320
1
            std::unique_ptr<Transaction> txn;
2321
1
            err = txn_kv_->create_txn(&txn);
2322
1
            if (err != TxnErrorCode::TXN_OK) {
2323
0
                LOG_WARNING("failed to create txn").tag("err", err);
2324
0
                return -1;
2325
0
            }
2326
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2327
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2328
1
            std::string index_inverted_key = versioned::index_inverted_key(
2329
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2330
1
            versioned_remove_all(txn.get(), meta_key);
2331
1
            txn->remove(index_key);
2332
1
            txn->remove(index_inverted_key);
2333
1
            err = txn->commit();
2334
1
            if (err != TxnErrorCode::TXN_OK) {
2335
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2336
0
                return -1;
2337
0
            }
2338
1
        }
2339
2340
1
        metrics_context.total_recycled_num = ++num_recycled;
2341
1
        metrics_context.report();
2342
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2343
1
        index_keys.push_back(k);
2344
1
        return 0;
2345
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2255
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2256
8
        ++num_scanned;
2257
8
        RecycleIndexPB index_pb;
2258
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2259
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2260
0
            return -1;
2261
0
        }
2262
8
        int64_t current_time = ::time(nullptr);
2263
8
        if (current_time <
2264
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2265
0
            return 0;
2266
0
        }
2267
8
        ++num_expired;
2268
        // decode index_id
2269
8
        auto k1 = k;
2270
8
        k1.remove_prefix(1);
2271
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2272
8
        decode_key(&k1, &out);
2273
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2274
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2275
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2276
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2277
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2278
        // Change state to RECYCLING
2279
8
        std::unique_ptr<Transaction> txn;
2280
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2281
8
        if (err != TxnErrorCode::TXN_OK) {
2282
0
            LOG_WARNING("failed to create txn").tag("err", err);
2283
0
            return -1;
2284
0
        }
2285
8
        std::string val;
2286
8
        err = txn->get(k, &val);
2287
8
        if (err ==
2288
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2289
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2290
0
            return 0;
2291
0
        }
2292
8
        if (err != TxnErrorCode::TXN_OK) {
2293
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2294
0
            return -1;
2295
0
        }
2296
8
        index_pb.Clear();
2297
8
        if (!index_pb.ParseFromString(val)) {
2298
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2299
0
            return -1;
2300
0
        }
2301
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2302
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2303
8
            txn->put(k, index_pb.SerializeAsString());
2304
8
            err = txn->commit();
2305
8
            if (err != TxnErrorCode::TXN_OK) {
2306
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2307
0
                return -1;
2308
0
            }
2309
8
        }
2310
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2311
0
            LOG_WARNING("failed to recycle tablets under index")
2312
0
                    .tag("table_id", index_pb.table_id())
2313
0
                    .tag("instance_id", instance_id_)
2314
0
                    .tag("index_id", index_id);
2315
0
            return -1;
2316
0
        }
2317
2318
8
        if (index_pb.has_db_id()) {
2319
            // Recycle the versioned keys
2320
2
            std::unique_ptr<Transaction> txn;
2321
2
            err = txn_kv_->create_txn(&txn);
2322
2
            if (err != TxnErrorCode::TXN_OK) {
2323
0
                LOG_WARNING("failed to create txn").tag("err", err);
2324
0
                return -1;
2325
0
            }
2326
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2327
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2328
2
            std::string index_inverted_key = versioned::index_inverted_key(
2329
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2330
2
            versioned_remove_all(txn.get(), meta_key);
2331
2
            txn->remove(index_key);
2332
2
            txn->remove(index_inverted_key);
2333
2
            err = txn->commit();
2334
2
            if (err != TxnErrorCode::TXN_OK) {
2335
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2336
0
                return -1;
2337
0
            }
2338
2
        }
2339
2340
8
        metrics_context.total_recycled_num = ++num_recycled;
2341
8
        metrics_context.report();
2342
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2343
8
        index_keys.push_back(k);
2344
8
        return 0;
2345
8
    };
2346
2347
17
    auto loop_done = [&index_keys, this]() -> int {
2348
6
        if (index_keys.empty()) return 0;
2349
5
        DORIS_CLOUD_DEFER {
2350
5
            index_keys.clear();
2351
5
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2349
1
        DORIS_CLOUD_DEFER {
2350
1
            index_keys.clear();
2351
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2349
4
        DORIS_CLOUD_DEFER {
2350
4
            index_keys.clear();
2351
4
        };
2352
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2353
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2354
0
            return -1;
2355
0
        }
2356
5
        return 0;
2357
5
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2347
2
    auto loop_done = [&index_keys, this]() -> int {
2348
2
        if (index_keys.empty()) return 0;
2349
1
        DORIS_CLOUD_DEFER {
2350
1
            index_keys.clear();
2351
1
        };
2352
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2353
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2354
0
            return -1;
2355
0
        }
2356
1
        return 0;
2357
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2347
4
    auto loop_done = [&index_keys, this]() -> int {
2348
4
        if (index_keys.empty()) return 0;
2349
4
        DORIS_CLOUD_DEFER {
2350
4
            index_keys.clear();
2351
4
        };
2352
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2353
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2354
0
            return -1;
2355
0
        }
2356
4
        return 0;
2357
4
    };
2358
2359
17
    if (config::enable_recycler_stats_metrics) {
2360
0
        scan_and_statistics_indexes();
2361
0
    }
2362
    // recycle_func and loop_done for scan and recycle
2363
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
2364
17
}
2365
2366
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
2367
8.24k
                             int64_t tablet_id) {
2368
8.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
2369
2370
8.24k
    std::unique_ptr<Transaction> txn;
2371
8.24k
    TxnErrorCode err = txn_kv->create_txn(&txn);
2372
8.24k
    if (err != TxnErrorCode::TXN_OK) {
2373
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
2374
0
                     << " tablet_id=" << tablet_id << " err=" << err;
2375
0
        return false;
2376
0
    }
2377
2378
8.24k
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
2379
8.24k
    std::string tablet_idx_val;
2380
8.24k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
2381
8.24k
    if (TxnErrorCode::TXN_OK != err) {
2382
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
2383
0
                     << " tablet_id=" << tablet_id << " err=" << err
2384
0
                     << " key=" << hex(tablet_idx_key);
2385
0
        return false;
2386
0
    }
2387
2388
8.24k
    TabletIndexPB tablet_idx_pb;
2389
8.24k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
2390
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
2391
0
                     << " tablet_id=" << tablet_id;
2392
0
        return false;
2393
0
    }
2394
2395
8.24k
    if (!tablet_idx_pb.has_db_id()) {
2396
        // In the previous version, the db_id was not set in the index_pb.
2397
        // If updating to the version which enable txn lazy commit, the db_id will be set.
2398
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
2399
0
                  << " instance_id=" << instance_id
2400
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
2401
0
        return true;
2402
0
    }
2403
2404
8.24k
    std::string ver_val;
2405
8.24k
    std::string ver_key =
2406
8.24k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
2407
8.24k
                                   tablet_idx_pb.partition_id()});
2408
8.24k
    err = txn->get(ver_key, &ver_val);
2409
2410
8.24k
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2411
204
        LOG(INFO) << ""
2412
204
                     "partition version not found, instance_id="
2413
204
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
2414
204
                  << " table_id=" << tablet_idx_pb.table_id()
2415
204
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
2416
204
                  << " key=" << hex(ver_key);
2417
204
        return true;
2418
204
    }
2419
2420
8.03k
    if (TxnErrorCode::TXN_OK != err) {
2421
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
2422
0
                     << " db_id=" << tablet_idx_pb.db_id()
2423
0
                     << " table_id=" << tablet_idx_pb.table_id()
2424
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2425
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
2426
0
        return false;
2427
0
    }
2428
2429
8.03k
    VersionPB version_pb;
2430
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
2431
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
2432
0
                     << " db_id=" << tablet_idx_pb.db_id()
2433
0
                     << " table_id=" << tablet_idx_pb.table_id()
2434
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2435
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
2436
0
        return false;
2437
0
    }
2438
2439
8.03k
    if (version_pb.pending_txn_ids_size() > 0) {
2440
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
2441
4.00k
        DCHECK(version_pb.pending_txn_ids_size() == 1);
2442
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
2443
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
2444
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
2445
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
2446
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
2447
4.00k
                     << " key=" << hex(ver_key);
2448
4.00k
        return false;
2449
4.00k
    }
2450
4.03k
    return true;
2451
8.03k
}
2452
2453
15
int InstanceRecycler::recycle_partitions() {
2454
15
    const std::string task_name = "recycle_partitions";
2455
15
    int64_t num_scanned = 0;
2456
15
    int64_t num_expired = 0;
2457
15
    int64_t num_recycled = 0;
2458
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2459
2460
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
2461
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
2462
15
    std::string part_key0;
2463
15
    std::string part_key1;
2464
15
    recycle_partition_key(part_key_info0, &part_key0);
2465
15
    recycle_partition_key(part_key_info1, &part_key1);
2466
2467
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
2468
2469
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2470
15
    register_recycle_task(task_name, start_time);
2471
2472
15
    DORIS_CLOUD_DEFER {
2473
15
        unregister_recycle_task(task_name);
2474
15
        int64_t cost =
2475
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2476
15
        metrics_context.finish_report();
2477
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2478
15
                .tag("instance_id", instance_id_)
2479
15
                .tag("num_scanned", num_scanned)
2480
15
                .tag("num_expired", num_expired)
2481
15
                .tag("num_recycled", num_recycled);
2482
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2472
2
    DORIS_CLOUD_DEFER {
2473
2
        unregister_recycle_task(task_name);
2474
2
        int64_t cost =
2475
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2476
2
        metrics_context.finish_report();
2477
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2478
2
                .tag("instance_id", instance_id_)
2479
2
                .tag("num_scanned", num_scanned)
2480
2
                .tag("num_expired", num_expired)
2481
2
                .tag("num_recycled", num_recycled);
2482
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2472
13
    DORIS_CLOUD_DEFER {
2473
13
        unregister_recycle_task(task_name);
2474
13
        int64_t cost =
2475
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2476
13
        metrics_context.finish_report();
2477
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2478
13
                .tag("instance_id", instance_id_)
2479
13
                .tag("num_scanned", num_scanned)
2480
13
                .tag("num_expired", num_expired)
2481
13
                .tag("num_recycled", num_recycled);
2482
13
    };
2483
2484
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2485
2486
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
2487
15
    std::vector<std::string_view> partition_keys;
2488
15
    std::vector<std::string> partition_version_keys;
2489
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2490
9
        ++num_scanned;
2491
9
        RecyclePartitionPB part_pb;
2492
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2493
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2494
0
            return -1;
2495
0
        }
2496
9
        int64_t current_time = ::time(nullptr);
2497
9
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2498
9
                                                            &earlest_ts)) { // not expired
2499
0
            return 0;
2500
0
        }
2501
9
        ++num_expired;
2502
        // decode partition_id
2503
9
        auto k1 = k;
2504
9
        k1.remove_prefix(1);
2505
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2506
9
        decode_key(&k1, &out);
2507
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2508
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2509
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2510
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2511
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2512
        // Change state to RECYCLING
2513
9
        std::unique_ptr<Transaction> txn;
2514
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2515
9
        if (err != TxnErrorCode::TXN_OK) {
2516
0
            LOG_WARNING("failed to create txn").tag("err", err);
2517
0
            return -1;
2518
0
        }
2519
9
        std::string val;
2520
9
        err = txn->get(k, &val);
2521
9
        if (err ==
2522
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2523
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2524
0
            return 0;
2525
0
        }
2526
9
        if (err != TxnErrorCode::TXN_OK) {
2527
0
            LOG_WARNING("failed to get kv");
2528
0
            return -1;
2529
0
        }
2530
9
        part_pb.Clear();
2531
9
        if (!part_pb.ParseFromString(val)) {
2532
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2533
0
            return -1;
2534
0
        }
2535
        // Partitions with PREPARED state MUST have no data
2536
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2537
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2538
8
            txn->put(k, part_pb.SerializeAsString());
2539
8
            err = txn->commit();
2540
8
            if (err != TxnErrorCode::TXN_OK) {
2541
0
                LOG_WARNING("failed to commit txn: {}", err);
2542
0
                return -1;
2543
0
            }
2544
8
        }
2545
2546
9
        int ret = 0;
2547
33
        for (int64_t index_id : part_pb.index_id()) {
2548
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2549
1
                LOG_WARNING("failed to recycle tablets under partition")
2550
1
                        .tag("table_id", part_pb.table_id())
2551
1
                        .tag("instance_id", instance_id_)
2552
1
                        .tag("index_id", index_id)
2553
1
                        .tag("partition_id", partition_id);
2554
1
                ret = -1;
2555
1
            }
2556
33
        }
2557
9
        if (ret == 0 && part_pb.has_db_id()) {
2558
            // Recycle the versioned keys
2559
8
            std::unique_ptr<Transaction> txn;
2560
8
            err = txn_kv_->create_txn(&txn);
2561
8
            if (err != TxnErrorCode::TXN_OK) {
2562
0
                LOG_WARNING("failed to create txn").tag("err", err);
2563
0
                return -1;
2564
0
            }
2565
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2566
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2567
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2568
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2569
8
            std::string partition_version_key =
2570
8
                    versioned::partition_version_key({instance_id_, partition_id});
2571
8
            versioned_remove_all(txn.get(), meta_key);
2572
8
            txn->remove(index_key);
2573
8
            txn->remove(inverted_index_key);
2574
8
            versioned_remove_all(txn.get(), partition_version_key);
2575
8
            err = txn->commit();
2576
8
            if (err != TxnErrorCode::TXN_OK) {
2577
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2578
0
                return -1;
2579
0
            }
2580
8
        }
2581
2582
9
        if (ret == 0) {
2583
8
            ++num_recycled;
2584
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2585
8
            partition_keys.push_back(k);
2586
8
            if (part_pb.db_id() > 0) {
2587
8
                partition_version_keys.push_back(partition_version_key(
2588
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2589
8
            }
2590
8
            metrics_context.total_recycled_num = num_recycled;
2591
8
            metrics_context.report();
2592
8
        }
2593
9
        return ret;
2594
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2489
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2490
2
        ++num_scanned;
2491
2
        RecyclePartitionPB part_pb;
2492
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2493
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2494
0
            return -1;
2495
0
        }
2496
2
        int64_t current_time = ::time(nullptr);
2497
2
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2498
2
                                                            &earlest_ts)) { // not expired
2499
0
            return 0;
2500
0
        }
2501
2
        ++num_expired;
2502
        // decode partition_id
2503
2
        auto k1 = k;
2504
2
        k1.remove_prefix(1);
2505
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2506
2
        decode_key(&k1, &out);
2507
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2508
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2509
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2510
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2511
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2512
        // Change state to RECYCLING
2513
2
        std::unique_ptr<Transaction> txn;
2514
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2515
2
        if (err != TxnErrorCode::TXN_OK) {
2516
0
            LOG_WARNING("failed to create txn").tag("err", err);
2517
0
            return -1;
2518
0
        }
2519
2
        std::string val;
2520
2
        err = txn->get(k, &val);
2521
2
        if (err ==
2522
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2523
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2524
0
            return 0;
2525
0
        }
2526
2
        if (err != TxnErrorCode::TXN_OK) {
2527
0
            LOG_WARNING("failed to get kv");
2528
0
            return -1;
2529
0
        }
2530
2
        part_pb.Clear();
2531
2
        if (!part_pb.ParseFromString(val)) {
2532
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2533
0
            return -1;
2534
0
        }
2535
        // Partitions with PREPARED state MUST have no data
2536
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2537
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2538
1
            txn->put(k, part_pb.SerializeAsString());
2539
1
            err = txn->commit();
2540
1
            if (err != TxnErrorCode::TXN_OK) {
2541
0
                LOG_WARNING("failed to commit txn: {}", err);
2542
0
                return -1;
2543
0
            }
2544
1
        }
2545
2546
2
        int ret = 0;
2547
2
        for (int64_t index_id : part_pb.index_id()) {
2548
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2549
1
                LOG_WARNING("failed to recycle tablets under partition")
2550
1
                        .tag("table_id", part_pb.table_id())
2551
1
                        .tag("instance_id", instance_id_)
2552
1
                        .tag("index_id", index_id)
2553
1
                        .tag("partition_id", partition_id);
2554
1
                ret = -1;
2555
1
            }
2556
2
        }
2557
2
        if (ret == 0 && part_pb.has_db_id()) {
2558
            // Recycle the versioned keys
2559
1
            std::unique_ptr<Transaction> txn;
2560
1
            err = txn_kv_->create_txn(&txn);
2561
1
            if (err != TxnErrorCode::TXN_OK) {
2562
0
                LOG_WARNING("failed to create txn").tag("err", err);
2563
0
                return -1;
2564
0
            }
2565
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2566
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2567
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2568
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2569
1
            std::string partition_version_key =
2570
1
                    versioned::partition_version_key({instance_id_, partition_id});
2571
1
            versioned_remove_all(txn.get(), meta_key);
2572
1
            txn->remove(index_key);
2573
1
            txn->remove(inverted_index_key);
2574
1
            versioned_remove_all(txn.get(), partition_version_key);
2575
1
            err = txn->commit();
2576
1
            if (err != TxnErrorCode::TXN_OK) {
2577
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2578
0
                return -1;
2579
0
            }
2580
1
        }
2581
2582
2
        if (ret == 0) {
2583
1
            ++num_recycled;
2584
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2585
1
            partition_keys.push_back(k);
2586
1
            if (part_pb.db_id() > 0) {
2587
1
                partition_version_keys.push_back(partition_version_key(
2588
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2589
1
            }
2590
1
            metrics_context.total_recycled_num = num_recycled;
2591
1
            metrics_context.report();
2592
1
        }
2593
2
        return ret;
2594
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2489
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2490
7
        ++num_scanned;
2491
7
        RecyclePartitionPB part_pb;
2492
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2493
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2494
0
            return -1;
2495
0
        }
2496
7
        int64_t current_time = ::time(nullptr);
2497
7
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2498
7
                                                            &earlest_ts)) { // not expired
2499
0
            return 0;
2500
0
        }
2501
7
        ++num_expired;
2502
        // decode partition_id
2503
7
        auto k1 = k;
2504
7
        k1.remove_prefix(1);
2505
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2506
7
        decode_key(&k1, &out);
2507
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2508
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2509
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2510
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2511
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2512
        // Change state to RECYCLING
2513
7
        std::unique_ptr<Transaction> txn;
2514
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2515
7
        if (err != TxnErrorCode::TXN_OK) {
2516
0
            LOG_WARNING("failed to create txn").tag("err", err);
2517
0
            return -1;
2518
0
        }
2519
7
        std::string val;
2520
7
        err = txn->get(k, &val);
2521
7
        if (err ==
2522
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2523
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2524
0
            return 0;
2525
0
        }
2526
7
        if (err != TxnErrorCode::TXN_OK) {
2527
0
            LOG_WARNING("failed to get kv");
2528
0
            return -1;
2529
0
        }
2530
7
        part_pb.Clear();
2531
7
        if (!part_pb.ParseFromString(val)) {
2532
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2533
0
            return -1;
2534
0
        }
2535
        // Partitions with PREPARED state MUST have no data
2536
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2537
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2538
7
            txn->put(k, part_pb.SerializeAsString());
2539
7
            err = txn->commit();
2540
7
            if (err != TxnErrorCode::TXN_OK) {
2541
0
                LOG_WARNING("failed to commit txn: {}", err);
2542
0
                return -1;
2543
0
            }
2544
7
        }
2545
2546
7
        int ret = 0;
2547
31
        for (int64_t index_id : part_pb.index_id()) {
2548
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2549
0
                LOG_WARNING("failed to recycle tablets under partition")
2550
0
                        .tag("table_id", part_pb.table_id())
2551
0
                        .tag("instance_id", instance_id_)
2552
0
                        .tag("index_id", index_id)
2553
0
                        .tag("partition_id", partition_id);
2554
0
                ret = -1;
2555
0
            }
2556
31
        }
2557
7
        if (ret == 0 && part_pb.has_db_id()) {
2558
            // Recycle the versioned keys
2559
7
            std::unique_ptr<Transaction> txn;
2560
7
            err = txn_kv_->create_txn(&txn);
2561
7
            if (err != TxnErrorCode::TXN_OK) {
2562
0
                LOG_WARNING("failed to create txn").tag("err", err);
2563
0
                return -1;
2564
0
            }
2565
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2566
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2567
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2568
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2569
7
            std::string partition_version_key =
2570
7
                    versioned::partition_version_key({instance_id_, partition_id});
2571
7
            versioned_remove_all(txn.get(), meta_key);
2572
7
            txn->remove(index_key);
2573
7
            txn->remove(inverted_index_key);
2574
7
            versioned_remove_all(txn.get(), partition_version_key);
2575
7
            err = txn->commit();
2576
7
            if (err != TxnErrorCode::TXN_OK) {
2577
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2578
0
                return -1;
2579
0
            }
2580
7
        }
2581
2582
7
        if (ret == 0) {
2583
7
            ++num_recycled;
2584
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2585
7
            partition_keys.push_back(k);
2586
7
            if (part_pb.db_id() > 0) {
2587
7
                partition_version_keys.push_back(partition_version_key(
2588
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2589
7
            }
2590
7
            metrics_context.total_recycled_num = num_recycled;
2591
7
            metrics_context.report();
2592
7
        }
2593
7
        return ret;
2594
7
    };
2595
2596
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2597
5
        if (partition_keys.empty()) return 0;
2598
4
        DORIS_CLOUD_DEFER {
2599
4
            partition_keys.clear();
2600
4
            partition_version_keys.clear();
2601
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2598
1
        DORIS_CLOUD_DEFER {
2599
1
            partition_keys.clear();
2600
1
            partition_version_keys.clear();
2601
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2598
3
        DORIS_CLOUD_DEFER {
2599
3
            partition_keys.clear();
2600
3
            partition_version_keys.clear();
2601
3
        };
2602
4
        std::unique_ptr<Transaction> txn;
2603
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2604
4
        if (err != TxnErrorCode::TXN_OK) {
2605
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2606
0
            return -1;
2607
0
        }
2608
8
        for (auto& k : partition_keys) {
2609
8
            txn->remove(k);
2610
8
        }
2611
8
        for (auto& k : partition_version_keys) {
2612
8
            txn->remove(k);
2613
8
        }
2614
4
        err = txn->commit();
2615
4
        if (err != TxnErrorCode::TXN_OK) {
2616
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2617
0
                         << " err=" << err;
2618
0
            return -1;
2619
0
        }
2620
4
        return 0;
2621
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2596
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2597
2
        if (partition_keys.empty()) return 0;
2598
1
        DORIS_CLOUD_DEFER {
2599
1
            partition_keys.clear();
2600
1
            partition_version_keys.clear();
2601
1
        };
2602
1
        std::unique_ptr<Transaction> txn;
2603
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2604
1
        if (err != TxnErrorCode::TXN_OK) {
2605
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2606
0
            return -1;
2607
0
        }
2608
1
        for (auto& k : partition_keys) {
2609
1
            txn->remove(k);
2610
1
        }
2611
1
        for (auto& k : partition_version_keys) {
2612
1
            txn->remove(k);
2613
1
        }
2614
1
        err = txn->commit();
2615
1
        if (err != TxnErrorCode::TXN_OK) {
2616
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2617
0
                         << " err=" << err;
2618
0
            return -1;
2619
0
        }
2620
1
        return 0;
2621
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2596
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2597
3
        if (partition_keys.empty()) return 0;
2598
3
        DORIS_CLOUD_DEFER {
2599
3
            partition_keys.clear();
2600
3
            partition_version_keys.clear();
2601
3
        };
2602
3
        std::unique_ptr<Transaction> txn;
2603
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2604
3
        if (err != TxnErrorCode::TXN_OK) {
2605
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2606
0
            return -1;
2607
0
        }
2608
7
        for (auto& k : partition_keys) {
2609
7
            txn->remove(k);
2610
7
        }
2611
7
        for (auto& k : partition_version_keys) {
2612
7
            txn->remove(k);
2613
7
        }
2614
3
        err = txn->commit();
2615
3
        if (err != TxnErrorCode::TXN_OK) {
2616
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2617
0
                         << " err=" << err;
2618
0
            return -1;
2619
0
        }
2620
3
        return 0;
2621
3
    };
2622
2623
15
    if (config::enable_recycler_stats_metrics) {
2624
0
        scan_and_statistics_partitions();
2625
0
    }
2626
    // recycle_func and loop_done for scan and recycle
2627
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
2628
15
}
2629
2630
14
int InstanceRecycler::recycle_versions() {
2631
14
    if (should_recycle_versioned_keys()) {
2632
2
        return recycle_orphan_partitions();
2633
2
    }
2634
2635
12
    int64_t num_scanned = 0;
2636
12
    int64_t num_recycled = 0;
2637
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
2638
2639
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
2640
2641
12
    auto start_time = steady_clock::now();
2642
2643
12
    DORIS_CLOUD_DEFER {
2644
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2645
12
        metrics_context.finish_report();
2646
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2647
12
                .tag("instance_id", instance_id_)
2648
12
                .tag("num_scanned", num_scanned)
2649
12
                .tag("num_recycled", num_recycled);
2650
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
2643
12
    DORIS_CLOUD_DEFER {
2644
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2645
12
        metrics_context.finish_report();
2646
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2647
12
                .tag("instance_id", instance_id_)
2648
12
                .tag("num_scanned", num_scanned)
2649
12
                .tag("num_recycled", num_recycled);
2650
12
    };
2651
2652
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
2653
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
2654
12
    int64_t last_scanned_table_id = 0;
2655
12
    bool is_recycled = false; // Is last scanned kv recycled
2656
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
2657
12
                         &metrics_context, this](std::string_view k, std::string_view) {
2658
2
        ++num_scanned;
2659
2
        auto k1 = k;
2660
2
        k1.remove_prefix(1);
2661
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2662
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2663
2
        decode_key(&k1, &out);
2664
2
        DCHECK_EQ(out.size(), 6) << k;
2665
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2666
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2667
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2668
0
            return 0;
2669
0
        }
2670
2
        last_scanned_table_id = table_id;
2671
2
        is_recycled = false;
2672
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2673
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2674
2
        std::unique_ptr<Transaction> txn;
2675
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2676
2
        if (err != TxnErrorCode::TXN_OK) {
2677
0
            return -1;
2678
0
        }
2679
2
        std::unique_ptr<RangeGetIterator> iter;
2680
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2681
2
        if (err != TxnErrorCode::TXN_OK) {
2682
0
            return -1;
2683
0
        }
2684
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2685
1
            return 0;
2686
1
        }
2687
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2688
        // 1. Remove all partition version kvs of this table
2689
1
        auto partition_version_key_begin =
2690
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2691
1
        auto partition_version_key_end =
2692
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2693
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2694
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2695
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2696
1
                     << " table_id=" << table_id;
2697
        // 2. Remove the table version kv of this table
2698
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2699
1
        txn->remove(tbl_version_key);
2700
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2701
        // 3. Remove mow delete bitmap update lock and tablet job lock
2702
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2703
1
        txn->remove(lock_key);
2704
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2705
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2706
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2707
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2708
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2709
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2710
1
                     << " table_id=" << table_id;
2711
1
        err = txn->commit();
2712
1
        if (err != TxnErrorCode::TXN_OK) {
2713
0
            return -1;
2714
0
        }
2715
1
        metrics_context.total_recycled_num = ++num_recycled;
2716
1
        metrics_context.report();
2717
1
        is_recycled = true;
2718
1
        return 0;
2719
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2657
2
                         &metrics_context, this](std::string_view k, std::string_view) {
2658
2
        ++num_scanned;
2659
2
        auto k1 = k;
2660
2
        k1.remove_prefix(1);
2661
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2662
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2663
2
        decode_key(&k1, &out);
2664
2
        DCHECK_EQ(out.size(), 6) << k;
2665
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2666
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2667
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2668
0
            return 0;
2669
0
        }
2670
2
        last_scanned_table_id = table_id;
2671
2
        is_recycled = false;
2672
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2673
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2674
2
        std::unique_ptr<Transaction> txn;
2675
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2676
2
        if (err != TxnErrorCode::TXN_OK) {
2677
0
            return -1;
2678
0
        }
2679
2
        std::unique_ptr<RangeGetIterator> iter;
2680
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2681
2
        if (err != TxnErrorCode::TXN_OK) {
2682
0
            return -1;
2683
0
        }
2684
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2685
1
            return 0;
2686
1
        }
2687
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2688
        // 1. Remove all partition version kvs of this table
2689
1
        auto partition_version_key_begin =
2690
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2691
1
        auto partition_version_key_end =
2692
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2693
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2694
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2695
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2696
1
                     << " table_id=" << table_id;
2697
        // 2. Remove the table version kv of this table
2698
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2699
1
        txn->remove(tbl_version_key);
2700
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2701
        // 3. Remove mow delete bitmap update lock and tablet job lock
2702
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2703
1
        txn->remove(lock_key);
2704
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2705
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2706
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2707
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2708
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2709
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2710
1
                     << " table_id=" << table_id;
2711
1
        err = txn->commit();
2712
1
        if (err != TxnErrorCode::TXN_OK) {
2713
0
            return -1;
2714
0
        }
2715
1
        metrics_context.total_recycled_num = ++num_recycled;
2716
1
        metrics_context.report();
2717
1
        is_recycled = true;
2718
1
        return 0;
2719
1
    };
2720
2721
12
    if (config::enable_recycler_stats_metrics) {
2722
0
        scan_and_statistics_versions();
2723
0
    }
2724
    // recycle_func and loop_done for scan and recycle
2725
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
2726
14
}
2727
2728
3
int InstanceRecycler::recycle_orphan_partitions() {
2729
3
    int64_t num_scanned = 0;
2730
3
    int64_t num_recycled = 0;
2731
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
2732
2733
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
2734
3
            .tag("instance_id", instance_id_);
2735
2736
3
    auto start_time = steady_clock::now();
2737
2738
3
    DORIS_CLOUD_DEFER {
2739
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2740
3
        metrics_context.finish_report();
2741
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2742
3
                .tag("instance_id", instance_id_)
2743
3
                .tag("num_scanned", num_scanned)
2744
3
                .tag("num_recycled", num_recycled);
2745
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
2738
3
    DORIS_CLOUD_DEFER {
2739
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2740
3
        metrics_context.finish_report();
2741
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2742
3
                .tag("instance_id", instance_id_)
2743
3
                .tag("num_scanned", num_scanned)
2744
3
                .tag("num_recycled", num_recycled);
2745
3
    };
2746
2747
3
    bool is_empty_table = false;        // whether the table has no indexes
2748
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
2749
3
    int64_t current_table_id = 0;       // current scanning table id
2750
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
2751
3
                         &current_table_id, &is_table_kvs_recycled,
2752
3
                         this](std::string_view k, std::string_view) {
2753
2
        ++num_scanned;
2754
2755
2
        std::string_view k1(k);
2756
2
        int64_t db_id, table_id, partition_id;
2757
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2758
2
                                                            &partition_id)) {
2759
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2760
0
            return -1;
2761
2
        } else if (table_id != current_table_id) {
2762
2
            current_table_id = table_id;
2763
2
            is_table_kvs_recycled = false;
2764
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2765
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2766
2
            if (err != TxnErrorCode::TXN_OK) {
2767
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2768
0
                             << " table_id=" << table_id << " err=" << err;
2769
0
                return -1;
2770
0
            }
2771
2
        }
2772
2773
2
        if (!is_empty_table) {
2774
            // table is not empty, skip recycle
2775
1
            return 0;
2776
1
        }
2777
2778
1
        std::unique_ptr<Transaction> txn;
2779
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2780
1
        if (err != TxnErrorCode::TXN_OK) {
2781
0
            return -1;
2782
0
        }
2783
2784
        // 1. Remove all partition related kvs
2785
1
        std::string partition_meta_key =
2786
1
                versioned::meta_partition_key({instance_id_, partition_id});
2787
1
        std::string partition_index_key =
2788
1
                versioned::partition_index_key({instance_id_, partition_id});
2789
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2790
1
                {instance_id_, db_id, table_id, partition_id});
2791
1
        std::string partition_version_key =
2792
1
                versioned::partition_version_key({instance_id_, partition_id});
2793
1
        txn->remove(partition_index_key);
2794
1
        txn->remove(partition_inverted_key);
2795
1
        versioned_remove_all(txn.get(), partition_meta_key);
2796
1
        versioned_remove_all(txn.get(), partition_version_key);
2797
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2798
1
                     << " table_id=" << table_id << " db_id=" << db_id
2799
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2800
1
                     << " partition_version_key=" << hex(partition_version_key);
2801
2802
1
        if (!is_table_kvs_recycled) {
2803
1
            is_table_kvs_recycled = true;
2804
2805
            // 2. Remove the table version kv of this table
2806
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2807
1
            versioned_remove_all(txn.get(), table_version_key);
2808
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2809
            // 3. Remove mow delete bitmap update lock and tablet job lock
2810
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2811
1
            txn->remove(lock_key);
2812
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2813
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2814
1
            std::string tablet_job_key_end =
2815
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2816
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2817
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2818
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2819
1
                         << " table_id=" << table_id;
2820
1
        }
2821
2822
1
        err = txn->commit();
2823
1
        if (err != TxnErrorCode::TXN_OK) {
2824
0
            return -1;
2825
0
        }
2826
1
        metrics_context.total_recycled_num = ++num_recycled;
2827
1
        metrics_context.report();
2828
1
        return 0;
2829
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2752
2
                         this](std::string_view k, std::string_view) {
2753
2
        ++num_scanned;
2754
2755
2
        std::string_view k1(k);
2756
2
        int64_t db_id, table_id, partition_id;
2757
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2758
2
                                                            &partition_id)) {
2759
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2760
0
            return -1;
2761
2
        } else if (table_id != current_table_id) {
2762
2
            current_table_id = table_id;
2763
2
            is_table_kvs_recycled = false;
2764
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2765
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2766
2
            if (err != TxnErrorCode::TXN_OK) {
2767
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2768
0
                             << " table_id=" << table_id << " err=" << err;
2769
0
                return -1;
2770
0
            }
2771
2
        }
2772
2773
2
        if (!is_empty_table) {
2774
            // table is not empty, skip recycle
2775
1
            return 0;
2776
1
        }
2777
2778
1
        std::unique_ptr<Transaction> txn;
2779
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2780
1
        if (err != TxnErrorCode::TXN_OK) {
2781
0
            return -1;
2782
0
        }
2783
2784
        // 1. Remove all partition related kvs
2785
1
        std::string partition_meta_key =
2786
1
                versioned::meta_partition_key({instance_id_, partition_id});
2787
1
        std::string partition_index_key =
2788
1
                versioned::partition_index_key({instance_id_, partition_id});
2789
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2790
1
                {instance_id_, db_id, table_id, partition_id});
2791
1
        std::string partition_version_key =
2792
1
                versioned::partition_version_key({instance_id_, partition_id});
2793
1
        txn->remove(partition_index_key);
2794
1
        txn->remove(partition_inverted_key);
2795
1
        versioned_remove_all(txn.get(), partition_meta_key);
2796
1
        versioned_remove_all(txn.get(), partition_version_key);
2797
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2798
1
                     << " table_id=" << table_id << " db_id=" << db_id
2799
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2800
1
                     << " partition_version_key=" << hex(partition_version_key);
2801
2802
1
        if (!is_table_kvs_recycled) {
2803
1
            is_table_kvs_recycled = true;
2804
2805
            // 2. Remove the table version kv of this table
2806
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2807
1
            versioned_remove_all(txn.get(), table_version_key);
2808
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2809
            // 3. Remove mow delete bitmap update lock and tablet job lock
2810
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2811
1
            txn->remove(lock_key);
2812
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2813
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2814
1
            std::string tablet_job_key_end =
2815
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2816
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2817
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2818
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2819
1
                         << " table_id=" << table_id;
2820
1
        }
2821
2822
1
        err = txn->commit();
2823
1
        if (err != TxnErrorCode::TXN_OK) {
2824
0
            return -1;
2825
0
        }
2826
1
        metrics_context.total_recycled_num = ++num_recycled;
2827
1
        metrics_context.report();
2828
1
        return 0;
2829
1
    };
2830
2831
    // recycle_func and loop_done for scan and recycle
2832
3
    return scan_and_recycle(
2833
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
2834
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
2835
3
            std::move(recycle_func));
2836
3
}
2837
2838
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
2839
                                      RecyclerMetricsContext& metrics_context,
2840
49
                                      int64_t partition_id) {
2841
49
    bool is_multi_version =
2842
49
            instance_info_.has_multi_version_status() &&
2843
49
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
2844
49
    int64_t num_scanned = 0;
2845
49
    std::atomic_long num_recycled = 0;
2846
2847
49
    std::string tablet_key_begin, tablet_key_end;
2848
49
    std::string stats_key_begin, stats_key_end;
2849
49
    std::string job_key_begin, job_key_end;
2850
2851
49
    std::string tablet_belongs;
2852
49
    if (partition_id > 0) {
2853
        // recycle tablets in a partition belonging to the index
2854
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2855
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2856
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
2857
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
2858
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
2859
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
2860
33
        tablet_belongs = "partition";
2861
33
    } else {
2862
        // recycle tablets in the index
2863
16
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2864
16
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2865
16
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
2866
16
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
2867
16
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
2868
16
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
2869
16
        tablet_belongs = "index";
2870
16
    }
2871
2872
49
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
2873
49
            .tag("table_id", table_id)
2874
49
            .tag("index_id", index_id)
2875
49
            .tag("partition_id", partition_id);
2876
2877
49
    auto start_time = steady_clock::now();
2878
2879
49
    DORIS_CLOUD_DEFER {
2880
49
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2881
49
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2882
49
                .tag("instance_id", instance_id_)
2883
49
                .tag("table_id", table_id)
2884
49
                .tag("index_id", index_id)
2885
49
                .tag("partition_id", partition_id)
2886
49
                .tag("num_scanned", num_scanned)
2887
49
                .tag("num_recycled", num_recycled);
2888
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2879
4
    DORIS_CLOUD_DEFER {
2880
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2881
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2882
4
                .tag("instance_id", instance_id_)
2883
4
                .tag("table_id", table_id)
2884
4
                .tag("index_id", index_id)
2885
4
                .tag("partition_id", partition_id)
2886
4
                .tag("num_scanned", num_scanned)
2887
4
                .tag("num_recycled", num_recycled);
2888
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2879
45
    DORIS_CLOUD_DEFER {
2880
45
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2881
45
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2882
45
                .tag("instance_id", instance_id_)
2883
45
                .tag("table_id", table_id)
2884
45
                .tag("index_id", index_id)
2885
45
                .tag("partition_id", partition_id)
2886
45
                .tag("num_scanned", num_scanned)
2887
45
                .tag("num_recycled", num_recycled);
2888
45
    };
2889
2890
    // The first string_view represents the tablet key which has been recycled
2891
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
2892
49
    using TabletKeyPair = std::pair<std::string_view, bool>;
2893
49
    SyncExecutor<TabletKeyPair> sync_executor(
2894
49
            _thread_pool_group.recycle_tablet_pool,
2895
49
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
2896
49
                        index_id, partition_id),
2897
4.23k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2897
4.00k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2897
237
            [](const TabletKeyPair& k) { return k.first.empty(); });
2898
2899
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
2900
49
    std::vector<std::string> tablet_idx_keys;
2901
49
    std::vector<std::string> restore_job_keys;
2902
49
    std::vector<std::string> init_rs_keys;
2903
49
    std::vector<std::string> tablet_compact_stats_keys;
2904
49
    std::vector<std::string> tablet_load_stats_keys;
2905
49
    std::vector<std::string> versioned_meta_tablet_keys;
2906
8.24k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2907
8.24k
        bool use_range_remove = true;
2908
8.24k
        ++num_scanned;
2909
8.24k
        doris::TabletMetaCloudPB tablet_meta_pb;
2910
8.24k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2911
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2912
0
            use_range_remove = false;
2913
0
            return -1;
2914
0
        }
2915
8.24k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2916
2917
8.24k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2918
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2919
4.00k
            return -1;
2920
4.00k
        }
2921
2922
4.24k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2923
4.24k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2924
4.24k
        if (is_multi_version) {
2925
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2926
6
            tablet_compact_stats_keys.push_back(
2927
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2928
6
            tablet_load_stats_keys.push_back(
2929
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2930
6
            versioned_meta_tablet_keys.push_back(
2931
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2932
6
        }
2933
4.24k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2934
4.23k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2935
4.23k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2936
4.23k
            if (recycle_tablet(tid, metrics_context) != 0) {
2937
1
                LOG_WARNING("failed to recycle tablet")
2938
1
                        .tag("instance_id", instance_id_)
2939
1
                        .tag("tablet_id", tid);
2940
1
                range_move = false;
2941
1
                return {std::string_view(), range_move};
2942
1
            }
2943
4.23k
            ++num_recycled;
2944
4.23k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2945
4.23k
            return {k, range_move};
2946
4.23k
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2935
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2936
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2937
0
                LOG_WARNING("failed to recycle tablet")
2938
0
                        .tag("instance_id", instance_id_)
2939
0
                        .tag("tablet_id", tid);
2940
0
                range_move = false;
2941
0
                return {std::string_view(), range_move};
2942
0
            }
2943
4.00k
            ++num_recycled;
2944
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2945
4.00k
            return {k, range_move};
2946
4.00k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2935
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2936
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2937
1
                LOG_WARNING("failed to recycle tablet")
2938
1
                        .tag("instance_id", instance_id_)
2939
1
                        .tag("tablet_id", tid);
2940
1
                range_move = false;
2941
1
                return {std::string_view(), range_move};
2942
1
            }
2943
236
            ++num_recycled;
2944
236
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2945
236
            return {k, range_move};
2946
237
        });
2947
4.23k
        return 0;
2948
4.24k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2906
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2907
8.00k
        bool use_range_remove = true;
2908
8.00k
        ++num_scanned;
2909
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
2910
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2911
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2912
0
            use_range_remove = false;
2913
0
            return -1;
2914
0
        }
2915
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2916
2917
8.00k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2918
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2919
4.00k
            return -1;
2920
4.00k
        }
2921
2922
4.00k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2923
4.00k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2924
4.00k
        if (is_multi_version) {
2925
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2926
0
            tablet_compact_stats_keys.push_back(
2927
0
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2928
0
            tablet_load_stats_keys.push_back(
2929
0
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2930
0
            versioned_meta_tablet_keys.push_back(
2931
0
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2932
0
        }
2933
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2934
4.00k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2935
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2936
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2937
4.00k
                LOG_WARNING("failed to recycle tablet")
2938
4.00k
                        .tag("instance_id", instance_id_)
2939
4.00k
                        .tag("tablet_id", tid);
2940
4.00k
                range_move = false;
2941
4.00k
                return {std::string_view(), range_move};
2942
4.00k
            }
2943
4.00k
            ++num_recycled;
2944
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2945
4.00k
            return {k, range_move};
2946
4.00k
        });
2947
4.00k
        return 0;
2948
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2906
240
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2907
240
        bool use_range_remove = true;
2908
240
        ++num_scanned;
2909
240
        doris::TabletMetaCloudPB tablet_meta_pb;
2910
240
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2911
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2912
0
            use_range_remove = false;
2913
0
            return -1;
2914
0
        }
2915
240
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2916
2917
240
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2918
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2919
0
            return -1;
2920
0
        }
2921
2922
240
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2923
240
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2924
240
        if (is_multi_version) {
2925
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2926
6
            tablet_compact_stats_keys.push_back(
2927
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2928
6
            tablet_load_stats_keys.push_back(
2929
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2930
6
            versioned_meta_tablet_keys.push_back(
2931
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2932
6
        }
2933
240
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2934
237
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2935
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2936
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2937
237
                LOG_WARNING("failed to recycle tablet")
2938
237
                        .tag("instance_id", instance_id_)
2939
237
                        .tag("tablet_id", tid);
2940
237
                range_move = false;
2941
237
                return {std::string_view(), range_move};
2942
237
            }
2943
237
            ++num_recycled;
2944
237
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2945
237
            return {k, range_move};
2946
237
        });
2947
237
        return 0;
2948
240
    };
2949
2950
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
2951
49
    auto loop_done = [&, this]() -> int {
2952
49
        bool finished = true;
2953
49
        auto tablet_keys = sync_executor.when_all(&finished);
2954
49
        if (!finished) {
2955
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2956
1
            return -1;
2957
1
        }
2958
48
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2959
46
        if (!tablet_keys.empty() &&
2960
46
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2960
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2960
42
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2961
0
            return -1;
2962
0
        }
2963
        // sort the vector using key's order
2964
46
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2965
49.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2965
48.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2965
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2966
46
        bool use_range_remove = true;
2967
4.23k
        for (auto& [_, remove] : tablet_keys) {
2968
4.23k
            if (!remove) {
2969
0
                use_range_remove = remove;
2970
0
                break;
2971
0
            }
2972
4.23k
        }
2973
46
        DORIS_CLOUD_DEFER {
2974
46
            tablet_idx_keys.clear();
2975
46
            restore_job_keys.clear();
2976
46
            init_rs_keys.clear();
2977
46
            tablet_compact_stats_keys.clear();
2978
46
            tablet_load_stats_keys.clear();
2979
46
            versioned_meta_tablet_keys.clear();
2980
46
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2973
2
        DORIS_CLOUD_DEFER {
2974
2
            tablet_idx_keys.clear();
2975
2
            restore_job_keys.clear();
2976
2
            init_rs_keys.clear();
2977
2
            tablet_compact_stats_keys.clear();
2978
2
            tablet_load_stats_keys.clear();
2979
2
            versioned_meta_tablet_keys.clear();
2980
2
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2973
44
        DORIS_CLOUD_DEFER {
2974
44
            tablet_idx_keys.clear();
2975
44
            restore_job_keys.clear();
2976
44
            init_rs_keys.clear();
2977
44
            tablet_compact_stats_keys.clear();
2978
44
            tablet_load_stats_keys.clear();
2979
44
            versioned_meta_tablet_keys.clear();
2980
44
        };
2981
46
        std::unique_ptr<Transaction> txn;
2982
46
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2983
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2984
0
            return -1;
2985
0
        }
2986
46
        std::string tablet_key_end;
2987
46
        if (!tablet_keys.empty()) {
2988
44
            if (use_range_remove) {
2989
44
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2990
44
                txn->remove(tablet_keys.front().first, tablet_key_end);
2991
44
            } else {
2992
0
                for (auto& [k, _] : tablet_keys) {
2993
0
                    txn->remove(k);
2994
0
                }
2995
0
            }
2996
44
        }
2997
46
        if (is_multi_version) {
2998
6
            for (auto& k : tablet_compact_stats_keys) {
2999
                // Remove all versions of tablet compact stats for recycled tablet
3000
6
                LOG_INFO("remove versioned tablet compact stats key")
3001
6
                        .tag("compact_stats_key", hex(k));
3002
6
                versioned_remove_all(txn.get(), k);
3003
6
            }
3004
6
            for (auto& k : tablet_load_stats_keys) {
3005
                // Remove all versions of tablet load stats for recycled tablet
3006
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
3007
6
                versioned_remove_all(txn.get(), k);
3008
6
            }
3009
6
            for (auto& k : versioned_meta_tablet_keys) {
3010
                // Remove all versions of meta tablet for recycled tablet
3011
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
3012
6
                versioned_remove_all(txn.get(), k);
3013
6
            }
3014
5
        }
3015
4.24k
        for (auto& k : tablet_idx_keys) {
3016
4.24k
            txn->remove(k);
3017
4.24k
        }
3018
4.24k
        for (auto& k : restore_job_keys) {
3019
4.24k
            txn->remove(k);
3020
4.24k
        }
3021
46
        for (auto& k : init_rs_keys) {
3022
0
            txn->remove(k);
3023
0
        }
3024
46
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
3025
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
3026
0
                         << ", err=" << err;
3027
0
            return -1;
3028
0
        }
3029
46
        return 0;
3030
46
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2951
4
    auto loop_done = [&, this]() -> int {
2952
4
        bool finished = true;
2953
4
        auto tablet_keys = sync_executor.when_all(&finished);
2954
4
        if (!finished) {
2955
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2956
0
            return -1;
2957
0
        }
2958
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2959
2
        if (!tablet_keys.empty() &&
2960
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2961
0
            return -1;
2962
0
        }
2963
        // sort the vector using key's order
2964
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2965
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2966
2
        bool use_range_remove = true;
2967
4.00k
        for (auto& [_, remove] : tablet_keys) {
2968
4.00k
            if (!remove) {
2969
0
                use_range_remove = remove;
2970
0
                break;
2971
0
            }
2972
4.00k
        }
2973
2
        DORIS_CLOUD_DEFER {
2974
2
            tablet_idx_keys.clear();
2975
2
            restore_job_keys.clear();
2976
2
            init_rs_keys.clear();
2977
2
            tablet_compact_stats_keys.clear();
2978
2
            tablet_load_stats_keys.clear();
2979
2
            versioned_meta_tablet_keys.clear();
2980
2
        };
2981
2
        std::unique_ptr<Transaction> txn;
2982
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2983
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2984
0
            return -1;
2985
0
        }
2986
2
        std::string tablet_key_end;
2987
2
        if (!tablet_keys.empty()) {
2988
2
            if (use_range_remove) {
2989
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2990
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
2991
2
            } else {
2992
0
                for (auto& [k, _] : tablet_keys) {
2993
0
                    txn->remove(k);
2994
0
                }
2995
0
            }
2996
2
        }
2997
2
        if (is_multi_version) {
2998
0
            for (auto& k : tablet_compact_stats_keys) {
2999
                // Remove all versions of tablet compact stats for recycled tablet
3000
0
                LOG_INFO("remove versioned tablet compact stats key")
3001
0
                        .tag("compact_stats_key", hex(k));
3002
0
                versioned_remove_all(txn.get(), k);
3003
0
            }
3004
0
            for (auto& k : tablet_load_stats_keys) {
3005
                // Remove all versions of tablet load stats for recycled tablet
3006
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
3007
0
                versioned_remove_all(txn.get(), k);
3008
0
            }
3009
0
            for (auto& k : versioned_meta_tablet_keys) {
3010
                // Remove all versions of meta tablet for recycled tablet
3011
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
3012
0
                versioned_remove_all(txn.get(), k);
3013
0
            }
3014
0
        }
3015
4.00k
        for (auto& k : tablet_idx_keys) {
3016
4.00k
            txn->remove(k);
3017
4.00k
        }
3018
4.00k
        for (auto& k : restore_job_keys) {
3019
4.00k
            txn->remove(k);
3020
4.00k
        }
3021
2
        for (auto& k : init_rs_keys) {
3022
0
            txn->remove(k);
3023
0
        }
3024
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
3025
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
3026
0
                         << ", err=" << err;
3027
0
            return -1;
3028
0
        }
3029
2
        return 0;
3030
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2951
45
    auto loop_done = [&, this]() -> int {
2952
45
        bool finished = true;
2953
45
        auto tablet_keys = sync_executor.when_all(&finished);
2954
45
        if (!finished) {
2955
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2956
1
            return -1;
2957
1
        }
2958
44
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2959
44
        if (!tablet_keys.empty() &&
2960
44
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2961
0
            return -1;
2962
0
        }
2963
        // sort the vector using key's order
2964
44
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2965
44
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2966
44
        bool use_range_remove = true;
2967
236
        for (auto& [_, remove] : tablet_keys) {
2968
236
            if (!remove) {
2969
0
                use_range_remove = remove;
2970
0
                break;
2971
0
            }
2972
236
        }
2973
44
        DORIS_CLOUD_DEFER {
2974
44
            tablet_idx_keys.clear();
2975
44
            restore_job_keys.clear();
2976
44
            init_rs_keys.clear();
2977
44
            tablet_compact_stats_keys.clear();
2978
44
            tablet_load_stats_keys.clear();
2979
44
            versioned_meta_tablet_keys.clear();
2980
44
        };
2981
44
        std::unique_ptr<Transaction> txn;
2982
44
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2983
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2984
0
            return -1;
2985
0
        }
2986
44
        std::string tablet_key_end;
2987
44
        if (!tablet_keys.empty()) {
2988
42
            if (use_range_remove) {
2989
42
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2990
42
                txn->remove(tablet_keys.front().first, tablet_key_end);
2991
42
            } else {
2992
0
                for (auto& [k, _] : tablet_keys) {
2993
0
                    txn->remove(k);
2994
0
                }
2995
0
            }
2996
42
        }
2997
44
        if (is_multi_version) {
2998
6
            for (auto& k : tablet_compact_stats_keys) {
2999
                // Remove all versions of tablet compact stats for recycled tablet
3000
6
                LOG_INFO("remove versioned tablet compact stats key")
3001
6
                        .tag("compact_stats_key", hex(k));
3002
6
                versioned_remove_all(txn.get(), k);
3003
6
            }
3004
6
            for (auto& k : tablet_load_stats_keys) {
3005
                // Remove all versions of tablet load stats for recycled tablet
3006
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
3007
6
                versioned_remove_all(txn.get(), k);
3008
6
            }
3009
6
            for (auto& k : versioned_meta_tablet_keys) {
3010
                // Remove all versions of meta tablet for recycled tablet
3011
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
3012
6
                versioned_remove_all(txn.get(), k);
3013
6
            }
3014
5
        }
3015
239
        for (auto& k : tablet_idx_keys) {
3016
239
            txn->remove(k);
3017
239
        }
3018
239
        for (auto& k : restore_job_keys) {
3019
239
            txn->remove(k);
3020
239
        }
3021
44
        for (auto& k : init_rs_keys) {
3022
0
            txn->remove(k);
3023
0
        }
3024
44
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
3025
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
3026
0
                         << ", err=" << err;
3027
0
            return -1;
3028
0
        }
3029
44
        return 0;
3030
44
    };
3031
3032
49
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
3033
49
                               std::move(loop_done));
3034
49
    if (ret != 0) {
3035
3
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
3036
3
        return ret;
3037
3
    }
3038
3039
    // directly remove tablet stats and tablet jobs of these dropped index or partition
3040
46
    std::unique_ptr<Transaction> txn;
3041
46
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3042
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
3043
0
        return -1;
3044
0
    }
3045
46
    txn->remove(stats_key_begin, stats_key_end);
3046
46
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
3047
46
                 << " end=" << hex(stats_key_end);
3048
46
    txn->remove(job_key_begin, job_key_end);
3049
46
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
3050
46
    std::string schema_key_begin, schema_key_end;
3051
46
    std::string schema_dict_key;
3052
46
    std::string versioned_schema_key_begin, versioned_schema_key_end;
3053
46
    if (partition_id <= 0) {
3054
        // Delete schema kv of this index
3055
14
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
3056
14
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
3057
14
        txn->remove(schema_key_begin, schema_key_end);
3058
14
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
3059
14
                     << " end=" << hex(schema_key_end);
3060
14
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
3061
14
        txn->remove(schema_dict_key);
3062
14
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
3063
14
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
3064
14
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
3065
14
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
3066
14
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
3067
14
                     << " end=" << hex(versioned_schema_key_end);
3068
14
    }
3069
3070
46
    TxnErrorCode err = txn->commit();
3071
46
    if (err != TxnErrorCode::TXN_OK) {
3072
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
3073
0
                     << " err=" << err;
3074
0
        return -1;
3075
0
    }
3076
3077
46
    return ret;
3078
46
}
3079
3080
5.61k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
3081
5.61k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
3082
5.61k
    int64_t num_segments = rs_meta_pb.num_segments();
3083
5.61k
    if (num_segments <= 0) return 0;
3084
3085
5.61k
    std::vector<std::string> file_paths;
3086
5.61k
    if (decrement_packed_file_ref_counts(rs_meta_pb) != 0) {
3087
0
        return -1;
3088
0
    }
3089
3090
    // Process inverted indexes
3091
5.61k
    std::vector<std::pair<int64_t, std::string>> index_ids;
3092
    // default format as v1.
3093
5.61k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3094
5.61k
    bool delete_rowset_data_by_prefix = false;
3095
5.61k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3096
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3097
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3098
0
        delete_rowset_data_by_prefix = true;
3099
5.61k
    } else if (rs_meta_pb.has_tablet_schema()) {
3100
10.0k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
3101
10.0k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3102
10.0k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3103
10.0k
            }
3104
10.0k
        }
3105
4.80k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
3106
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
3107
2.00k
        }
3108
4.80k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
3109
        // schema version and index id are not found, delete rowset data by prefix directly.
3110
0
        delete_rowset_data_by_prefix = true;
3111
809
    } else {
3112
        // otherwise, try to get schema kv
3113
809
        InvertedIndexInfo index_info;
3114
809
        int inverted_index_get_ret = inverted_index_id_cache_->get(
3115
809
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
3116
809
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3117
809
                                 &inverted_index_get_ret);
3118
809
        if (inverted_index_get_ret == 0) {
3119
809
            index_format = index_info.first;
3120
809
            index_ids = index_info.second;
3121
809
        } else if (inverted_index_get_ret == 1) {
3122
            // 1. Schema kv not found means tablet has been recycled
3123
            // Maybe some tablet recycle failed by some bugs
3124
            // We need to delete again to double check
3125
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3126
            // because we are uncertain about the inverted index information.
3127
            // If there are inverted indexes, some data might not be deleted,
3128
            // but this is acceptable as we have made our best effort to delete the data.
3129
0
            LOG_INFO(
3130
0
                    "delete rowset data schema kv not found, need to delete again to double "
3131
0
                    "check")
3132
0
                    .tag("instance_id", instance_id_)
3133
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3134
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
3135
            // Currently index_ids is guaranteed to be empty,
3136
            // but we clear it again here as a safeguard against future code changes
3137
            // that might cause index_ids to no longer be empty
3138
0
            index_format = InvertedIndexStorageFormatPB::V2;
3139
0
            index_ids.clear();
3140
0
        } else {
3141
            // failed to get schema kv, delete rowset data by prefix directly.
3142
0
            delete_rowset_data_by_prefix = true;
3143
0
        }
3144
809
    }
3145
3146
5.61k
    if (delete_rowset_data_by_prefix) {
3147
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
3148
0
                                  rs_meta_pb.rowset_id_v2());
3149
0
    }
3150
3151
5.61k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
3152
5.61k
    if (it == accessor_map_.end()) {
3153
1.59k
        LOG_WARNING("instance has no such resource id")
3154
1.59k
                .tag("instance_id", instance_id_)
3155
1.59k
                .tag("resource_id", rs_meta_pb.resource_id());
3156
1.59k
        return -1;
3157
1.59k
    }
3158
4.01k
    auto& accessor = it->second;
3159
3160
4.01k
    int64_t tablet_id = rs_meta_pb.tablet_id();
3161
4.01k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
3162
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
3163
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3164
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
3165
40.0k
            for (const auto& index_id : index_ids) {
3166
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
3167
40.0k
                                                            index_id.second));
3168
40.0k
            }
3169
20.0k
        } else if (!index_ids.empty()) {
3170
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3171
0
        }
3172
20.0k
    }
3173
3174
    // Process delete bitmap - check where it's stored.
3175
4.01k
    DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3176
4.01k
    if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
3177
4.01k
                                                       &delete_bitmap_storage_type) != 0) {
3178
0
        LOG_WARNING("failed to decrement delete bitmap packed file ref count")
3179
0
                .tag("instance_id", instance_id_)
3180
0
                .tag("tablet_id", tablet_id)
3181
0
                .tag("rowset_id", rowset_id);
3182
0
        return -1;
3183
0
    }
3184
4.01k
    if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) {
3185
2.00k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3186
2.00k
    }
3187
    // TODO(AlexYue): seems could do do batch
3188
4.01k
    return accessor->delete_files(file_paths);
3189
4.01k
}
3190
3191
62.3k
int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb) {
3192
62.3k
    LOG_INFO("begin process_packed_file_location_index")
3193
62.3k
            .tag("instance_id", instance_id_)
3194
62.3k
            .tag("tablet_id", rs_meta_pb.tablet_id())
3195
62.3k
            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3196
62.3k
            .tag("index_map_size", rs_meta_pb.packed_slice_locations_size());
3197
62.3k
    const auto& index_map = rs_meta_pb.packed_slice_locations();
3198
62.3k
    if (index_map.empty()) {
3199
62.3k
        LOG_INFO("skip merge file update: empty merge_file_segment_index")
3200
62.3k
                .tag("instance_id", instance_id_)
3201
62.3k
                .tag("tablet_id", rs_meta_pb.tablet_id())
3202
62.3k
                .tag("rowset_id", rs_meta_pb.rowset_id_v2());
3203
62.3k
        return 0;
3204
62.3k
    }
3205
3206
10
    struct PackedSmallFileInfo {
3207
10
        std::string small_file_path;
3208
10
    };
3209
10
    std::unordered_map<std::string, std::vector<PackedSmallFileInfo>> packed_file_updates;
3210
10
    packed_file_updates.reserve(index_map.size());
3211
27
    for (const auto& [small_path, index_pb] : index_map) {
3212
27
        if (!index_pb.has_packed_file_path() || index_pb.packed_file_path().empty()) {
3213
0
            continue;
3214
0
        }
3215
27
        packed_file_updates[index_pb.packed_file_path()].push_back(
3216
27
                PackedSmallFileInfo {small_path});
3217
27
    }
3218
10
    if (packed_file_updates.empty()) {
3219
0
        LOG_INFO("skip packed file update: no valid merge_file_path in merge_file_segment_index")
3220
0
                .tag("instance_id", instance_id_)
3221
0
                .tag("tablet_id", rs_meta_pb.tablet_id())
3222
0
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3223
0
                .tag("index_map_size", index_map.size());
3224
0
        return 0;
3225
0
    }
3226
3227
10
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
3228
10
    int ret = 0;
3229
24
    for (auto& [packed_file_path, small_files] : packed_file_updates) {
3230
24
        if (small_files.empty()) {
3231
0
            continue;
3232
0
        }
3233
3234
24
        bool success = false;
3235
24
        for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3236
24
            std::unique_ptr<Transaction> txn;
3237
24
            TxnErrorCode err = txn_kv_->create_txn(&txn);
3238
24
            if (err != TxnErrorCode::TXN_OK) {
3239
0
                LOG_WARNING("failed to create txn when updating packed file ref count")
3240
0
                        .tag("instance_id", instance_id_)
3241
0
                        .tag("packed_file_path", packed_file_path)
3242
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3243
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3244
0
                        .tag("err", err);
3245
0
                ret = -1;
3246
0
                break;
3247
0
            }
3248
3249
24
            std::string packed_key = packed_file_key({instance_id_, packed_file_path});
3250
24
            std::string packed_val;
3251
24
            err = txn->get(packed_key, &packed_val);
3252
24
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3253
0
                LOG_WARNING("packed file info not found when recycling rowset")
3254
0
                        .tag("instance_id", instance_id_)
3255
0
                        .tag("packed_file_path", packed_file_path)
3256
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3257
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3258
0
                        .tag("key", hex(packed_key))
3259
0
                        .tag("tablet id", rs_meta_pb.tablet_id());
3260
                // Skip this packed file entry and continue with others
3261
0
                success = true;
3262
0
                break;
3263
0
            }
3264
24
            if (err != TxnErrorCode::TXN_OK) {
3265
0
                LOG_WARNING("failed to get packed file info when recycling rowset")
3266
0
                        .tag("instance_id", instance_id_)
3267
0
                        .tag("packed_file_path", packed_file_path)
3268
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3269
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3270
0
                        .tag("err", err);
3271
0
                ret = -1;
3272
0
                break;
3273
0
            }
3274
3275
24
            cloud::PackedFileInfoPB packed_info;
3276
24
            if (!packed_info.ParseFromString(packed_val)) {
3277
0
                LOG_WARNING("failed to parse packed file info when recycling rowset")
3278
0
                        .tag("instance_id", instance_id_)
3279
0
                        .tag("packed_file_path", packed_file_path)
3280
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3281
0
                        .tag("tablet_id", rs_meta_pb.tablet_id());
3282
0
                ret = -1;
3283
0
                break;
3284
0
            }
3285
3286
24
            LOG_INFO("packed file update check")
3287
24
                    .tag("instance_id", instance_id_)
3288
24
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3289
24
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3290
24
                    .tag("merged_file_path", packed_file_path)
3291
24
                    .tag("requested_small_files", small_files.size())
3292
24
                    .tag("merge_entries", packed_info.slices_size());
3293
3294
24
            auto* small_file_entries = packed_info.mutable_slices();
3295
24
            int64_t changed_files = 0;
3296
24
            int64_t missing_entries = 0;
3297
24
            int64_t already_deleted = 0;
3298
27
            for (const auto& small_file_info : small_files) {
3299
27
                bool found = false;
3300
87
                for (auto& small_file_entry : *small_file_entries) {
3301
87
                    if (small_file_entry.path() == small_file_info.small_file_path) {
3302
27
                        if (!small_file_entry.deleted()) {
3303
27
                            small_file_entry.set_deleted(true);
3304
27
                            if (!small_file_entry.corrected()) {
3305
27
                                small_file_entry.set_corrected(true);
3306
27
                            }
3307
27
                            ++changed_files;
3308
27
                        } else {
3309
0
                            ++already_deleted;
3310
0
                        }
3311
27
                        found = true;
3312
27
                        break;
3313
27
                    }
3314
87
                }
3315
27
                if (!found) {
3316
0
                    ++missing_entries;
3317
0
                    LOG_WARNING("packed file info missing small file entry")
3318
0
                            .tag("instance_id", instance_id_)
3319
0
                            .tag("packed_file_path", packed_file_path)
3320
0
                            .tag("small_file_path", small_file_info.small_file_path)
3321
0
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3322
0
                            .tag("tablet_id", rs_meta_pb.tablet_id());
3323
0
                }
3324
27
            }
3325
3326
24
            if (changed_files == 0) {
3327
0
                LOG_INFO("skip merge file update: no merge entries changed")
3328
0
                        .tag("instance_id", instance_id_)
3329
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3330
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3331
0
                        .tag("merged_file_path", packed_file_path)
3332
0
                        .tag("missing_entries", missing_entries)
3333
0
                        .tag("already_deleted", already_deleted)
3334
0
                        .tag("requested_small_files", small_files.size())
3335
0
                        .tag("merge_entries", packed_info.slices_size());
3336
0
                success = true;
3337
0
                break;
3338
0
            }
3339
3340
            // Calculate remaining files
3341
24
            int64_t left_file_count = 0;
3342
24
            int64_t left_file_bytes = 0;
3343
141
            for (const auto& small_file_entry : packed_info.slices()) {
3344
141
                if (!small_file_entry.deleted()) {
3345
57
                    ++left_file_count;
3346
57
                    left_file_bytes += small_file_entry.size();
3347
57
                }
3348
141
            }
3349
24
            packed_info.set_remaining_slice_bytes(left_file_bytes);
3350
24
            packed_info.set_ref_cnt(left_file_count);
3351
24
            LOG_INFO("updated packed file reference info")
3352
24
                    .tag("instance_id", instance_id_)
3353
24
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3354
24
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3355
24
                    .tag("packed_file_path", packed_file_path)
3356
24
                    .tag("ref_cnt", left_file_count)
3357
24
                    .tag("left_file_bytes", left_file_bytes);
3358
3359
24
            if (left_file_count == 0) {
3360
7
                packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
3361
7
            }
3362
3363
24
            std::string updated_val;
3364
24
            if (!packed_info.SerializeToString(&updated_val)) {
3365
0
                LOG_WARNING("failed to serialize packed file info when recycling rowset")
3366
0
                        .tag("instance_id", instance_id_)
3367
0
                        .tag("packed_file_path", packed_file_path)
3368
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3369
0
                        .tag("tablet_id", rs_meta_pb.tablet_id());
3370
0
                ret = -1;
3371
0
                break;
3372
0
            }
3373
3374
24
            txn->put(packed_key, updated_val);
3375
24
            err = txn->commit();
3376
24
            if (err == TxnErrorCode::TXN_OK) {
3377
24
                success = true;
3378
24
                if (left_file_count == 0) {
3379
7
                    LOG_INFO("packed file ready to delete, deleting immediately")
3380
7
                            .tag("instance_id", instance_id_)
3381
7
                            .tag("packed_file_path", packed_file_path);
3382
7
                    if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
3383
0
                        ret = -1;
3384
0
                    }
3385
7
                }
3386
24
                break;
3387
24
            }
3388
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
3389
0
                if (attempt >= max_retry_times) {
3390
0
                    LOG_WARNING("packed file info update conflict after max retry")
3391
0
                            .tag("instance_id", instance_id_)
3392
0
                            .tag("packed_file_path", packed_file_path)
3393
0
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3394
0
                            .tag("tablet_id", rs_meta_pb.tablet_id())
3395
0
                            .tag("changed_files", changed_files)
3396
0
                            .tag("attempt", attempt);
3397
0
                    ret = -1;
3398
0
                    break;
3399
0
                }
3400
0
                LOG_WARNING("packed file info update conflict, retrying")
3401
0
                        .tag("instance_id", instance_id_)
3402
0
                        .tag("packed_file_path", packed_file_path)
3403
0
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3404
0
                        .tag("tablet_id", rs_meta_pb.tablet_id())
3405
0
                        .tag("changed_files", changed_files)
3406
0
                        .tag("attempt", attempt);
3407
0
                sleep_for_packed_file_retry();
3408
0
                continue;
3409
0
            }
3410
3411
0
            LOG_WARNING("failed to commit packed file info update")
3412
0
                    .tag("instance_id", instance_id_)
3413
0
                    .tag("packed_file_path", packed_file_path)
3414
0
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
3415
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
3416
0
                    .tag("err", err)
3417
0
                    .tag("changed_files", changed_files);
3418
0
            ret = -1;
3419
0
            break;
3420
0
        }
3421
3422
24
        if (!success) {
3423
0
            ret = -1;
3424
0
        }
3425
24
    }
3426
3427
10
    return ret;
3428
10
}
3429
3430
int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts(
3431
        int64_t tablet_id, const std::string& rowset_id,
3432
58.2k
        DeleteBitmapStorageType* out_storage_type) {
3433
58.2k
    if (out_storage_type) {
3434
58.2k
        *out_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3435
58.2k
    }
3436
3437
    // Get delete bitmap storage info from FDB
3438
58.2k
    std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3439
58.2k
    std::unique_ptr<Transaction> txn;
3440
58.2k
    TxnErrorCode err = txn_kv_->create_txn(&txn);
3441
58.2k
    if (err != TxnErrorCode::TXN_OK) {
3442
0
        LOG_WARNING("failed to create txn when getting delete bitmap storage")
3443
0
                .tag("instance_id", instance_id_)
3444
0
                .tag("tablet_id", tablet_id)
3445
0
                .tag("rowset_id", rowset_id)
3446
0
                .tag("err", err);
3447
0
        return -1;
3448
0
    }
3449
3450
58.2k
    std::string dbm_val;
3451
58.2k
    err = txn->get(dbm_key, &dbm_val);
3452
58.2k
    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3453
        // No delete bitmap for this rowset, nothing to do
3454
4.63k
        LOG_INFO("delete bitmap not found, skip packed file ref count decrement")
3455
4.63k
                .tag("instance_id", instance_id_)
3456
4.63k
                .tag("tablet_id", tablet_id)
3457
4.63k
                .tag("rowset_id", rowset_id);
3458
4.63k
        return 0;
3459
4.63k
    }
3460
53.5k
    if (err != TxnErrorCode::TXN_OK) {
3461
0
        LOG_WARNING("failed to get delete bitmap storage")
3462
0
                .tag("instance_id", instance_id_)
3463
0
                .tag("tablet_id", tablet_id)
3464
0
                .tag("rowset_id", rowset_id)
3465
0
                .tag("err", err);
3466
0
        return -1;
3467
0
    }
3468
3469
53.5k
    DeleteBitmapStoragePB storage;
3470
53.5k
    if (!storage.ParseFromString(dbm_val)) {
3471
0
        LOG_WARNING("failed to parse delete bitmap storage")
3472
0
                .tag("instance_id", instance_id_)
3473
0
                .tag("tablet_id", tablet_id)
3474
0
                .tag("rowset_id", rowset_id);
3475
0
        return -1;
3476
0
    }
3477
3478
53.5k
    if (storage.store_in_fdb()) {
3479
0
        if (out_storage_type) {
3480
0
            *out_storage_type = DeleteBitmapStorageType::IN_FDB;
3481
0
        }
3482
0
        return 0;
3483
0
    }
3484
3485
    // Check if delete bitmap is stored in standalone file.
3486
53.5k
    if (!storage.has_packed_slice_location() ||
3487
53.5k
        storage.packed_slice_location().packed_file_path().empty()) {
3488
53.5k
        if (out_storage_type) {
3489
53.5k
            *out_storage_type = DeleteBitmapStorageType::STANDALONE_FILE;
3490
53.5k
        }
3491
53.5k
        return 0;
3492
53.5k
    }
3493
3494
18.4E
    if (out_storage_type) {
3495
0
        *out_storage_type = DeleteBitmapStorageType::PACKED_FILE;
3496
0
    }
3497
3498
18.4E
    const auto& packed_loc = storage.packed_slice_location();
3499
18.4E
    const std::string& packed_file_path = packed_loc.packed_file_path();
3500
3501
18.4E
    LOG_INFO("decrementing delete bitmap packed file ref count")
3502
18.4E
            .tag("instance_id", instance_id_)
3503
18.4E
            .tag("tablet_id", tablet_id)
3504
18.4E
            .tag("rowset_id", rowset_id)
3505
18.4E
            .tag("packed_file_path", packed_file_path);
3506
3507
18.4E
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
3508
18.4E
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3509
0
        std::unique_ptr<Transaction> update_txn;
3510
0
        err = txn_kv_->create_txn(&update_txn);
3511
0
        if (err != TxnErrorCode::TXN_OK) {
3512
0
            LOG_WARNING("failed to create txn for delete bitmap packed file update")
3513
0
                    .tag("instance_id", instance_id_)
3514
0
                    .tag("tablet_id", tablet_id)
3515
0
                    .tag("rowset_id", rowset_id)
3516
0
                    .tag("err", err);
3517
0
            return -1;
3518
0
        }
3519
3520
0
        std::string packed_key = packed_file_key({instance_id_, packed_file_path});
3521
0
        std::string packed_val;
3522
0
        err = update_txn->get(packed_key, &packed_val);
3523
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3524
0
            LOG_WARNING("packed file info not found for delete bitmap")
3525
0
                    .tag("instance_id", instance_id_)
3526
0
                    .tag("tablet_id", tablet_id)
3527
0
                    .tag("rowset_id", rowset_id)
3528
0
                    .tag("packed_file_path", packed_file_path);
3529
0
            return 0;
3530
0
        }
3531
0
        if (err != TxnErrorCode::TXN_OK) {
3532
0
            LOG_WARNING("failed to get packed file info for delete bitmap")
3533
0
                    .tag("instance_id", instance_id_)
3534
0
                    .tag("tablet_id", tablet_id)
3535
0
                    .tag("rowset_id", rowset_id)
3536
0
                    .tag("packed_file_path", packed_file_path)
3537
0
                    .tag("err", err);
3538
0
            return -1;
3539
0
        }
3540
3541
0
        cloud::PackedFileInfoPB packed_info;
3542
0
        if (!packed_info.ParseFromString(packed_val)) {
3543
0
            LOG_WARNING("failed to parse packed file info for delete bitmap")
3544
0
                    .tag("instance_id", instance_id_)
3545
0
                    .tag("tablet_id", tablet_id)
3546
0
                    .tag("rowset_id", rowset_id)
3547
0
                    .tag("packed_file_path", packed_file_path);
3548
0
            return -1;
3549
0
        }
3550
3551
        // Find and mark the small file entry as deleted
3552
        // Use tablet_id and rowset_id to match entry instead of path,
3553
        // because path format may vary with path_version (with or without shard prefix)
3554
0
        auto* entries = packed_info.mutable_slices();
3555
0
        bool found = false;
3556
0
        bool already_deleted = false;
3557
0
        for (auto& entry : *entries) {
3558
0
            if (entry.tablet_id() == tablet_id && entry.rowset_id() == rowset_id) {
3559
0
                if (!entry.deleted()) {
3560
0
                    entry.set_deleted(true);
3561
0
                    if (!entry.corrected()) {
3562
0
                        entry.set_corrected(true);
3563
0
                    }
3564
0
                } else {
3565
0
                    already_deleted = true;
3566
0
                }
3567
0
                found = true;
3568
0
                break;
3569
0
            }
3570
0
        }
3571
3572
0
        if (!found) {
3573
0
            LOG_WARNING("delete bitmap entry not found in packed file")
3574
0
                    .tag("instance_id", instance_id_)
3575
0
                    .tag("tablet_id", tablet_id)
3576
0
                    .tag("rowset_id", rowset_id)
3577
0
                    .tag("packed_file_path", packed_file_path);
3578
0
            return 0;
3579
0
        }
3580
3581
0
        if (already_deleted) {
3582
0
            LOG_INFO("delete bitmap entry already deleted in packed file")
3583
0
                    .tag("instance_id", instance_id_)
3584
0
                    .tag("tablet_id", tablet_id)
3585
0
                    .tag("rowset_id", rowset_id)
3586
0
                    .tag("packed_file_path", packed_file_path);
3587
0
            return 0;
3588
0
        }
3589
3590
        // Calculate remaining files
3591
0
        int64_t left_file_count = 0;
3592
0
        int64_t left_file_bytes = 0;
3593
0
        for (const auto& entry : packed_info.slices()) {
3594
0
            if (!entry.deleted()) {
3595
0
                ++left_file_count;
3596
0
                left_file_bytes += entry.size();
3597
0
            }
3598
0
        }
3599
0
        packed_info.set_remaining_slice_bytes(left_file_bytes);
3600
0
        packed_info.set_ref_cnt(left_file_count);
3601
3602
0
        if (left_file_count == 0) {
3603
0
            packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
3604
0
        }
3605
3606
0
        std::string updated_val;
3607
0
        if (!packed_info.SerializeToString(&updated_val)) {
3608
0
            LOG_WARNING("failed to serialize packed file info for delete bitmap")
3609
0
                    .tag("instance_id", instance_id_)
3610
0
                    .tag("tablet_id", tablet_id)
3611
0
                    .tag("rowset_id", rowset_id)
3612
0
                    .tag("packed_file_path", packed_file_path);
3613
0
            return -1;
3614
0
        }
3615
3616
0
        update_txn->put(packed_key, updated_val);
3617
0
        err = update_txn->commit();
3618
0
        if (err == TxnErrorCode::TXN_OK) {
3619
0
            LOG_INFO("delete bitmap packed file ref count decremented")
3620
0
                    .tag("instance_id", instance_id_)
3621
0
                    .tag("tablet_id", tablet_id)
3622
0
                    .tag("rowset_id", rowset_id)
3623
0
                    .tag("packed_file_path", packed_file_path)
3624
0
                    .tag("left_file_count", left_file_count);
3625
0
            if (left_file_count == 0) {
3626
0
                if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
3627
0
                    return -1;
3628
0
                }
3629
0
            }
3630
0
            return 0;
3631
0
        }
3632
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3633
0
            if (attempt >= max_retry_times) {
3634
0
                LOG_WARNING("delete bitmap packed file update conflict after max retry")
3635
0
                        .tag("instance_id", instance_id_)
3636
0
                        .tag("tablet_id", tablet_id)
3637
0
                        .tag("rowset_id", rowset_id)
3638
0
                        .tag("packed_file_path", packed_file_path)
3639
0
                        .tag("attempt", attempt);
3640
0
                return -1;
3641
0
            }
3642
0
            sleep_for_packed_file_retry();
3643
0
            continue;
3644
0
        }
3645
3646
0
        LOG_WARNING("failed to commit delete bitmap packed file update")
3647
0
                .tag("instance_id", instance_id_)
3648
0
                .tag("tablet_id", tablet_id)
3649
0
                .tag("rowset_id", rowset_id)
3650
0
                .tag("packed_file_path", packed_file_path)
3651
0
                .tag("err", err);
3652
0
        return -1;
3653
0
    }
3654
3655
18.4E
    return -1;
3656
18.4E
}
3657
3658
int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path,
3659
                                                const std::string& packed_key,
3660
7
                                                const cloud::PackedFileInfoPB& packed_info) {
3661
7
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
3662
0
        LOG_WARNING("packed file missing resource id when recycling")
3663
0
                .tag("instance_id", instance_id_)
3664
0
                .tag("packed_file_path", packed_file_path);
3665
0
        return -1;
3666
0
    }
3667
3668
7
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
3669
7
    if (!accessor) {
3670
0
        LOG_WARNING("no accessor available to delete packed file")
3671
0
                .tag("instance_id", instance_id_)
3672
0
                .tag("packed_file_path", packed_file_path)
3673
0
                .tag("resource_id", packed_info.resource_id());
3674
0
        return -1;
3675
0
    }
3676
3677
7
    int del_ret = accessor->delete_file(packed_file_path);
3678
7
    if (del_ret != 0 && del_ret != 1) {
3679
0
        LOG_WARNING("failed to delete packed file")
3680
0
                .tag("instance_id", instance_id_)
3681
0
                .tag("packed_file_path", packed_file_path)
3682
0
                .tag("resource_id", resource_id)
3683
0
                .tag("ret", del_ret);
3684
0
        return -1;
3685
0
    }
3686
7
    if (del_ret == 1) {
3687
0
        LOG_INFO("packed file already removed")
3688
0
                .tag("instance_id", instance_id_)
3689
0
                .tag("packed_file_path", packed_file_path)
3690
0
                .tag("resource_id", resource_id);
3691
7
    } else {
3692
7
        LOG_INFO("deleted packed file")
3693
7
                .tag("instance_id", instance_id_)
3694
7
                .tag("packed_file_path", packed_file_path)
3695
7
                .tag("resource_id", resource_id);
3696
7
    }
3697
3698
7
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
3699
7
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3700
7
        std::unique_ptr<Transaction> del_txn;
3701
7
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
3702
7
        if (err != TxnErrorCode::TXN_OK) {
3703
0
            LOG_WARNING("failed to create txn when removing packed file kv")
3704
0
                    .tag("instance_id", instance_id_)
3705
0
                    .tag("packed_file_path", packed_file_path)
3706
0
                    .tag("attempt", attempt)
3707
0
                    .tag("err", err);
3708
0
            return -1;
3709
0
        }
3710
3711
7
        std::string latest_val;
3712
7
        err = del_txn->get(packed_key, &latest_val);
3713
7
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3714
0
            return 0;
3715
0
        }
3716
7
        if (err != TxnErrorCode::TXN_OK) {
3717
0
            LOG_WARNING("failed to re-read packed file kv before removal")
3718
0
                    .tag("instance_id", instance_id_)
3719
0
                    .tag("packed_file_path", packed_file_path)
3720
0
                    .tag("attempt", attempt)
3721
0
                    .tag("err", err);
3722
0
            return -1;
3723
0
        }
3724
3725
7
        cloud::PackedFileInfoPB latest_info;
3726
7
        if (!latest_info.ParseFromString(latest_val)) {
3727
0
            LOG_WARNING("failed to parse packed file info before removal")
3728
0
                    .tag("instance_id", instance_id_)
3729
0
                    .tag("packed_file_path", packed_file_path)
3730
0
                    .tag("attempt", attempt);
3731
0
            return -1;
3732
0
        }
3733
3734
7
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
3735
7
              latest_info.ref_cnt() == 0)) {
3736
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
3737
0
                    .tag("instance_id", instance_id_)
3738
0
                    .tag("packed_file_path", packed_file_path)
3739
0
                    .tag("attempt", attempt);
3740
0
            return 0;
3741
0
        }
3742
3743
7
        del_txn->remove(packed_key);
3744
7
        err = del_txn->commit();
3745
7
        if (err == TxnErrorCode::TXN_OK) {
3746
7
            LOG_INFO("removed packed file metadata")
3747
7
                    .tag("instance_id", instance_id_)
3748
7
                    .tag("packed_file_path", packed_file_path);
3749
7
            return 0;
3750
7
        }
3751
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3752
0
            if (attempt >= max_retry_times) {
3753
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
3754
0
                        .tag("instance_id", instance_id_)
3755
0
                        .tag("packed_file_path", packed_file_path)
3756
0
                        .tag("attempt", attempt);
3757
0
                return -1;
3758
0
            }
3759
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
3760
0
                    .tag("instance_id", instance_id_)
3761
0
                    .tag("packed_file_path", packed_file_path)
3762
0
                    .tag("attempt", attempt);
3763
0
            sleep_for_packed_file_retry();
3764
0
            continue;
3765
0
        }
3766
0
        LOG_WARNING("failed to remove packed file kv")
3767
0
                .tag("instance_id", instance_id_)
3768
0
                .tag("packed_file_path", packed_file_path)
3769
0
                .tag("attempt", attempt)
3770
0
                .tag("err", err);
3771
0
        return -1;
3772
0
    }
3773
0
    return -1;
3774
7
}
3775
3776
int InstanceRecycler::delete_rowset_data(
3777
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
3778
98
        RecyclerMetricsContext& metrics_context) {
3779
98
    int ret = 0;
3780
    // resource_id -> file_paths
3781
98
    std::map<std::string, std::vector<std::string>> resource_file_paths;
3782
    // (resource_id, tablet_id, rowset_id)
3783
98
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
3784
98
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
3785
3786
57.1k
    for (const auto& [_, rs] : rowsets) {
3787
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
3788
        // due to aborted schema change.
3789
57.1k
        if (is_formal_rowset) {
3790
3.15k
            std::lock_guard lock(recycled_tablets_mtx_);
3791
3.15k
            if (recycled_tablets_.count(rs.tablet_id()) && rs.packed_slice_locations_size() == 0) {
3792
                // Tablet has been recycled and this rowset has no packed slices, so file data
3793
                // should already be gone; skip to avoid redundant deletes. Rowsets with packed
3794
                // slice info must still run to decrement packed file ref counts.
3795
0
                continue;
3796
0
            }
3797
3.15k
        }
3798
3799
57.1k
        int64_t num_segments = rs.num_segments();
3800
        // Check num_segments before accessor lookup, because empty rowsets
3801
        // (e.g. base compaction output of empty rowsets) may have no resource_id
3802
        // set. Skipping them early avoids a spurious "no such resource id" error
3803
        // that marks the entire batch as failed and prevents txn_remove from
3804
        // cleaning up recycle KV keys.
3805
57.1k
        if (num_segments <= 0) {
3806
0
            metrics_context.total_recycled_num++;
3807
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
3808
0
            continue;
3809
0
        }
3810
3811
57.1k
        auto it = accessor_map_.find(rs.resource_id());
3812
        // possible if the accessor is not initilized correctly
3813
57.1k
        if (it == accessor_map_.end()) [[unlikely]] {
3814
3.00k
            LOG_WARNING("instance has no such resource id")
3815
3.00k
                    .tag("instance_id", instance_id_)
3816
3.00k
                    .tag("resource_id", rs.resource_id());
3817
3.00k
            ret = -1;
3818
3.00k
            continue;
3819
3.00k
        }
3820
3821
54.1k
        auto& file_paths = resource_file_paths[rs.resource_id()];
3822
54.1k
        const auto& rowset_id = rs.rowset_id_v2();
3823
54.1k
        int64_t tablet_id = rs.tablet_id();
3824
54.1k
        LOG_INFO("recycle rowset merge index size")
3825
54.1k
                .tag("instance_id", instance_id_)
3826
54.1k
                .tag("tablet_id", tablet_id)
3827
54.1k
                .tag("rowset_id", rowset_id)
3828
54.1k
                .tag("merge_index_size", rs.packed_slice_locations_size());
3829
54.1k
        if (decrement_packed_file_ref_counts(rs) != 0) {
3830
0
            ret = -1;
3831
0
            continue;
3832
0
        }
3833
3834
        // Process delete bitmap - check where it's stored.
3835
54.1k
        DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3836
54.1k
        if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
3837
54.1k
                                                           &delete_bitmap_storage_type) != 0) {
3838
0
            LOG_WARNING("failed to decrement delete bitmap packed file ref count")
3839
0
                    .tag("instance_id", instance_id_)
3840
0
                    .tag("tablet_id", tablet_id)
3841
0
                    .tag("rowset_id", rowset_id);
3842
0
            ret = -1;
3843
0
            continue;
3844
0
        }
3845
54.1k
        if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) {
3846
51.5k
            file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3847
51.5k
        }
3848
3849
        // Process inverted indexes
3850
54.1k
        std::vector<std::pair<int64_t, std::string>> index_ids;
3851
        // default format as v1.
3852
54.1k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3853
54.1k
        int inverted_index_get_ret = 0;
3854
54.1k
        if (rs.has_tablet_schema()) {
3855
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
3856
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3857
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3858
53.5k
                }
3859
53.5k
            }
3860
26.6k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
3861
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
3862
26.5k
            }
3863
27.5k
        } else {
3864
27.5k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
3865
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
3866
0
                                "instance_id="
3867
0
                             << instance_id_ << " tablet_id=" << tablet_id
3868
0
                             << " rowset_id=" << rowset_id;
3869
0
                ret = -1;
3870
0
                continue;
3871
0
            }
3872
27.5k
            InvertedIndexInfo index_info;
3873
27.5k
            inverted_index_get_ret =
3874
27.5k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
3875
27.5k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3876
27.5k
                                     &inverted_index_get_ret);
3877
27.5k
            if (inverted_index_get_ret == 0) {
3878
27.0k
                index_format = index_info.first;
3879
27.0k
                index_ids = index_info.second;
3880
27.0k
            } else if (inverted_index_get_ret == 1) {
3881
                // 1. Schema kv not found means tablet has been recycled
3882
                // Maybe some tablet recycle failed by some bugs
3883
                // We need to delete again to double check
3884
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3885
                // because we are uncertain about the inverted index information.
3886
                // If there are inverted indexes, some data might not be deleted,
3887
                // but this is acceptable as we have made our best effort to delete the data.
3888
507
                LOG_INFO(
3889
507
                        "delete rowset data schema kv not found, need to delete again to "
3890
507
                        "double "
3891
507
                        "check")
3892
507
                        .tag("instance_id", instance_id_)
3893
507
                        .tag("tablet_id", tablet_id)
3894
507
                        .tag("rowset", rs.ShortDebugString());
3895
                // Currently index_ids is guaranteed to be empty,
3896
                // but we clear it again here as a safeguard against future code changes
3897
                // that might cause index_ids to no longer be empty
3898
507
                index_format = InvertedIndexStorageFormatPB::V2;
3899
507
                index_ids.clear();
3900
18.4E
            } else {
3901
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
3902
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
3903
18.4E
                ret = -1;
3904
18.4E
                continue;
3905
18.4E
            }
3906
27.5k
        }
3907
54.2k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3908
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3909
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3910
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
3911
5
            continue;
3912
5
        }
3913
323k
        for (int64_t i = 0; i < num_segments; ++i) {
3914
268k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3915
268k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
3916
531k
                for (const auto& index_id : index_ids) {
3917
531k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
3918
531k
                                                                index_id.first, index_id.second));
3919
531k
                }
3920
266k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
3921
                // try to recycle inverted index v2 when get_ret == 1
3922
                // we treat schema not found as if it has a v2 format inverted index
3923
                // to reduce chance of data leakage
3924
2.50k
                if (inverted_index_get_ret == 1) {
3925
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
3926
2.50k
                            .tag("instance_id", instance_id_)
3927
2.50k
                            .tag("inverted index v2 path",
3928
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
3929
2.50k
                }
3930
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3931
2.50k
            }
3932
268k
        }
3933
54.1k
    }
3934
3935
98
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
3936
98
                                                 "delete_rowset_data",
3937
98
                                                 [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3937
5
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3937
51
                                                 [](const int& ret) { return ret != 0; });
3938
98
    for (auto& [resource_id, file_paths] : resource_file_paths) {
3939
51
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3940
51
            DCHECK(accessor_map_.count(*rid))
3941
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3942
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3943
51
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3944
51
                                     &accessor_map_);
3945
51
            if (!accessor_map_.contains(*rid)) {
3946
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3947
0
                        .tag("resource_id", resource_id)
3948
0
                        .tag("instance_id", instance_id_);
3949
0
                return -1;
3950
0
            }
3951
51
            auto& accessor = accessor_map_[*rid];
3952
51
            int ret = accessor->delete_files(*paths);
3953
51
            if (!ret) {
3954
                // deduplication of different files with the same rowset id
3955
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3956
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3957
51
                std::set<std::string> deleted_rowset_id;
3958
3959
51
                std::for_each(paths->begin(), paths->end(),
3960
51
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3961
856k
                               this](const std::string& path) {
3962
856k
                                  std::vector<std::string> str;
3963
856k
                                  butil::SplitString(path, '/', &str);
3964
856k
                                  std::string rowset_id;
3965
856k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3966
852k
                                      rowset_id = str.back().substr(0, pos);
3967
852k
                                  } else {
3968
3.95k
                                      if (path.find("packed_file/") != std::string::npos) {
3969
0
                                          return; // packed files do not have rowset_id encoded
3970
0
                                      }
3971
3.95k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3972
3.95k
                                      return;
3973
3.95k
                                  }
3974
852k
                                  auto rs_meta = rowsets.find(rowset_id);
3975
852k
                                  if (rs_meta != rowsets.end() &&
3976
858k
                                      !deleted_rowset_id.contains(rowset_id)) {
3977
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3978
54.1k
                                      metrics_context.total_recycled_data_size +=
3979
54.1k
                                              rs_meta->second.total_disk_size();
3980
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3981
54.1k
                                              rs_meta->second.num_segments();
3982
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3983
54.1k
                                              rs_meta->second.total_disk_size();
3984
54.1k
                                      metrics_context.total_recycled_num++;
3985
54.1k
                                  }
3986
852k
                              });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3961
7
                               this](const std::string& path) {
3962
7
                                  std::vector<std::string> str;
3963
7
                                  butil::SplitString(path, '/', &str);
3964
7
                                  std::string rowset_id;
3965
7
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3966
7
                                      rowset_id = str.back().substr(0, pos);
3967
7
                                  } else {
3968
0
                                      if (path.find("packed_file/") != std::string::npos) {
3969
0
                                          return; // packed files do not have rowset_id encoded
3970
0
                                      }
3971
0
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3972
0
                                      return;
3973
0
                                  }
3974
7
                                  auto rs_meta = rowsets.find(rowset_id);
3975
7
                                  if (rs_meta != rowsets.end() &&
3976
7
                                      !deleted_rowset_id.contains(rowset_id)) {
3977
7
                                      deleted_rowset_id.emplace(rowset_id);
3978
7
                                      metrics_context.total_recycled_data_size +=
3979
7
                                              rs_meta->second.total_disk_size();
3980
7
                                      segment_metrics_context_.total_recycled_num +=
3981
7
                                              rs_meta->second.num_segments();
3982
7
                                      segment_metrics_context_.total_recycled_data_size +=
3983
7
                                              rs_meta->second.total_disk_size();
3984
7
                                      metrics_context.total_recycled_num++;
3985
7
                                  }
3986
7
                              });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3961
856k
                               this](const std::string& path) {
3962
856k
                                  std::vector<std::string> str;
3963
856k
                                  butil::SplitString(path, '/', &str);
3964
856k
                                  std::string rowset_id;
3965
856k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3966
852k
                                      rowset_id = str.back().substr(0, pos);
3967
852k
                                  } else {
3968
3.95k
                                      if (path.find("packed_file/") != std::string::npos) {
3969
0
                                          return; // packed files do not have rowset_id encoded
3970
0
                                      }
3971
3.95k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3972
3.95k
                                      return;
3973
3.95k
                                  }
3974
852k
                                  auto rs_meta = rowsets.find(rowset_id);
3975
852k
                                  if (rs_meta != rowsets.end() &&
3976
858k
                                      !deleted_rowset_id.contains(rowset_id)) {
3977
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3978
54.1k
                                      metrics_context.total_recycled_data_size +=
3979
54.1k
                                              rs_meta->second.total_disk_size();
3980
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3981
54.1k
                                              rs_meta->second.num_segments();
3982
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3983
54.1k
                                              rs_meta->second.total_disk_size();
3984
54.1k
                                      metrics_context.total_recycled_num++;
3985
54.1k
                                  }
3986
852k
                              });
3987
51
            }
3988
51
            return ret;
3989
51
        });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3939
5
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3940
5
            DCHECK(accessor_map_.count(*rid))
3941
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3942
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3943
5
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3944
5
                                     &accessor_map_);
3945
5
            if (!accessor_map_.contains(*rid)) {
3946
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3947
0
                        .tag("resource_id", resource_id)
3948
0
                        .tag("instance_id", instance_id_);
3949
0
                return -1;
3950
0
            }
3951
5
            auto& accessor = accessor_map_[*rid];
3952
5
            int ret = accessor->delete_files(*paths);
3953
5
            if (!ret) {
3954
                // deduplication of different files with the same rowset id
3955
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3956
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3957
5
                std::set<std::string> deleted_rowset_id;
3958
3959
5
                std::for_each(paths->begin(), paths->end(),
3960
5
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3961
5
                               this](const std::string& path) {
3962
5
                                  std::vector<std::string> str;
3963
5
                                  butil::SplitString(path, '/', &str);
3964
5
                                  std::string rowset_id;
3965
5
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3966
5
                                      rowset_id = str.back().substr(0, pos);
3967
5
                                  } else {
3968
5
                                      if (path.find("packed_file/") != std::string::npos) {
3969
5
                                          return; // packed files do not have rowset_id encoded
3970
5
                                      }
3971
5
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3972
5
                                      return;
3973
5
                                  }
3974
5
                                  auto rs_meta = rowsets.find(rowset_id);
3975
5
                                  if (rs_meta != rowsets.end() &&
3976
5
                                      !deleted_rowset_id.contains(rowset_id)) {
3977
5
                                      deleted_rowset_id.emplace(rowset_id);
3978
5
                                      metrics_context.total_recycled_data_size +=
3979
5
                                              rs_meta->second.total_disk_size();
3980
5
                                      segment_metrics_context_.total_recycled_num +=
3981
5
                                              rs_meta->second.num_segments();
3982
5
                                      segment_metrics_context_.total_recycled_data_size +=
3983
5
                                              rs_meta->second.total_disk_size();
3984
5
                                      metrics_context.total_recycled_num++;
3985
5
                                  }
3986
5
                              });
3987
5
            }
3988
5
            return ret;
3989
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3939
46
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3940
46
            DCHECK(accessor_map_.count(*rid))
3941
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3942
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3943
46
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3944
46
                                     &accessor_map_);
3945
46
            if (!accessor_map_.contains(*rid)) {
3946
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3947
0
                        .tag("resource_id", resource_id)
3948
0
                        .tag("instance_id", instance_id_);
3949
0
                return -1;
3950
0
            }
3951
46
            auto& accessor = accessor_map_[*rid];
3952
46
            int ret = accessor->delete_files(*paths);
3953
46
            if (!ret) {
3954
                // deduplication of different files with the same rowset id
3955
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3956
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3957
46
                std::set<std::string> deleted_rowset_id;
3958
3959
46
                std::for_each(paths->begin(), paths->end(),
3960
46
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3961
46
                               this](const std::string& path) {
3962
46
                                  std::vector<std::string> str;
3963
46
                                  butil::SplitString(path, '/', &str);
3964
46
                                  std::string rowset_id;
3965
46
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3966
46
                                      rowset_id = str.back().substr(0, pos);
3967
46
                                  } else {
3968
46
                                      if (path.find("packed_file/") != std::string::npos) {
3969
46
                                          return; // packed files do not have rowset_id encoded
3970
46
                                      }
3971
46
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3972
46
                                      return;
3973
46
                                  }
3974
46
                                  auto rs_meta = rowsets.find(rowset_id);
3975
46
                                  if (rs_meta != rowsets.end() &&
3976
46
                                      !deleted_rowset_id.contains(rowset_id)) {
3977
46
                                      deleted_rowset_id.emplace(rowset_id);
3978
46
                                      metrics_context.total_recycled_data_size +=
3979
46
                                              rs_meta->second.total_disk_size();
3980
46
                                      segment_metrics_context_.total_recycled_num +=
3981
46
                                              rs_meta->second.num_segments();
3982
46
                                      segment_metrics_context_.total_recycled_data_size +=
3983
46
                                              rs_meta->second.total_disk_size();
3984
46
                                      metrics_context.total_recycled_num++;
3985
46
                                  }
3986
46
                              });
3987
46
            }
3988
46
            return ret;
3989
46
        });
3990
51
    }
3991
98
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
3992
5
        LOG_INFO(
3993
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
3994
5
                "resource_id={}, tablet_id={}, instance_id={}, task_type={}",
3995
5
                rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type);
3996
5
        concurrent_delete_executor.add([&]() -> int {
3997
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3998
5
            if (!ret) {
3999
5
                auto rs = rowsets.at(rowset_id);
4000
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
4001
5
                metrics_context.total_recycled_num++;
4002
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
4003
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
4004
5
            }
4005
5
            return ret;
4006
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
3996
5
        concurrent_delete_executor.add([&]() -> int {
3997
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3998
5
            if (!ret) {
3999
5
                auto rs = rowsets.at(rowset_id);
4000
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
4001
5
                metrics_context.total_recycled_num++;
4002
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
4003
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
4004
5
            }
4005
5
            return ret;
4006
5
        });
4007
5
    }
4008
4009
98
    bool finished = true;
4010
98
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4011
98
    for (int r : rets) {
4012
56
        if (r != 0) {
4013
0
            ret = -1;
4014
0
            break;
4015
0
        }
4016
56
    }
4017
98
    ret = finished ? ret : -1;
4018
98
    return ret;
4019
98
}
4020
4021
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
4022
3.30k
                                         const std::string& rowset_id) {
4023
3.30k
    auto it = accessor_map_.find(resource_id);
4024
3.30k
    if (it == accessor_map_.end()) {
4025
400
        LOG_WARNING("instance has no such resource id")
4026
400
                .tag("instance_id", instance_id_)
4027
400
                .tag("resource_id", resource_id)
4028
400
                .tag("tablet_id", tablet_id)
4029
400
                .tag("rowset_id", rowset_id);
4030
400
        return -1;
4031
400
    }
4032
2.90k
    auto& accessor = it->second;
4033
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
4034
3.30k
}
4035
4036
4
bool InstanceRecycler::decode_packed_file_key(std::string_view key, std::string* packed_path) {
4037
4
    if (key.empty()) {
4038
0
        return false;
4039
0
    }
4040
4
    std::string_view key_view = key;
4041
4
    key_view.remove_prefix(1); // remove keyspace prefix
4042
4
    std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> decoded;
4043
4
    if (decode_key(&key_view, &decoded) != 0) {
4044
0
        return false;
4045
0
    }
4046
4
    if (decoded.size() < 4) {
4047
0
        return false;
4048
0
    }
4049
4
    try {
4050
4
        *packed_path = std::get<std::string>(std::get<0>(decoded.back()));
4051
4
    } catch (const std::bad_variant_access&) {
4052
0
        return false;
4053
0
    }
4054
4
    return true;
4055
4
}
4056
4057
14
int InstanceRecycler::recycle_packed_files() {
4058
14
    const std::string task_name = "recycle_packed_files";
4059
14
    auto start_tp = steady_clock::now();
4060
14
    int64_t start_time = duration_cast<seconds>(start_tp.time_since_epoch()).count();
4061
14
    int ret = 0;
4062
14
    PackedFileRecycleStats stats;
4063
4064
14
    register_recycle_task(task_name, start_time);
4065
14
    DORIS_CLOUD_DEFER {
4066
14
        unregister_recycle_task(task_name);
4067
14
        int64_t cost =
4068
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4069
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
4070
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
4071
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
4072
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
4073
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
4074
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
4075
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
4076
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
4077
14
                                                             stats.bytes_object_deleted);
4078
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
4079
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
4080
14
                .tag("instance_id", instance_id_)
4081
14
                .tag("num_scanned", stats.num_scanned)
4082
14
                .tag("num_corrected", stats.num_corrected)
4083
14
                .tag("num_deleted", stats.num_deleted)
4084
14
                .tag("num_failed", stats.num_failed)
4085
14
                .tag("num_objects_deleted", stats.num_object_deleted)
4086
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
4087
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
4088
14
                .tag("bytes_deleted", stats.bytes_deleted)
4089
14
                .tag("ret", ret);
4090
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
Line
Count
Source
4065
14
    DORIS_CLOUD_DEFER {
4066
14
        unregister_recycle_task(task_name);
4067
14
        int64_t cost =
4068
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4069
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
4070
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
4071
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
4072
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
4073
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
4074
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
4075
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
4076
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
4077
14
                                                             stats.bytes_object_deleted);
4078
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
4079
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
4080
14
                .tag("instance_id", instance_id_)
4081
14
                .tag("num_scanned", stats.num_scanned)
4082
14
                .tag("num_corrected", stats.num_corrected)
4083
14
                .tag("num_deleted", stats.num_deleted)
4084
14
                .tag("num_failed", stats.num_failed)
4085
14
                .tag("num_objects_deleted", stats.num_object_deleted)
4086
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
4087
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
4088
14
                .tag("bytes_deleted", stats.bytes_deleted)
4089
14
                .tag("ret", ret);
4090
14
    };
4091
4092
14
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
4093
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
4094
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
4095
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
Line
Count
Source
4092
4
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
4093
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
4094
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
4095
4
    };
4096
4097
14
    LOG_INFO("begin to recycle packed file").tag("instance_id", instance_id_);
4098
4099
14
    std::string begin = packed_file_key({instance_id_, ""});
4100
14
    std::string end = packed_file_key({instance_id_, "\xff"});
4101
14
    if (scan_and_recycle(begin, end, recycle_func) != 0) {
4102
0
        ret = -1;
4103
0
    }
4104
4105
14
    return ret;
4106
14
}
4107
4108
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
4109
                                                  RecyclerMetricsContext& metrics_context,
4110
0
                                                  int64_t partition_id, bool is_empty_tablet) {
4111
0
    std::string tablet_key_begin, tablet_key_end;
4112
4113
0
    if (partition_id > 0) {
4114
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
4115
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
4116
0
    } else {
4117
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
4118
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
4119
0
    }
4120
    // for calculate the total num or bytes of recyled objects
4121
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
4122
0
                                                          std::string_view v) -> int {
4123
0
        doris::TabletMetaCloudPB tablet_meta_pb;
4124
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
4125
0
            return 0;
4126
0
        }
4127
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
4128
4129
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
4130
0
            return 0;
4131
0
        }
4132
4133
0
        if (!is_empty_tablet) {
4134
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
4135
0
                return 0;
4136
0
            }
4137
0
            tablet_metrics_context_.total_need_recycle_num++;
4138
0
        }
4139
0
        return 0;
4140
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
4141
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
4142
0
    metrics_context.report(true);
4143
0
    tablet_metrics_context_.report(true);
4144
0
    segment_metrics_context_.report(true);
4145
0
    return ret;
4146
0
}
4147
4148
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
4149
0
                                                 RecyclerMetricsContext& metrics_context) {
4150
0
    int ret = 0;
4151
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
4152
0
    std::unique_ptr<Transaction> txn;
4153
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4154
0
        LOG_WARNING("failed to recycle tablet ")
4155
0
                .tag("tablet id", tablet_id)
4156
0
                .tag("instance_id", instance_id_)
4157
0
                .tag("reason", "failed to create txn");
4158
0
        ret = -1;
4159
0
    }
4160
0
    GetRowsetResponse resp;
4161
0
    std::string msg;
4162
0
    MetaServiceCode code = MetaServiceCode::OK;
4163
    // get rowsets in tablet
4164
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
4165
0
                        tablet_id, code, msg, &resp);
4166
0
    if (code != MetaServiceCode::OK) {
4167
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4168
0
                .tag("tablet id", tablet_id)
4169
0
                .tag("msg", msg)
4170
0
                .tag("code", code)
4171
0
                .tag("instance id", instance_id_);
4172
0
        ret = -1;
4173
0
    }
4174
0
    for (const auto& rs_meta : resp.rowset_meta()) {
4175
        /*
4176
        * For compatibility, we skip the loop for [0-1] here.
4177
        * The purpose of this loop is to delete object files,
4178
        * and since [0-1] only has meta and doesn't have object files,
4179
        * skipping it doesn't affect system correctness.
4180
        *
4181
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
4182
        * would return error -1 directly, causing the recycle operation to fail.
4183
        *
4184
        * [0-1] doesn't have resource id is a bug.
4185
        * In the future, we will fix this problem, after that,
4186
        * we can remove this if statement.
4187
        *
4188
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
4189
        */
4190
4191
0
        if (rs_meta.end_version() == 1) {
4192
            // Assert that [0-1] has no resource_id to make sure
4193
            // this if statement will not be forgetted to remove
4194
            // when the resource id bug is fixed
4195
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4196
0
            continue;
4197
0
        }
4198
0
        if (!rs_meta.has_resource_id()) {
4199
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4200
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4201
0
                    .tag("instance_id", instance_id_)
4202
0
                    .tag("tablet_id", tablet_id);
4203
0
            continue;
4204
0
        }
4205
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4206
0
        auto it = accessor_map_.find(rs_meta.resource_id());
4207
        // possible if the accessor is not initilized correctly
4208
0
        if (it == accessor_map_.end()) [[unlikely]] {
4209
0
            LOG_WARNING(
4210
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4211
0
                    "recycle process")
4212
0
                    .tag("tablet id", tablet_id)
4213
0
                    .tag("instance_id", instance_id_)
4214
0
                    .tag("resource_id", rs_meta.resource_id())
4215
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4216
0
            continue;
4217
0
        }
4218
4219
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
4220
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4221
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4222
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
4223
0
    }
4224
0
    return ret;
4225
0
}
4226
4227
4.25k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
4228
4.25k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
4229
4.25k
            .tag("instance_id", instance_id_)
4230
4.25k
            .tag("tablet_id", tablet_id);
4231
4232
4.25k
    if (should_recycle_versioned_keys()) {
4233
11
        int ret = recycle_versioned_tablet(tablet_id, metrics_context);
4234
11
        if (ret != 0) {
4235
0
            return ret;
4236
0
        }
4237
        // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED
4238
        // during the recycle_versioned_tablet process.
4239
        //
4240
        // .. And remove restore job rowsets of this tablet too
4241
11
    }
4242
4243
4.25k
    int ret = 0;
4244
4.25k
    auto start_time = steady_clock::now();
4245
4246
4.25k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4247
4248
    // collect resource ids
4249
248
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4250
248
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4251
248
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4252
248
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4253
248
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4254
248
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4255
4256
248
    std::set<std::string> resource_ids;
4257
248
    int64_t recycle_rowsets_number = 0;
4258
248
    int64_t recycle_segments_number = 0;
4259
248
    int64_t recycle_rowsets_data_size = 0;
4260
248
    int64_t recycle_rowsets_index_size = 0;
4261
248
    int64_t recycle_restore_job_rowsets_number = 0;
4262
248
    int64_t recycle_restore_job_segments_number = 0;
4263
248
    int64_t recycle_restore_job_rowsets_data_size = 0;
4264
248
    int64_t recycle_restore_job_rowsets_index_size = 0;
4265
248
    int64_t max_rowset_version = 0;
4266
248
    int64_t min_rowset_creation_time = INT64_MAX;
4267
248
    int64_t max_rowset_creation_time = 0;
4268
248
    int64_t min_rowset_expiration_time = INT64_MAX;
4269
248
    int64_t max_rowset_expiration_time = 0;
4270
4271
248
    DORIS_CLOUD_DEFER {
4272
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4273
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4274
248
                .tag("instance_id", instance_id_)
4275
248
                .tag("tablet_id", tablet_id)
4276
248
                .tag("recycle rowsets number", recycle_rowsets_number)
4277
248
                .tag("recycle segments number", recycle_segments_number)
4278
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4279
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4280
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4281
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4282
248
                .tag("all restore job rowsets recycle data size",
4283
248
                     recycle_restore_job_rowsets_data_size)
4284
248
                .tag("all restore job rowsets recycle index size",
4285
248
                     recycle_restore_job_rowsets_index_size)
4286
248
                .tag("max rowset version", max_rowset_version)
4287
248
                .tag("min rowset creation time", min_rowset_creation_time)
4288
248
                .tag("max rowset creation time", max_rowset_creation_time)
4289
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
4290
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
4291
248
                .tag("task type", metrics_context.operation_type)
4292
248
                .tag("ret", ret);
4293
248
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4271
248
    DORIS_CLOUD_DEFER {
4272
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4273
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4274
248
                .tag("instance_id", instance_id_)
4275
248
                .tag("tablet_id", tablet_id)
4276
248
                .tag("recycle rowsets number", recycle_rowsets_number)
4277
248
                .tag("recycle segments number", recycle_segments_number)
4278
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4279
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4280
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4281
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4282
248
                .tag("all restore job rowsets recycle data size",
4283
248
                     recycle_restore_job_rowsets_data_size)
4284
248
                .tag("all restore job rowsets recycle index size",
4285
248
                     recycle_restore_job_rowsets_index_size)
4286
248
                .tag("max rowset version", max_rowset_version)
4287
248
                .tag("min rowset creation time", min_rowset_creation_time)
4288
248
                .tag("max rowset creation time", max_rowset_creation_time)
4289
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
4290
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
4291
248
                .tag("task type", metrics_context.operation_type)
4292
248
                .tag("ret", ret);
4293
248
    };
4294
4295
248
    std::unique_ptr<Transaction> txn;
4296
248
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4297
0
        LOG_WARNING("failed to recycle tablet ")
4298
0
                .tag("tablet id", tablet_id)
4299
0
                .tag("instance_id", instance_id_)
4300
0
                .tag("reason", "failed to create txn");
4301
0
        ret = -1;
4302
0
    }
4303
248
    GetRowsetResponse resp;
4304
248
    std::string msg;
4305
248
    MetaServiceCode code = MetaServiceCode::OK;
4306
    // get rowsets in tablet
4307
248
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
4308
248
                        tablet_id, code, msg, &resp);
4309
248
    if (code != MetaServiceCode::OK) {
4310
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4311
0
                .tag("tablet id", tablet_id)
4312
0
                .tag("msg", msg)
4313
0
                .tag("code", code)
4314
0
                .tag("instance id", instance_id_);
4315
0
        ret = -1;
4316
0
    }
4317
248
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
4318
4319
2.51k
    for (const auto& rs_meta : resp.rowset_meta()) {
4320
        // The rowset has no resource id and segments when it was generated by compaction
4321
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
4322
2.51k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
4323
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
4324
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4325
0
                    .tag("instance_id", instance_id_)
4326
0
                    .tag("tablet_id", tablet_id);
4327
0
            recycle_rowsets_number += 1;
4328
0
            continue;
4329
0
        }
4330
2.51k
        if (!rs_meta.has_resource_id()) {
4331
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4332
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
4333
1
                    .tag("instance_id", instance_id_)
4334
1
                    .tag("tablet_id", tablet_id);
4335
1
            return -1;
4336
1
        }
4337
18.4E
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4338
2.51k
        auto it = accessor_map_.find(rs_meta.resource_id());
4339
        // possible if the accessor is not initilized correctly
4340
2.51k
        if (it == accessor_map_.end()) [[unlikely]] {
4341
1
            LOG_WARNING(
4342
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4343
1
                    "recycle process")
4344
1
                    .tag("tablet id", tablet_id)
4345
1
                    .tag("instance_id", instance_id_)
4346
1
                    .tag("resource_id", rs_meta.resource_id())
4347
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4348
1
            return -1;
4349
1
        }
4350
2.51k
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4351
0
            LOG_WARNING("failed to update packed file info when recycling tablet")
4352
0
                    .tag("instance_id", instance_id_)
4353
0
                    .tag("tablet_id", tablet_id)
4354
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4355
0
            return -1;
4356
0
        }
4357
2.51k
        recycle_rowsets_number += 1;
4358
2.51k
        recycle_segments_number += rs_meta.num_segments();
4359
2.51k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4360
2.51k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4361
2.51k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4362
2.51k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4363
2.51k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4364
2.51k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4365
2.51k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4366
2.51k
        resource_ids.emplace(rs_meta.resource_id());
4367
2.51k
    }
4368
4369
    // get restore job rowset in tablet
4370
246
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
4371
246
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
4372
246
    if (code != MetaServiceCode::OK) {
4373
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
4374
0
                .tag("tablet id", tablet_id)
4375
0
                .tag("msg", msg)
4376
0
                .tag("code", code)
4377
0
                .tag("instance id", instance_id_);
4378
0
        return -1;
4379
0
    }
4380
4381
246
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
4382
0
        if (!rs_meta.has_resource_id()) {
4383
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4384
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4385
0
                    .tag("instance_id", instance_id_)
4386
0
                    .tag("tablet_id", tablet_id);
4387
0
            return -1;
4388
0
        }
4389
4390
0
        auto it = accessor_map_.find(rs_meta.resource_id());
4391
        // possible if the accessor is not initilized correctly
4392
0
        if (it == accessor_map_.end()) [[unlikely]] {
4393
0
            LOG_WARNING(
4394
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4395
0
                    "recycle process")
4396
0
                    .tag("tablet id", tablet_id)
4397
0
                    .tag("instance_id", instance_id_)
4398
0
                    .tag("resource_id", rs_meta.resource_id())
4399
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4400
0
            return -1;
4401
0
        }
4402
0
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4403
0
            LOG_WARNING("failed to update packed file info when recycling restore job rowset")
4404
0
                    .tag("instance_id", instance_id_)
4405
0
                    .tag("tablet_id", tablet_id)
4406
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4407
0
            return -1;
4408
0
        }
4409
0
        recycle_restore_job_rowsets_number += 1;
4410
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
4411
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
4412
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
4413
0
        resource_ids.emplace(rs_meta.resource_id());
4414
0
    }
4415
4416
246
    LOG_INFO("recycle tablet start to delete object")
4417
246
            .tag("instance id", instance_id_)
4418
246
            .tag("tablet id", tablet_id)
4419
246
            .tag("recycle tablet resource ids are",
4420
246
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
4421
246
                                 [](std::string rs_id, const auto& it) {
4422
205
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4423
205
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
4421
205
                                 [](std::string rs_id, const auto& it) {
4422
205
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4423
205
                                 }));
4424
4425
246
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
4426
246
            _thread_pool_group.s3_producer_pool,
4427
246
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4428
246
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
4428
206
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
4429
4430
    // delete all rowset data in this tablet
4431
    // ATTN: there may be data leak if not all accessor initilized successfully
4432
    //       partial data deleted if the tablet is stored cross-storage vault
4433
    //       vault id is not attached to TabletMeta...
4434
246
    for (const auto& resource_id : resource_ids) {
4435
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
4436
206
        concurrent_delete_executor.add(
4437
206
                [&, rs_id = resource_id,
4438
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4439
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4440
206
                    if (res != 0) {
4441
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4442
2
                                     << " path=" << accessor_ptr->uri()
4443
2
                                     << " task type=" << metrics_context.operation_type;
4444
2
                        return std::make_pair(-1, rs_id);
4445
2
                    }
4446
204
                    return std::make_pair(0, rs_id);
4447
206
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
4438
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4439
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4440
206
                    if (res != 0) {
4441
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4442
2
                                     << " path=" << accessor_ptr->uri()
4443
2
                                     << " task type=" << metrics_context.operation_type;
4444
2
                        return std::make_pair(-1, rs_id);
4445
2
                    }
4446
204
                    return std::make_pair(0, rs_id);
4447
206
                });
4448
206
    }
4449
4450
246
    bool finished = true;
4451
246
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
4452
246
    for (auto& r : rets) {
4453
206
        if (r.first != 0) {
4454
2
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
4455
2
            ret = -1;
4456
2
        }
4457
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
4458
206
    }
4459
246
    ret = finished ? ret : -1;
4460
4461
246
    if (ret != 0) { // failed recycle tablet data
4462
2
        LOG_WARNING("ret!=0")
4463
2
                .tag("finished", finished)
4464
2
                .tag("ret", ret)
4465
2
                .tag("instance_id", instance_id_)
4466
2
                .tag("tablet_id", tablet_id);
4467
2
        return ret;
4468
2
    }
4469
4470
244
    tablet_metrics_context_.total_recycled_data_size +=
4471
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4472
244
    tablet_metrics_context_.total_recycled_num += 1;
4473
244
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4474
244
    segment_metrics_context_.total_recycled_data_size +=
4475
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4476
244
    metrics_context.total_recycled_data_size +=
4477
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4478
244
    tablet_metrics_context_.report();
4479
244
    segment_metrics_context_.report();
4480
244
    metrics_context.report();
4481
4482
244
    txn.reset();
4483
244
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4484
0
        LOG_WARNING("failed to recycle tablet ")
4485
0
                .tag("tablet id", tablet_id)
4486
0
                .tag("instance_id", instance_id_)
4487
0
                .tag("reason", "failed to create txn");
4488
0
        ret = -1;
4489
0
    }
4490
    // delete all rowset kv in this tablet
4491
244
    txn->remove(rs_key0, rs_key1);
4492
244
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4493
244
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4494
4495
    // remove delete bitmap for MoW table
4496
244
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4497
244
    txn->remove(pending_key);
4498
244
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4499
244
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4500
244
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4501
4502
244
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4503
244
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4504
244
    txn->remove(dbm_start_key, dbm_end_key);
4505
244
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4506
244
              << " end=" << hex(dbm_end_key);
4507
4508
244
    TxnErrorCode err = txn->commit();
4509
244
    if (err != TxnErrorCode::TXN_OK) {
4510
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4511
0
        ret = -1;
4512
0
    }
4513
4514
244
    if (ret == 0) {
4515
        // All object files under tablet have been deleted
4516
244
        std::lock_guard lock(recycled_tablets_mtx_);
4517
244
        recycled_tablets_.insert(tablet_id);
4518
244
    }
4519
4520
244
    return ret;
4521
246
}
4522
4523
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
4524
11
                                               RecyclerMetricsContext& metrics_context) {
4525
11
    int ret = 0;
4526
11
    auto start_time = steady_clock::now();
4527
4528
11
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4529
4530
    // collect resource ids
4531
11
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4532
11
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4533
11
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4534
11
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4535
4536
11
    int64_t recycle_rowsets_number = 0;
4537
11
    int64_t recycle_segments_number = 0;
4538
11
    int64_t recycle_rowsets_data_size = 0;
4539
11
    int64_t recycle_rowsets_index_size = 0;
4540
11
    int64_t max_rowset_version = 0;
4541
11
    int64_t min_rowset_creation_time = INT64_MAX;
4542
11
    int64_t max_rowset_creation_time = 0;
4543
11
    int64_t min_rowset_expiration_time = INT64_MAX;
4544
11
    int64_t max_rowset_expiration_time = 0;
4545
4546
11
    DORIS_CLOUD_DEFER {
4547
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4548
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4549
11
                .tag("instance_id", instance_id_)
4550
11
                .tag("tablet_id", tablet_id)
4551
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4552
11
                .tag("recycle segments number", recycle_segments_number)
4553
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4554
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4555
11
                .tag("max rowset version", max_rowset_version)
4556
11
                .tag("min rowset creation time", min_rowset_creation_time)
4557
11
                .tag("max rowset creation time", max_rowset_creation_time)
4558
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4559
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4560
11
                .tag("ret", ret);
4561
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4546
11
    DORIS_CLOUD_DEFER {
4547
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4548
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4549
11
                .tag("instance_id", instance_id_)
4550
11
                .tag("tablet_id", tablet_id)
4551
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4552
11
                .tag("recycle segments number", recycle_segments_number)
4553
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4554
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4555
11
                .tag("max rowset version", max_rowset_version)
4556
11
                .tag("min rowset creation time", min_rowset_creation_time)
4557
11
                .tag("max rowset creation time", max_rowset_creation_time)
4558
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4559
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4560
11
                .tag("ret", ret);
4561
11
    };
4562
4563
11
    std::unique_ptr<Transaction> txn;
4564
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4565
0
        LOG_WARNING("failed to recycle tablet ")
4566
0
                .tag("tablet id", tablet_id)
4567
0
                .tag("instance_id", instance_id_)
4568
0
                .tag("reason", "failed to create txn");
4569
0
        ret = -1;
4570
0
    }
4571
4572
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
4573
    // by the related operation logs.
4574
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
4575
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
4576
11
    MetaReader meta_reader(instance_id_);
4577
11
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
4578
11
    if (err == TxnErrorCode::TXN_OK) {
4579
11
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
4580
11
    }
4581
11
    if (err != TxnErrorCode::TXN_OK) {
4582
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4583
0
                .tag("tablet id", tablet_id)
4584
0
                .tag("err", err)
4585
0
                .tag("instance id", instance_id_);
4586
0
        ret = -1;
4587
0
    }
4588
4589
11
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
4590
11
             load_rowset_metas.size(), compact_rowset_metas.size())
4591
11
            .tag("instance_id", instance_id_)
4592
11
            .tag("tablet_id", tablet_id);
4593
4594
11
    SyncExecutor<int> concurrent_delete_executor(
4595
11
            _thread_pool_group.s3_producer_pool,
4596
11
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4597
11
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
4598
4599
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4600
60
        recycle_rowsets_number += 1;
4601
60
        recycle_segments_number += rs_meta.num_segments();
4602
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4603
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4604
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4605
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4606
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4607
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4608
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4609
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
4599
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4600
60
        recycle_rowsets_number += 1;
4601
60
        recycle_segments_number += rs_meta.num_segments();
4602
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4603
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4604
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4605
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4606
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4607
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4608
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4609
60
    };
4610
4611
11
    std::vector<RowsetDeleteTask> all_tasks;
4612
60
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
4613
60
        update_rowset_stats(rs_meta);
4614
        // Version 0-1 rowset has no resource_id and no actual data files,
4615
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4616
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4617
60
        RowsetDeleteTask task;
4618
60
        task.rowset_meta = rs_meta;
4619
60
        task.versioned_rowset_key =
4620
60
                versioned::meta_rowset_load_key({instance_id_, tablet_id, rs_meta.end_version()});
4621
60
        task.non_versioned_rowset_key =
4622
60
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4623
60
        task.versionstamp = versionstamp;
4624
60
        all_tasks.push_back(std::move(task));
4625
60
    }
4626
4627
11
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
4628
0
        update_rowset_stats(rs_meta);
4629
        // Version 0-1 rowset has no resource_id and no actual data files,
4630
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4631
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4632
0
        RowsetDeleteTask task;
4633
0
        task.rowset_meta = rs_meta;
4634
0
        task.versioned_rowset_key = versioned::meta_rowset_compact_key(
4635
0
                {instance_id_, tablet_id, rs_meta.end_version()});
4636
0
        task.non_versioned_rowset_key =
4637
0
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4638
0
        task.versionstamp = versionstamp;
4639
0
        all_tasks.push_back(std::move(task));
4640
0
    }
4641
4642
11
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
4643
0
        RecycleRowsetPB recycle_rowset;
4644
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
4645
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4646
0
            return -1;
4647
0
        }
4648
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
4649
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
4650
                // in old version, keep this key-value pair and it needs to be checked manually
4651
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4652
0
                return -1;
4653
0
            }
4654
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
4655
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4656
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4657
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
4658
0
                return -1;
4659
0
            }
4660
            // decode rowset_id
4661
0
            auto k1 = k;
4662
0
            k1.remove_prefix(1);
4663
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4664
0
            decode_key(&k1, &out);
4665
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4666
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4667
0
            LOG_INFO("delete old-version rowset data")
4668
0
                    .tag("instance_id", instance_id_)
4669
0
                    .tag("tablet_id", tablet_id)
4670
0
                    .tag("rowset_id", rowset_id);
4671
4672
            // Old version RecycleRowsetPB lacks full rowset_meta info (num_segments, schema, etc.),
4673
            // so we must use prefix deletion directly instead of batch delete.
4674
0
            concurrent_delete_executor.add(
4675
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
4676
                        // delete by prefix, the recycle rowset key will be deleted by range later.
4677
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
4678
0
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
4679
0
        } else {
4680
0
            const auto& rowset_meta = recycle_rowset.rowset_meta();
4681
            // Version 0-1 rowset has no resource_id and no actual data files,
4682
            // but still needs ref_count key cleanup, so we add it to all_tasks.
4683
            // It will be filtered out in Phase 2 when building rowsets_to_delete.
4684
0
            RowsetDeleteTask task;
4685
0
            task.rowset_meta = rowset_meta;
4686
0
            task.recycle_rowset_key = k;
4687
0
            all_tasks.push_back(std::move(task));
4688
0
        }
4689
0
        return 0;
4690
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
4691
4692
11
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
4693
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
4694
0
                .tag("tablet id", tablet_id)
4695
0
                .tag("instance_id", instance_id_)
4696
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
4697
0
        ret = -1;
4698
0
    }
4699
4700
    // Phase 1: Classify tasks by ref_count
4701
11
    std::vector<RowsetDeleteTask> batch_delete_tasks;
4702
60
    for (auto& task : all_tasks) {
4703
60
        int classify_ret = classify_rowset_task_by_ref_count(task, batch_delete_tasks);
4704
60
        if (classify_ret < 0) {
4705
0
            LOG_WARNING("failed to classify rowset task, fallback to old logic")
4706
0
                    .tag("instance_id", instance_id_)
4707
0
                    .tag("tablet_id", tablet_id)
4708
0
                    .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4709
0
            concurrent_delete_executor.add([this, t = std::move(task)]() mutable {
4710
0
                return recycle_rowset_meta_and_data(t);
4711
0
            });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
4712
0
        }
4713
60
    }
4714
4715
11
    g_bvar_recycler_batch_delete_rowset_plan_count.put(instance_id_, batch_delete_tasks.size());
4716
4717
11
    LOG_INFO("batch delete plan created")
4718
11
            .tag("instance_id", instance_id_)
4719
11
            .tag("tablet_id", tablet_id)
4720
11
            .tag("plan_count", batch_delete_tasks.size());
4721
4722
    // Phase 2: Execute batch delete using existing delete_rowset_data
4723
11
    if (!batch_delete_tasks.empty()) {
4724
10
        std::map<std::string, RowsetMetaCloudPB> rowsets_to_delete;
4725
49
        for (const auto& task : batch_delete_tasks) {
4726
            // Version 0-1 rowset has no resource_id and no actual data files, skip it
4727
49
            if (task.rowset_meta.resource_id().empty()) {
4728
10
                LOG_INFO("skip rowset with empty resource_id in batch delete")
4729
10
                        .tag("instance_id", instance_id_)
4730
10
                        .tag("tablet_id", tablet_id)
4731
10
                        .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4732
10
                continue;
4733
10
            }
4734
39
            rowsets_to_delete[task.rowset_meta.rowset_id_v2()] = task.rowset_meta;
4735
39
        }
4736
4737
        // Only call delete_rowset_data if there are rowsets with actual data to delete
4738
10
        bool delete_success = true;
4739
10
        if (!rowsets_to_delete.empty()) {
4740
9
            RecyclerMetricsContext batch_metrics_context(instance_id_,
4741
9
                                                         "batch_delete_versioned_tablet");
4742
9
            int delete_ret = delete_rowset_data(
4743
9
                    rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, batch_metrics_context);
4744
9
            if (delete_ret != 0) {
4745
0
                LOG_WARNING("batch delete execution failed")
4746
0
                        .tag("instance_id", instance_id_)
4747
0
                        .tag("tablet_id", tablet_id);
4748
0
                g_bvar_recycler_batch_delete_failures.put(instance_id_, 1);
4749
0
                ret = -1;
4750
0
                delete_success = false;
4751
0
            }
4752
9
        }
4753
4754
        // Phase 3: Only cleanup metadata if data deletion succeeded.
4755
        // If deletion failed, keep recycle_rowset_key so next round will retry.
4756
10
        if (delete_success) {
4757
10
            int cleanup_ret = cleanup_rowset_metadata(batch_delete_tasks);
4758
10
            if (cleanup_ret != 0) {
4759
0
                LOG_WARNING("batch delete cleanup failed")
4760
0
                        .tag("instance_id", instance_id_)
4761
0
                        .tag("tablet_id", tablet_id);
4762
0
                ret = -1;
4763
0
            }
4764
10
        }
4765
10
    }
4766
4767
    // Always wait for fallback tasks to complete before returning
4768
11
    bool finished = true;
4769
11
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4770
11
    for (int r : rets) {
4771
0
        if (r != 0) {
4772
0
            ret = -1;
4773
0
        }
4774
0
    }
4775
4776
11
    ret = finished ? ret : -1;
4777
4778
11
    if (ret != 0) { // failed recycle tablet data
4779
0
        LOG_WARNING("recycle versioned tablet failed")
4780
0
                .tag("finished", finished)
4781
0
                .tag("ret", ret)
4782
0
                .tag("instance_id", instance_id_)
4783
0
                .tag("tablet_id", tablet_id);
4784
0
        return ret;
4785
0
    }
4786
4787
11
    tablet_metrics_context_.total_recycled_data_size +=
4788
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4789
11
    tablet_metrics_context_.total_recycled_num += 1;
4790
11
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4791
11
    segment_metrics_context_.total_recycled_data_size +=
4792
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4793
11
    metrics_context.total_recycled_data_size +=
4794
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4795
11
    tablet_metrics_context_.report();
4796
11
    segment_metrics_context_.report();
4797
11
    metrics_context.report();
4798
4799
11
    txn.reset();
4800
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4801
0
        LOG_WARNING("failed to recycle tablet ")
4802
0
                .tag("tablet id", tablet_id)
4803
0
                .tag("instance_id", instance_id_)
4804
0
                .tag("reason", "failed to create txn");
4805
0
        ret = -1;
4806
0
    }
4807
    // delete all rowset kv in this tablet
4808
11
    txn->remove(rs_key0, rs_key1);
4809
11
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4810
4811
    // remove delete bitmap for MoW table
4812
11
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4813
11
    txn->remove(pending_key);
4814
11
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4815
11
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4816
11
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4817
4818
11
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4819
11
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4820
11
    txn->remove(dbm_start_key, dbm_end_key);
4821
11
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4822
11
              << " end=" << hex(dbm_end_key);
4823
4824
11
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
4825
11
    std::string tablet_index_val;
4826
11
    err = txn->get(versioned_idx_key, &tablet_index_val);
4827
11
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
4828
0
        LOG_WARNING("failed to get tablet index kv")
4829
0
                .tag("instance_id", instance_id_)
4830
0
                .tag("tablet_id", tablet_id)
4831
0
                .tag("err", err);
4832
0
        ret = -1;
4833
11
    } else if (err == TxnErrorCode::TXN_OK) {
4834
        // If the tablet index kv exists, we need to delete it
4835
10
        TabletIndexPB tablet_index_pb;
4836
10
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
4837
0
            LOG_WARNING("failed to parse tablet index pb")
4838
0
                    .tag("instance_id", instance_id_)
4839
0
                    .tag("tablet_id", tablet_id);
4840
0
            ret = -1;
4841
10
        } else {
4842
10
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
4843
10
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
4844
10
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
4845
10
            txn->remove(versioned_inverted_idx_key);
4846
10
            txn->remove(versioned_idx_key);
4847
10
        }
4848
10
    }
4849
4850
11
    err = txn->commit();
4851
11
    if (err != TxnErrorCode::TXN_OK) {
4852
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4853
0
        ret = -1;
4854
0
    }
4855
4856
11
    if (ret == 0) {
4857
        // All object files under tablet have been deleted
4858
11
        std::lock_guard lock(recycled_tablets_mtx_);
4859
11
        recycled_tablets_.insert(tablet_id);
4860
11
    }
4861
4862
11
    return ret;
4863
11
}
4864
4865
27
int InstanceRecycler::recycle_rowsets() {
4866
27
    if (should_recycle_versioned_keys()) {
4867
5
        return recycle_versioned_rowsets();
4868
5
    }
4869
4870
22
    const std::string task_name = "recycle_rowsets";
4871
22
    int64_t num_scanned = 0;
4872
22
    int64_t num_expired = 0;
4873
22
    int64_t num_prepare = 0;
4874
22
    int64_t num_compacted = 0;
4875
22
    int64_t num_empty_rowset = 0;
4876
22
    size_t total_rowset_key_size = 0;
4877
22
    size_t total_rowset_value_size = 0;
4878
22
    size_t expired_rowset_size = 0;
4879
22
    std::atomic_long num_recycled = 0;
4880
22
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4881
4882
22
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4883
22
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4884
22
    std::string recyc_rs_key0;
4885
22
    std::string recyc_rs_key1;
4886
22
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4887
22
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4888
4889
22
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4890
4891
22
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4892
22
    register_recycle_task(task_name, start_time);
4893
4894
22
    DORIS_CLOUD_DEFER {
4895
22
        unregister_recycle_task(task_name);
4896
22
        int64_t cost =
4897
22
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4898
22
        metrics_context.finish_report();
4899
22
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4900
22
                .tag("instance_id", instance_id_)
4901
22
                .tag("num_scanned", num_scanned)
4902
22
                .tag("num_expired", num_expired)
4903
22
                .tag("num_recycled", num_recycled)
4904
22
                .tag("num_recycled.prepare", num_prepare)
4905
22
                .tag("num_recycled.compacted", num_compacted)
4906
22
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4907
22
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4908
22
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4909
22
                .tag("expired_rowset_meta_size", expired_rowset_size);
4910
22
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4894
7
    DORIS_CLOUD_DEFER {
4895
7
        unregister_recycle_task(task_name);
4896
7
        int64_t cost =
4897
7
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4898
7
        metrics_context.finish_report();
4899
7
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4900
7
                .tag("instance_id", instance_id_)
4901
7
                .tag("num_scanned", num_scanned)
4902
7
                .tag("num_expired", num_expired)
4903
7
                .tag("num_recycled", num_recycled)
4904
7
                .tag("num_recycled.prepare", num_prepare)
4905
7
                .tag("num_recycled.compacted", num_compacted)
4906
7
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4907
7
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4908
7
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4909
7
                .tag("expired_rowset_meta_size", expired_rowset_size);
4910
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4894
15
    DORIS_CLOUD_DEFER {
4895
15
        unregister_recycle_task(task_name);
4896
15
        int64_t cost =
4897
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4898
15
        metrics_context.finish_report();
4899
15
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4900
15
                .tag("instance_id", instance_id_)
4901
15
                .tag("num_scanned", num_scanned)
4902
15
                .tag("num_expired", num_expired)
4903
15
                .tag("num_recycled", num_recycled)
4904
15
                .tag("num_recycled.prepare", num_prepare)
4905
15
                .tag("num_recycled.compacted", num_compacted)
4906
15
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4907
15
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4908
15
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4909
15
                .tag("expired_rowset_meta_size", expired_rowset_size);
4910
15
    };
4911
4912
22
    std::vector<std::string> rowset_keys;
4913
22
    std::vector<std::string> rowset_keys_to_mark_recycled;
4914
22
    std::vector<std::string> rowset_keys_to_abort;
4915
22
    std::vector<std::string> prepare_rowset_keys_to_delete;
4916
    // rowset_id -> rowset_meta
4917
    // store rowset id and meta to compute rowset size statistics when deleting
4918
22
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
4919
4920
    // Store keys of rowset recycled by background workers
4921
22
    std::mutex async_recycled_rowset_keys_mutex;
4922
22
    std::vector<std::string> async_recycled_rowset_keys;
4923
22
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4924
22
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
4925
22
    worker_pool->start();
4926
    // TODO batch delete
4927
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4928
4.00k
        std::string dbm_start_key =
4929
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4930
4.00k
        std::string dbm_end_key = dbm_start_key;
4931
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4932
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4933
4.00k
        if (ret != 0) {
4934
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4935
0
                         << instance_id_;
4936
0
        }
4937
4.00k
        return ret;
4938
4.00k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4927
2
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4928
2
        std::string dbm_start_key =
4929
2
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4930
2
        std::string dbm_end_key = dbm_start_key;
4931
2
        encode_int64(INT64_MAX, &dbm_end_key);
4932
2
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4933
2
        if (ret != 0) {
4934
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4935
0
                         << instance_id_;
4936
0
        }
4937
2
        return ret;
4938
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4927
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4928
4.00k
        std::string dbm_start_key =
4929
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4930
4.00k
        std::string dbm_end_key = dbm_start_key;
4931
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4932
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4933
4.00k
        if (ret != 0) {
4934
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4935
0
                         << instance_id_;
4936
0
        }
4937
4.00k
        return ret;
4938
4.00k
    };
4939
22
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
4940
250
                                            int64_t tablet_id, const std::string& rowset_id) {
4941
        // Try to delete rowset data in background thread
4942
250
        int ret = worker_pool->submit_with_timeout(
4943
250
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4944
245
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4945
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4946
0
                        return;
4947
0
                    }
4948
245
                    std::vector<std::string> keys;
4949
245
                    {
4950
245
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4951
245
                        async_recycled_rowset_keys.push_back(std::move(key));
4952
245
                        if (async_recycled_rowset_keys.size() > 100) {
4953
2
                            keys.swap(async_recycled_rowset_keys);
4954
2
                        }
4955
245
                    }
4956
245
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4957
245
                    if (keys.empty()) return;
4958
2
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4959
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4960
0
                                     << instance_id_;
4961
2
                    } else {
4962
2
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4963
2
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4964
2
                                           num_recycled, start_time);
4965
2
                    }
4966
2
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4943
245
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4944
245
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4945
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4946
0
                        return;
4947
0
                    }
4948
245
                    std::vector<std::string> keys;
4949
245
                    {
4950
245
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4951
245
                        async_recycled_rowset_keys.push_back(std::move(key));
4952
245
                        if (async_recycled_rowset_keys.size() > 100) {
4953
2
                            keys.swap(async_recycled_rowset_keys);
4954
2
                        }
4955
245
                    }
4956
245
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4957
245
                    if (keys.empty()) return;
4958
2
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4959
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4960
0
                                     << instance_id_;
4961
2
                    } else {
4962
2
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4963
2
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4964
2
                                           num_recycled, start_time);
4965
2
                    }
4966
2
                },
4967
250
                0);
4968
250
        if (ret == 0) return 0;
4969
        // Submit task failed, delete rowset data in current thread
4970
5
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4971
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4972
0
            return -1;
4973
0
        }
4974
5
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4975
0
            return -1;
4976
0
        }
4977
5
        rowset_keys.push_back(std::move(key));
4978
5
        return 0;
4979
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4940
250
                                            int64_t tablet_id, const std::string& rowset_id) {
4941
        // Try to delete rowset data in background thread
4942
250
        int ret = worker_pool->submit_with_timeout(
4943
250
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4944
250
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4945
250
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4946
250
                        return;
4947
250
                    }
4948
250
                    std::vector<std::string> keys;
4949
250
                    {
4950
250
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4951
250
                        async_recycled_rowset_keys.push_back(std::move(key));
4952
250
                        if (async_recycled_rowset_keys.size() > 100) {
4953
250
                            keys.swap(async_recycled_rowset_keys);
4954
250
                        }
4955
250
                    }
4956
250
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4957
250
                    if (keys.empty()) return;
4958
250
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4959
250
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4960
250
                                     << instance_id_;
4961
250
                    } else {
4962
250
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4963
250
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4964
250
                                           num_recycled, start_time);
4965
250
                    }
4966
250
                },
4967
250
                0);
4968
250
        if (ret == 0) return 0;
4969
        // Submit task failed, delete rowset data in current thread
4970
5
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4971
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4972
0
            return -1;
4973
0
        }
4974
5
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4975
0
            return -1;
4976
0
        }
4977
5
        rowset_keys.push_back(std::move(key));
4978
5
        return 0;
4979
5
    };
4980
4981
22
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4982
4983
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4984
7.75k
        ++num_scanned;
4985
7.75k
        total_rowset_key_size += k.size();
4986
7.75k
        total_rowset_value_size += v.size();
4987
7.75k
        RecycleRowsetPB rowset;
4988
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4989
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4990
0
            return -1;
4991
0
        }
4992
4993
7.75k
        int64_t current_time = ::time(nullptr);
4994
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4995
4996
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4997
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4998
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4999
7.75k
        if (current_time < expiration) { // not expired
5000
0
            return 0;
5001
0
        }
5002
7.75k
        ++num_expired;
5003
7.75k
        expired_rowset_size += v.size();
5004
5005
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5006
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5007
                // in old version, keep this key-value pair and it needs to be checked manually
5008
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5009
0
                return -1;
5010
0
            }
5011
250
            if (rowset.resource_id().empty()) [[unlikely]] {
5012
                // old version `RecycleRowsetPB` may have an empty resource_id, just remove the kv.
5013
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5014
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5015
0
                rowset_keys.emplace_back(k);
5016
0
                return -1;
5017
0
            }
5018
            // decode rowset_id
5019
250
            auto k1 = k;
5020
250
            k1.remove_prefix(1);
5021
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5022
250
            decode_key(&k1, &out);
5023
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5024
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5025
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5026
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
5027
250
                      << " task_type=" << metrics_context.operation_type;
5028
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5029
250
                                             rowset.tablet_id(), rowset_id) != 0) {
5030
0
                return -1;
5031
0
            }
5032
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
5033
250
            metrics_context.total_recycled_num++;
5034
250
            segment_metrics_context_.total_recycled_data_size +=
5035
250
                    rowset.rowset_meta().total_disk_size();
5036
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
5037
250
            return 0;
5038
250
        }
5039
5040
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
5041
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
5042
7.50k
            if (need_mark_rowset_as_recycled(rowset)) {
5043
3.75k
                rowset_keys_to_mark_recycled.emplace_back(k);
5044
3.75k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5045
3.75k
                             "at next turn, instance_id="
5046
3.75k
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5047
3.75k
                          << " version=[" << rowset_meta->start_version() << '-'
5048
3.75k
                          << rowset_meta->end_version() << "]";
5049
3.75k
                return 0;
5050
3.75k
            }
5051
7.50k
        }
5052
5053
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle &&
5054
3.75k
            rowset_meta->end_version() != 1) {
5055
3.75k
            if (make_deferred_abort_task(rowset).has_value()) {
5056
2
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5057
2
                             "instance_id="
5058
2
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5059
2
                          << " version=[" << rowset_meta->start_version() << '-'
5060
2
                          << rowset_meta->end_version() << "]";
5061
2
                rowset_keys_to_abort.emplace_back(k);
5062
2
            }
5063
3.75k
        }
5064
5065
        // TODO(plat1ko): check rowset not referenced
5066
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5067
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5068
0
                LOG_INFO("recycle rowset that has empty resource id");
5069
0
            } else {
5070
                // other situations, keep this key-value pair and it needs to be checked manually
5071
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5072
0
                return -1;
5073
0
            }
5074
0
        }
5075
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5076
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
5077
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5078
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5079
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
5080
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5081
3.75k
                  << " rowset_meta_size=" << v.size()
5082
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
5083
3.75k
                  << " task_type=" << metrics_context.operation_type;
5084
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5085
            // unable to calculate file path, can only be deleted by rowset id prefix
5086
652
            num_prepare += 1;
5087
652
            prepare_rowset_keys_to_delete.emplace_back(k);
5088
3.10k
        } else {
5089
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
5090
3.10k
            rowset_keys.emplace_back(k);
5091
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
5092
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
5093
3.10k
                ++num_empty_rowset;
5094
3.10k
            }
5095
3.10k
        }
5096
3.75k
        return 0;
5097
3.75k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4983
7
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4984
7
        ++num_scanned;
4985
7
        total_rowset_key_size += k.size();
4986
7
        total_rowset_value_size += v.size();
4987
7
        RecycleRowsetPB rowset;
4988
7
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4989
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4990
0
            return -1;
4991
0
        }
4992
4993
7
        int64_t current_time = ::time(nullptr);
4994
7
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4995
4996
7
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4997
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4998
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4999
7
        if (current_time < expiration) { // not expired
5000
0
            return 0;
5001
0
        }
5002
7
        ++num_expired;
5003
7
        expired_rowset_size += v.size();
5004
5005
7
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5006
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5007
                // in old version, keep this key-value pair and it needs to be checked manually
5008
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5009
0
                return -1;
5010
0
            }
5011
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5012
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5013
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5014
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5015
0
                rowset_keys.emplace_back(k);
5016
0
                return -1;
5017
0
            }
5018
            // decode rowset_id
5019
0
            auto k1 = k;
5020
0
            k1.remove_prefix(1);
5021
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5022
0
            decode_key(&k1, &out);
5023
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5024
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5025
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5026
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
5027
0
                      << " task_type=" << metrics_context.operation_type;
5028
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5029
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5030
0
                return -1;
5031
0
            }
5032
0
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
5033
0
            metrics_context.total_recycled_num++;
5034
0
            segment_metrics_context_.total_recycled_data_size +=
5035
0
                    rowset.rowset_meta().total_disk_size();
5036
0
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
5037
0
            return 0;
5038
0
        }
5039
5040
7
        auto* rowset_meta = rowset.mutable_rowset_meta();
5041
7
        if (config::enable_mark_delete_rowset_before_recycle) {
5042
7
            if (need_mark_rowset_as_recycled(rowset)) {
5043
5
                rowset_keys_to_mark_recycled.emplace_back(k);
5044
5
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5045
5
                             "at next turn, instance_id="
5046
5
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5047
5
                          << " version=[" << rowset_meta->start_version() << '-'
5048
5
                          << rowset_meta->end_version() << "]";
5049
5
                return 0;
5050
5
            }
5051
7
        }
5052
5053
2
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle &&
5054
2
            rowset_meta->end_version() != 1) {
5055
2
            if (make_deferred_abort_task(rowset).has_value()) {
5056
2
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5057
2
                             "instance_id="
5058
2
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5059
2
                          << " version=[" << rowset_meta->start_version() << '-'
5060
2
                          << rowset_meta->end_version() << "]";
5061
2
                rowset_keys_to_abort.emplace_back(k);
5062
2
            }
5063
2
        }
5064
5065
        // TODO(plat1ko): check rowset not referenced
5066
2
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5067
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5068
0
                LOG_INFO("recycle rowset that has empty resource id");
5069
0
            } else {
5070
                // other situations, keep this key-value pair and it needs to be checked manually
5071
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5072
0
                return -1;
5073
0
            }
5074
0
        }
5075
2
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5076
2
                  << " tablet_id=" << rowset_meta->tablet_id()
5077
2
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5078
2
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5079
2
                  << "] txn_id=" << rowset_meta->txn_id()
5080
2
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5081
2
                  << " rowset_meta_size=" << v.size()
5082
2
                  << " creation_time=" << rowset_meta->creation_time()
5083
2
                  << " task_type=" << metrics_context.operation_type;
5084
2
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5085
            // unable to calculate file path, can only be deleted by rowset id prefix
5086
2
            num_prepare += 1;
5087
2
            prepare_rowset_keys_to_delete.emplace_back(k);
5088
2
        } else {
5089
0
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
5090
0
            rowset_keys.emplace_back(k);
5091
0
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
5092
0
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
5093
0
                ++num_empty_rowset;
5094
0
            }
5095
0
        }
5096
2
        return 0;
5097
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4983
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4984
7.75k
        ++num_scanned;
4985
7.75k
        total_rowset_key_size += k.size();
4986
7.75k
        total_rowset_value_size += v.size();
4987
7.75k
        RecycleRowsetPB rowset;
4988
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4989
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4990
0
            return -1;
4991
0
        }
4992
4993
7.75k
        int64_t current_time = ::time(nullptr);
4994
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4995
4996
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4997
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4998
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4999
7.75k
        if (current_time < expiration) { // not expired
5000
0
            return 0;
5001
0
        }
5002
7.75k
        ++num_expired;
5003
7.75k
        expired_rowset_size += v.size();
5004
5005
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5006
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5007
                // in old version, keep this key-value pair and it needs to be checked manually
5008
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5009
0
                return -1;
5010
0
            }
5011
250
            if (rowset.resource_id().empty()) [[unlikely]] {
5012
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5013
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5014
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5015
0
                rowset_keys.emplace_back(k);
5016
0
                return -1;
5017
0
            }
5018
            // decode rowset_id
5019
250
            auto k1 = k;
5020
250
            k1.remove_prefix(1);
5021
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5022
250
            decode_key(&k1, &out);
5023
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5024
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5025
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5026
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
5027
250
                      << " task_type=" << metrics_context.operation_type;
5028
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5029
250
                                             rowset.tablet_id(), rowset_id) != 0) {
5030
0
                return -1;
5031
0
            }
5032
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
5033
250
            metrics_context.total_recycled_num++;
5034
250
            segment_metrics_context_.total_recycled_data_size +=
5035
250
                    rowset.rowset_meta().total_disk_size();
5036
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
5037
250
            return 0;
5038
250
        }
5039
5040
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
5041
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
5042
7.50k
            if (need_mark_rowset_as_recycled(rowset)) {
5043
3.75k
                rowset_keys_to_mark_recycled.emplace_back(k);
5044
3.75k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5045
3.75k
                             "at next turn, instance_id="
5046
3.75k
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5047
3.75k
                          << " version=[" << rowset_meta->start_version() << '-'
5048
3.75k
                          << rowset_meta->end_version() << "]";
5049
3.75k
                return 0;
5050
3.75k
            }
5051
7.50k
        }
5052
5053
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle &&
5054
3.75k
            rowset_meta->end_version() != 1) {
5055
3.75k
            if (make_deferred_abort_task(rowset).has_value()) {
5056
0
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5057
0
                             "instance_id="
5058
0
                          << instance_id_ << " tablet_id=" << rowset_meta->tablet_id()
5059
0
                          << " version=[" << rowset_meta->start_version() << '-'
5060
0
                          << rowset_meta->end_version() << "]";
5061
0
                rowset_keys_to_abort.emplace_back(k);
5062
0
            }
5063
3.75k
        }
5064
5065
        // TODO(plat1ko): check rowset not referenced
5066
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5067
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5068
0
                LOG_INFO("recycle rowset that has empty resource id");
5069
0
            } else {
5070
                // other situations, keep this key-value pair and it needs to be checked manually
5071
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5072
0
                return -1;
5073
0
            }
5074
0
        }
5075
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5076
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
5077
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5078
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5079
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
5080
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5081
3.75k
                  << " rowset_meta_size=" << v.size()
5082
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
5083
3.75k
                  << " task_type=" << metrics_context.operation_type;
5084
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5085
            // unable to calculate file path, can only be deleted by rowset id prefix
5086
650
            num_prepare += 1;
5087
650
            prepare_rowset_keys_to_delete.emplace_back(k);
5088
3.10k
        } else {
5089
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
5090
3.10k
            rowset_keys.emplace_back(k);
5091
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
5092
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
5093
3.10k
                ++num_empty_rowset;
5094
3.10k
            }
5095
3.10k
        }
5096
3.75k
        return 0;
5097
3.75k
    };
5098
5099
49
    auto loop_done = [&]() -> int {
5100
49
        std::vector<std::string> rowset_keys_to_delete;
5101
49
        std::vector<std::string> mark_keys_to_process;
5102
49
        std::vector<std::string> abort_keys_to_process;
5103
49
        std::vector<std::string> prepare_keys_to_process;
5104
        // rowset_id -> rowset_meta
5105
        // store rowset id and meta for statistics rs size when delete
5106
49
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
5107
49
        rowset_keys_to_delete.swap(rowset_keys);
5108
49
        mark_keys_to_process.swap(rowset_keys_to_mark_recycled);
5109
49
        abort_keys_to_process.swap(rowset_keys_to_abort);
5110
49
        prepare_keys_to_process.swap(prepare_rowset_keys_to_delete);
5111
49
        rowsets_to_delete.swap(rowsets);
5112
49
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
5113
49
                             rowsets_to_delete = std::move(rowsets_to_delete),
5114
49
                             prepare_keys_to_process = std::move(prepare_keys_to_process),
5115
49
                             mark_keys_to_process = std::move(mark_keys_to_process),
5116
49
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5117
49
            if (!mark_keys_to_process.empty() &&
5118
49
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5119
26
                                                                mark_keys_to_process) != 0) {
5120
0
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5121
0
                             << instance_id_;
5122
0
                return;
5123
0
            }
5124
49
            if (!abort_keys_to_process.empty() &&
5125
49
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5126
2
                        0) {
5127
0
                return;
5128
0
            }
5129
49
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5130
49
            if (!prepare_keys_to_process.empty() &&
5131
49
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5132
23
                                             &prepare_delete_tasks) != 0) {
5133
0
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5134
0
                             << instance_id_;
5135
0
                return;
5136
0
            }
5137
49
            if (!prepare_delete_tasks.empty()) {
5138
23
                std::vector<std::string> prepare_rowset_keys_to_delete;
5139
23
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5140
652
                for (const auto& task : prepare_delete_tasks) {
5141
652
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5142
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5143
0
                        return;
5144
0
                    }
5145
652
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5146
0
                        return;
5147
0
                    }
5148
652
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5149
652
                }
5150
23
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5151
0
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5152
0
                                 << instance_id_;
5153
0
                    return;
5154
0
                }
5155
23
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5156
23
                                       std::memory_order_relaxed);
5157
23
            }
5158
49
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5159
49
                                   metrics_context) != 0) {
5160
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5161
0
                return;
5162
0
            }
5163
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
5164
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5165
0
                    return;
5166
0
                }
5167
3.10k
            }
5168
49
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5169
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5170
0
                return;
5171
0
            }
5172
49
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5173
49
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
5116
7
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5117
7
            if (!mark_keys_to_process.empty() &&
5118
7
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5119
5
                                                                mark_keys_to_process) != 0) {
5120
0
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5121
0
                             << instance_id_;
5122
0
                return;
5123
0
            }
5124
7
            if (!abort_keys_to_process.empty() &&
5125
7
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5126
2
                        0) {
5127
0
                return;
5128
0
            }
5129
7
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5130
7
            if (!prepare_keys_to_process.empty() &&
5131
7
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5132
2
                                             &prepare_delete_tasks) != 0) {
5133
0
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5134
0
                             << instance_id_;
5135
0
                return;
5136
0
            }
5137
7
            if (!prepare_delete_tasks.empty()) {
5138
2
                std::vector<std::string> prepare_rowset_keys_to_delete;
5139
2
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5140
2
                for (const auto& task : prepare_delete_tasks) {
5141
2
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5142
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5143
0
                        return;
5144
0
                    }
5145
2
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5146
0
                        return;
5147
0
                    }
5148
2
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5149
2
                }
5150
2
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5151
0
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5152
0
                                 << instance_id_;
5153
0
                    return;
5154
0
                }
5155
2
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5156
2
                                       std::memory_order_relaxed);
5157
2
            }
5158
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5159
7
                                   metrics_context) != 0) {
5160
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5161
0
                return;
5162
0
            }
5163
7
            for (const auto& [_, rs] : rowsets_to_delete) {
5164
0
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5165
0
                    return;
5166
0
                }
5167
0
            }
5168
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5169
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5170
0
                return;
5171
0
            }
5172
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5173
7
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
5116
42
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5117
42
            if (!mark_keys_to_process.empty() &&
5118
42
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5119
21
                                                                mark_keys_to_process) != 0) {
5120
0
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5121
0
                             << instance_id_;
5122
0
                return;
5123
0
            }
5124
42
            if (!abort_keys_to_process.empty() &&
5125
42
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5126
0
                        0) {
5127
0
                return;
5128
0
            }
5129
42
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5130
42
            if (!prepare_keys_to_process.empty() &&
5131
42
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5132
21
                                             &prepare_delete_tasks) != 0) {
5133
0
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5134
0
                             << instance_id_;
5135
0
                return;
5136
0
            }
5137
42
            if (!prepare_delete_tasks.empty()) {
5138
21
                std::vector<std::string> prepare_rowset_keys_to_delete;
5139
21
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5140
650
                for (const auto& task : prepare_delete_tasks) {
5141
650
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5142
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5143
0
                        return;
5144
0
                    }
5145
650
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5146
0
                        return;
5147
0
                    }
5148
650
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5149
650
                }
5150
21
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5151
0
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5152
0
                                 << instance_id_;
5153
0
                    return;
5154
0
                }
5155
21
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5156
21
                                       std::memory_order_relaxed);
5157
21
            }
5158
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5159
42
                                   metrics_context) != 0) {
5160
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5161
0
                return;
5162
0
            }
5163
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
5164
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5165
0
                    return;
5166
0
                }
5167
3.10k
            }
5168
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5169
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5170
0
                return;
5171
0
            }
5172
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5173
42
        });
5174
49
        return 0;
5175
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
5099
7
    auto loop_done = [&]() -> int {
5100
7
        std::vector<std::string> rowset_keys_to_delete;
5101
7
        std::vector<std::string> mark_keys_to_process;
5102
7
        std::vector<std::string> abort_keys_to_process;
5103
7
        std::vector<std::string> prepare_keys_to_process;
5104
        // rowset_id -> rowset_meta
5105
        // store rowset id and meta for statistics rs size when delete
5106
7
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
5107
7
        rowset_keys_to_delete.swap(rowset_keys);
5108
7
        mark_keys_to_process.swap(rowset_keys_to_mark_recycled);
5109
7
        abort_keys_to_process.swap(rowset_keys_to_abort);
5110
7
        prepare_keys_to_process.swap(prepare_rowset_keys_to_delete);
5111
7
        rowsets_to_delete.swap(rowsets);
5112
7
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
5113
7
                             rowsets_to_delete = std::move(rowsets_to_delete),
5114
7
                             prepare_keys_to_process = std::move(prepare_keys_to_process),
5115
7
                             mark_keys_to_process = std::move(mark_keys_to_process),
5116
7
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5117
7
            if (!mark_keys_to_process.empty() &&
5118
7
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5119
7
                                                                mark_keys_to_process) != 0) {
5120
7
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5121
7
                             << instance_id_;
5122
7
                return;
5123
7
            }
5124
7
            if (!abort_keys_to_process.empty() &&
5125
7
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5126
7
                        0) {
5127
7
                return;
5128
7
            }
5129
7
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5130
7
            if (!prepare_keys_to_process.empty() &&
5131
7
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5132
7
                                             &prepare_delete_tasks) != 0) {
5133
7
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5134
7
                             << instance_id_;
5135
7
                return;
5136
7
            }
5137
7
            if (!prepare_delete_tasks.empty()) {
5138
7
                std::vector<std::string> prepare_rowset_keys_to_delete;
5139
7
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5140
7
                for (const auto& task : prepare_delete_tasks) {
5141
7
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5142
7
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5143
7
                        return;
5144
7
                    }
5145
7
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5146
7
                        return;
5147
7
                    }
5148
7
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5149
7
                }
5150
7
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5151
7
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5152
7
                                 << instance_id_;
5153
7
                    return;
5154
7
                }
5155
7
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5156
7
                                       std::memory_order_relaxed);
5157
7
            }
5158
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5159
7
                                   metrics_context) != 0) {
5160
7
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5161
7
                return;
5162
7
            }
5163
7
            for (const auto& [_, rs] : rowsets_to_delete) {
5164
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5165
7
                    return;
5166
7
                }
5167
7
            }
5168
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5169
7
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5170
7
                return;
5171
7
            }
5172
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5173
7
        });
5174
7
        return 0;
5175
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
5099
42
    auto loop_done = [&]() -> int {
5100
42
        std::vector<std::string> rowset_keys_to_delete;
5101
42
        std::vector<std::string> mark_keys_to_process;
5102
42
        std::vector<std::string> abort_keys_to_process;
5103
42
        std::vector<std::string> prepare_keys_to_process;
5104
        // rowset_id -> rowset_meta
5105
        // store rowset id and meta for statistics rs size when delete
5106
42
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
5107
42
        rowset_keys_to_delete.swap(rowset_keys);
5108
42
        mark_keys_to_process.swap(rowset_keys_to_mark_recycled);
5109
42
        abort_keys_to_process.swap(rowset_keys_to_abort);
5110
42
        prepare_keys_to_process.swap(prepare_rowset_keys_to_delete);
5111
42
        rowsets_to_delete.swap(rowsets);
5112
42
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
5113
42
                             rowsets_to_delete = std::move(rowsets_to_delete),
5114
42
                             prepare_keys_to_process = std::move(prepare_keys_to_process),
5115
42
                             mark_keys_to_process = std::move(mark_keys_to_process),
5116
42
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5117
42
            if (!mark_keys_to_process.empty() &&
5118
42
                batch_mark_rowsets_as_recycled<RecycleRowsetPB>(txn_kv_.get(), instance_id_,
5119
42
                                                                mark_keys_to_process) != 0) {
5120
42
                LOG(WARNING) << "failed to batch mark recycle rowsets as recycled, instance_id="
5121
42
                             << instance_id_;
5122
42
                return;
5123
42
            }
5124
42
            if (!abort_keys_to_process.empty() &&
5125
42
                batch_abort_txn_or_job_for_recycle<RecycleRowsetPB>(abort_keys_to_process, true) !=
5126
42
                        0) {
5127
42
                return;
5128
42
            }
5129
42
            std::vector<DeferredRecyclePrepareDeleteTask> prepare_delete_tasks;
5130
42
            if (!prepare_keys_to_process.empty() &&
5131
42
                collect_prepare_delete_tasks(txn_kv_.get(), instance_id_, prepare_keys_to_process,
5132
42
                                             &prepare_delete_tasks) != 0) {
5133
42
                LOG(WARNING) << "failed to collect prepare rowset delete tasks, instance_id="
5134
42
                             << instance_id_;
5135
42
                return;
5136
42
            }
5137
42
            if (!prepare_delete_tasks.empty()) {
5138
42
                std::vector<std::string> prepare_rowset_keys_to_delete;
5139
42
                prepare_rowset_keys_to_delete.reserve(prepare_delete_tasks.size());
5140
42
                for (const auto& task : prepare_delete_tasks) {
5141
42
                    if (delete_rowset_data(task.resource_id, task.tablet_id, task.rowset_id) != 0) {
5142
42
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(task.key);
5143
42
                        return;
5144
42
                    }
5145
42
                    if (delete_versioned_delete_bitmap_kvs(task.tablet_id, task.rowset_id) != 0) {
5146
42
                        return;
5147
42
                    }
5148
42
                    prepare_rowset_keys_to_delete.emplace_back(task.key);
5149
42
                }
5150
42
                if (txn_remove(txn_kv_.get(), prepare_rowset_keys_to_delete) != 0) {
5151
42
                    LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5152
42
                                 << instance_id_;
5153
42
                    return;
5154
42
                }
5155
42
                num_recycled.fetch_add(prepare_rowset_keys_to_delete.size(),
5156
42
                                       std::memory_order_relaxed);
5157
42
            }
5158
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
5159
42
                                   metrics_context) != 0) {
5160
42
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
5161
42
                return;
5162
42
            }
5163
42
            for (const auto& [_, rs] : rowsets_to_delete) {
5164
42
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5165
42
                    return;
5166
42
                }
5167
42
            }
5168
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
5169
42
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5170
42
                return;
5171
42
            }
5172
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
5173
42
        });
5174
42
        return 0;
5175
42
    };
5176
5177
22
    if (config::enable_recycler_stats_metrics) {
5178
0
        scan_and_statistics_rowsets();
5179
0
    }
5180
    // recycle_func and loop_done for scan and recycle
5181
22
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
5182
22
                               std::move(loop_done));
5183
5184
22
    worker_pool->stop();
5185
5186
22
    if (!async_recycled_rowset_keys.empty()) {
5187
1
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
5188
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5189
0
            return -1;
5190
1
        } else {
5191
1
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
5192
1
        }
5193
1
    }
5194
5195
    // Report final metrics after all concurrent tasks completed
5196
22
    segment_metrics_context_.report();
5197
22
    metrics_context.report();
5198
5199
22
    return ret;
5200
22
}
5201
5202
13
int InstanceRecycler::recycle_restore_jobs() {
5203
13
    const std::string task_name = "recycle_restore_jobs";
5204
13
    int64_t num_scanned = 0;
5205
13
    int64_t num_expired = 0;
5206
13
    int64_t num_recycled = 0;
5207
13
    int64_t num_aborted = 0;
5208
5209
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5210
5211
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
5212
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
5213
13
    std::string restore_job_key0;
5214
13
    std::string restore_job_key1;
5215
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
5216
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
5217
5218
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
5219
5220
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5221
13
    register_recycle_task(task_name, start_time);
5222
5223
13
    DORIS_CLOUD_DEFER {
5224
13
        unregister_recycle_task(task_name);
5225
13
        int64_t cost =
5226
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5227
13
        metrics_context.finish_report();
5228
5229
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
5230
13
                .tag("instance_id", instance_id_)
5231
13
                .tag("num_scanned", num_scanned)
5232
13
                .tag("num_expired", num_expired)
5233
13
                .tag("num_recycled", num_recycled)
5234
13
                .tag("num_aborted", num_aborted);
5235
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
5223
13
    DORIS_CLOUD_DEFER {
5224
13
        unregister_recycle_task(task_name);
5225
13
        int64_t cost =
5226
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5227
13
        metrics_context.finish_report();
5228
5229
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
5230
13
                .tag("instance_id", instance_id_)
5231
13
                .tag("num_scanned", num_scanned)
5232
13
                .tag("num_expired", num_expired)
5233
13
                .tag("num_recycled", num_recycled)
5234
13
                .tag("num_aborted", num_aborted);
5235
13
    };
5236
5237
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5238
5239
13
    std::vector<std::string_view> restore_job_keys;
5240
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
5241
41
        ++num_scanned;
5242
41
        RestoreJobCloudPB restore_job_pb;
5243
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5244
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5245
0
            return -1;
5246
0
        }
5247
41
        int64_t expiration =
5248
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5249
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
5250
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
5251
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5252
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5253
0
                   << " state=" << restore_job_pb.state();
5254
41
        int64_t current_time = ::time(nullptr);
5255
41
        if (current_time < expiration) { // not expired
5256
0
            return 0;
5257
0
        }
5258
41
        ++num_expired;
5259
5260
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5261
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5262
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5263
5264
41
        std::unique_ptr<Transaction> txn;
5265
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5266
41
        if (err != TxnErrorCode::TXN_OK) {
5267
0
            LOG_WARNING("failed to recycle restore job")
5268
0
                    .tag("err", err)
5269
0
                    .tag("tablet id", tablet_id)
5270
0
                    .tag("instance_id", instance_id_)
5271
0
                    .tag("reason", "failed to create txn");
5272
0
            return -1;
5273
0
        }
5274
5275
41
        std::string val;
5276
41
        err = txn->get(k, &val);
5277
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5278
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5279
0
            return 0;
5280
0
        }
5281
41
        if (err != TxnErrorCode::TXN_OK) {
5282
0
            LOG_WARNING("failed to get kv");
5283
0
            return -1;
5284
0
        }
5285
41
        restore_job_pb.Clear();
5286
41
        if (!restore_job_pb.ParseFromString(val)) {
5287
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5288
0
            return -1;
5289
0
        }
5290
5291
        // PREPARED or COMMITTED, change state to DROPPED and return
5292
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5293
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5294
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5295
0
            restore_job_pb.set_need_recycle_data(true);
5296
0
            txn->put(k, restore_job_pb.SerializeAsString());
5297
0
            err = txn->commit();
5298
0
            if (err != TxnErrorCode::TXN_OK) {
5299
0
                LOG_WARNING("failed to commit txn: {}", err);
5300
0
                return -1;
5301
0
            }
5302
0
            num_aborted++;
5303
0
            return 0;
5304
0
        }
5305
5306
        // Change state to RECYCLING
5307
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5308
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5309
21
            txn->put(k, restore_job_pb.SerializeAsString());
5310
21
            err = txn->commit();
5311
21
            if (err != TxnErrorCode::TXN_OK) {
5312
0
                LOG_WARNING("failed to commit txn: {}", err);
5313
0
                return -1;
5314
0
            }
5315
21
            return 0;
5316
21
        }
5317
5318
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5319
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5320
5321
        // Recycle all data associated with the restore job.
5322
        // This includes rowsets, segments, and related resources.
5323
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5324
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5325
0
            LOG_WARNING("failed to recycle tablet")
5326
0
                    .tag("tablet_id", tablet_id)
5327
0
                    .tag("instance_id", instance_id_);
5328
0
            return -1;
5329
0
        }
5330
5331
        // delete all restore job rowset kv
5332
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5333
5334
20
        err = txn->commit();
5335
20
        if (err != TxnErrorCode::TXN_OK) {
5336
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5337
0
                    .tag("err", err)
5338
0
                    .tag("tablet id", tablet_id)
5339
0
                    .tag("instance_id", instance_id_)
5340
0
                    .tag("reason", "failed to commit txn");
5341
0
            return -1;
5342
0
        }
5343
5344
20
        metrics_context.total_recycled_num = ++num_recycled;
5345
20
        metrics_context.report();
5346
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5347
20
        restore_job_keys.push_back(k);
5348
5349
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5350
20
                  << " tablet_id=" << tablet_id;
5351
20
        return 0;
5352
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5240
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
5241
41
        ++num_scanned;
5242
41
        RestoreJobCloudPB restore_job_pb;
5243
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5244
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5245
0
            return -1;
5246
0
        }
5247
41
        int64_t expiration =
5248
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5249
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
5250
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
5251
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5252
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5253
0
                   << " state=" << restore_job_pb.state();
5254
41
        int64_t current_time = ::time(nullptr);
5255
41
        if (current_time < expiration) { // not expired
5256
0
            return 0;
5257
0
        }
5258
41
        ++num_expired;
5259
5260
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5261
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5262
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5263
5264
41
        std::unique_ptr<Transaction> txn;
5265
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5266
41
        if (err != TxnErrorCode::TXN_OK) {
5267
0
            LOG_WARNING("failed to recycle restore job")
5268
0
                    .tag("err", err)
5269
0
                    .tag("tablet id", tablet_id)
5270
0
                    .tag("instance_id", instance_id_)
5271
0
                    .tag("reason", "failed to create txn");
5272
0
            return -1;
5273
0
        }
5274
5275
41
        std::string val;
5276
41
        err = txn->get(k, &val);
5277
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5278
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5279
0
            return 0;
5280
0
        }
5281
41
        if (err != TxnErrorCode::TXN_OK) {
5282
0
            LOG_WARNING("failed to get kv");
5283
0
            return -1;
5284
0
        }
5285
41
        restore_job_pb.Clear();
5286
41
        if (!restore_job_pb.ParseFromString(val)) {
5287
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5288
0
            return -1;
5289
0
        }
5290
5291
        // PREPARED or COMMITTED, change state to DROPPED and return
5292
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5293
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5294
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5295
0
            restore_job_pb.set_need_recycle_data(true);
5296
0
            txn->put(k, restore_job_pb.SerializeAsString());
5297
0
            err = txn->commit();
5298
0
            if (err != TxnErrorCode::TXN_OK) {
5299
0
                LOG_WARNING("failed to commit txn: {}", err);
5300
0
                return -1;
5301
0
            }
5302
0
            num_aborted++;
5303
0
            return 0;
5304
0
        }
5305
5306
        // Change state to RECYCLING
5307
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5308
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5309
21
            txn->put(k, restore_job_pb.SerializeAsString());
5310
21
            err = txn->commit();
5311
21
            if (err != TxnErrorCode::TXN_OK) {
5312
0
                LOG_WARNING("failed to commit txn: {}", err);
5313
0
                return -1;
5314
0
            }
5315
21
            return 0;
5316
21
        }
5317
5318
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5319
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5320
5321
        // Recycle all data associated with the restore job.
5322
        // This includes rowsets, segments, and related resources.
5323
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5324
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5325
0
            LOG_WARNING("failed to recycle tablet")
5326
0
                    .tag("tablet_id", tablet_id)
5327
0
                    .tag("instance_id", instance_id_);
5328
0
            return -1;
5329
0
        }
5330
5331
        // delete all restore job rowset kv
5332
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5333
5334
20
        err = txn->commit();
5335
20
        if (err != TxnErrorCode::TXN_OK) {
5336
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5337
0
                    .tag("err", err)
5338
0
                    .tag("tablet id", tablet_id)
5339
0
                    .tag("instance_id", instance_id_)
5340
0
                    .tag("reason", "failed to commit txn");
5341
0
            return -1;
5342
0
        }
5343
5344
20
        metrics_context.total_recycled_num = ++num_recycled;
5345
20
        metrics_context.report();
5346
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5347
20
        restore_job_keys.push_back(k);
5348
5349
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5350
20
                  << " tablet_id=" << tablet_id;
5351
20
        return 0;
5352
20
    };
5353
5354
13
    auto loop_done = [&restore_job_keys, this]() -> int {
5355
3
        if (restore_job_keys.empty()) return 0;
5356
1
        DORIS_CLOUD_DEFER {
5357
1
            restore_job_keys.clear();
5358
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5356
1
        DORIS_CLOUD_DEFER {
5357
1
            restore_job_keys.clear();
5358
1
        };
5359
5360
1
        std::unique_ptr<Transaction> txn;
5361
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5362
1
        if (err != TxnErrorCode::TXN_OK) {
5363
0
            LOG_WARNING("failed to recycle restore job")
5364
0
                    .tag("err", err)
5365
0
                    .tag("instance_id", instance_id_)
5366
0
                    .tag("reason", "failed to create txn");
5367
0
            return -1;
5368
0
        }
5369
20
        for (auto& k : restore_job_keys) {
5370
20
            txn->remove(k);
5371
20
        }
5372
1
        err = txn->commit();
5373
1
        if (err != TxnErrorCode::TXN_OK) {
5374
0
            LOG_WARNING("failed to recycle restore job")
5375
0
                    .tag("err", err)
5376
0
                    .tag("instance_id", instance_id_)
5377
0
                    .tag("reason", "failed to commit txn");
5378
0
            return -1;
5379
0
        }
5380
1
        return 0;
5381
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
5354
3
    auto loop_done = [&restore_job_keys, this]() -> int {
5355
3
        if (restore_job_keys.empty()) return 0;
5356
1
        DORIS_CLOUD_DEFER {
5357
1
            restore_job_keys.clear();
5358
1
        };
5359
5360
1
        std::unique_ptr<Transaction> txn;
5361
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5362
1
        if (err != TxnErrorCode::TXN_OK) {
5363
0
            LOG_WARNING("failed to recycle restore job")
5364
0
                    .tag("err", err)
5365
0
                    .tag("instance_id", instance_id_)
5366
0
                    .tag("reason", "failed to create txn");
5367
0
            return -1;
5368
0
        }
5369
20
        for (auto& k : restore_job_keys) {
5370
20
            txn->remove(k);
5371
20
        }
5372
1
        err = txn->commit();
5373
1
        if (err != TxnErrorCode::TXN_OK) {
5374
0
            LOG_WARNING("failed to recycle restore job")
5375
0
                    .tag("err", err)
5376
0
                    .tag("instance_id", instance_id_)
5377
0
                    .tag("reason", "failed to commit txn");
5378
0
            return -1;
5379
0
        }
5380
1
        return 0;
5381
1
    };
5382
5383
13
    if (config::enable_recycler_stats_metrics) {
5384
0
        scan_and_statistics_restore_jobs();
5385
0
    }
5386
5387
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
5388
13
                            std::move(loop_done));
5389
13
}
5390
5391
10
int InstanceRecycler::recycle_versioned_rowsets() {
5392
10
    const std::string task_name = "recycle_rowsets";
5393
10
    int64_t num_scanned = 0;
5394
10
    int64_t num_expired = 0;
5395
10
    int64_t num_prepare = 0;
5396
10
    int64_t num_compacted = 0;
5397
10
    int64_t num_empty_rowset = 0;
5398
10
    size_t total_rowset_key_size = 0;
5399
10
    size_t total_rowset_value_size = 0;
5400
10
    size_t expired_rowset_size = 0;
5401
10
    std::atomic_long num_recycled = 0;
5402
10
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5403
5404
10
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
5405
10
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
5406
10
    std::string recyc_rs_key0;
5407
10
    std::string recyc_rs_key1;
5408
10
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
5409
10
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
5410
5411
10
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
5412
5413
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5414
10
    register_recycle_task(task_name, start_time);
5415
5416
10
    DORIS_CLOUD_DEFER {
5417
10
        unregister_recycle_task(task_name);
5418
10
        int64_t cost =
5419
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5420
10
        metrics_context.finish_report();
5421
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5422
10
                .tag("instance_id", instance_id_)
5423
10
                .tag("num_scanned", num_scanned)
5424
10
                .tag("num_expired", num_expired)
5425
10
                .tag("num_recycled", num_recycled)
5426
10
                .tag("num_recycled.prepare", num_prepare)
5427
10
                .tag("num_recycled.compacted", num_compacted)
5428
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5429
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5430
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5431
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5432
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
5416
10
    DORIS_CLOUD_DEFER {
5417
10
        unregister_recycle_task(task_name);
5418
10
        int64_t cost =
5419
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5420
10
        metrics_context.finish_report();
5421
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5422
10
                .tag("instance_id", instance_id_)
5423
10
                .tag("num_scanned", num_scanned)
5424
10
                .tag("num_expired", num_expired)
5425
10
                .tag("num_recycled", num_recycled)
5426
10
                .tag("num_recycled.prepare", num_prepare)
5427
10
                .tag("num_recycled.compacted", num_compacted)
5428
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5429
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5430
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5431
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5432
10
    };
5433
5434
10
    std::vector<std::string> orphan_rowset_keys;
5435
5436
    // Store keys of rowset recycled by background workers
5437
10
    std::mutex async_recycled_rowset_keys_mutex;
5438
10
    std::vector<std::string> async_recycled_rowset_keys;
5439
10
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5440
10
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
5441
10
    worker_pool->start();
5442
10
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
5443
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5444
        // Try to delete rowset data in background thread
5445
400
        int ret = worker_pool->submit_with_timeout(
5446
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5447
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5448
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5449
400
                        return;
5450
400
                    }
5451
                    // The async recycled rowsets are staled format or has not been used,
5452
                    // so we don't need to check the rowset ref count key.
5453
0
                    std::vector<std::string> keys;
5454
0
                    {
5455
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5456
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5457
0
                        if (async_recycled_rowset_keys.size() > 100) {
5458
0
                            keys.swap(async_recycled_rowset_keys);
5459
0
                        }
5460
0
                    }
5461
0
                    if (keys.empty()) return;
5462
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5463
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5464
0
                                     << instance_id_;
5465
0
                    } else {
5466
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5467
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5468
0
                                           num_recycled, start_time);
5469
0
                    }
5470
0
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
5446
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5447
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5448
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5449
400
                        return;
5450
400
                    }
5451
                    // The async recycled rowsets are staled format or has not been used,
5452
                    // so we don't need to check the rowset ref count key.
5453
0
                    std::vector<std::string> keys;
5454
0
                    {
5455
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5456
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5457
0
                        if (async_recycled_rowset_keys.size() > 100) {
5458
0
                            keys.swap(async_recycled_rowset_keys);
5459
0
                        }
5460
0
                    }
5461
0
                    if (keys.empty()) return;
5462
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5463
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5464
0
                                     << instance_id_;
5465
0
                    } else {
5466
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5467
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5468
0
                                           num_recycled, start_time);
5469
0
                    }
5470
0
                },
5471
400
                0);
5472
400
        if (ret == 0) return 0;
5473
        // Submit task failed, delete rowset data in current thread
5474
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5475
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5476
0
            return -1;
5477
0
        }
5478
0
        orphan_rowset_keys.push_back(std::move(key));
5479
0
        return 0;
5480
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
5443
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5444
        // Try to delete rowset data in background thread
5445
400
        int ret = worker_pool->submit_with_timeout(
5446
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5447
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5448
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5449
400
                        return;
5450
400
                    }
5451
                    // The async recycled rowsets are staled format or has not been used,
5452
                    // so we don't need to check the rowset ref count key.
5453
400
                    std::vector<std::string> keys;
5454
400
                    {
5455
400
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5456
400
                        async_recycled_rowset_keys.push_back(std::move(key));
5457
400
                        if (async_recycled_rowset_keys.size() > 100) {
5458
400
                            keys.swap(async_recycled_rowset_keys);
5459
400
                        }
5460
400
                    }
5461
400
                    if (keys.empty()) return;
5462
400
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5463
400
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5464
400
                                     << instance_id_;
5465
400
                    } else {
5466
400
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5467
400
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5468
400
                                           num_recycled, start_time);
5469
400
                    }
5470
400
                },
5471
400
                0);
5472
400
        if (ret == 0) return 0;
5473
        // Submit task failed, delete rowset data in current thread
5474
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5475
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5476
0
            return -1;
5477
0
        }
5478
0
        orphan_rowset_keys.push_back(std::move(key));
5479
0
        return 0;
5480
0
    };
5481
5482
10
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5483
5484
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5485
2.01k
        ++num_scanned;
5486
2.01k
        total_rowset_key_size += k.size();
5487
2.01k
        total_rowset_value_size += v.size();
5488
2.01k
        RecycleRowsetPB rowset;
5489
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5490
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5491
0
            return -1;
5492
0
        }
5493
5494
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5495
5496
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5497
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5498
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5499
2.01k
        int64_t current_time = ::time(nullptr);
5500
2.01k
        if (current_time < final_expiration) { // not expired
5501
0
            return 0;
5502
0
        }
5503
2.01k
        ++num_expired;
5504
2.01k
        expired_rowset_size += v.size();
5505
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5506
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5507
                // in old version, keep this key-value pair and it needs to be checked manually
5508
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5509
0
                return -1;
5510
0
            }
5511
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5512
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5513
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5514
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5515
0
                orphan_rowset_keys.emplace_back(k);
5516
0
                return -1;
5517
0
            }
5518
            // decode rowset_id
5519
0
            auto k1 = k;
5520
0
            k1.remove_prefix(1);
5521
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5522
0
            decode_key(&k1, &out);
5523
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5524
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5525
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5526
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5527
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5528
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5529
0
                return -1;
5530
0
            }
5531
0
            return 0;
5532
0
        }
5533
        // TODO(plat1ko): check rowset not referenced
5534
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5535
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5536
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5537
0
                LOG_INFO("recycle rowset that has empty resource id");
5538
0
            } else {
5539
                // other situations, keep this key-value pair and it needs to be checked manually
5540
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5541
0
                return -1;
5542
0
            }
5543
0
        }
5544
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5545
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5546
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5547
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5548
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5549
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5550
2.01k
                  << " rowset_meta_size=" << v.size()
5551
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5552
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5553
            // unable to calculate file path, can only be deleted by rowset id prefix
5554
400
            num_prepare += 1;
5555
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5556
400
                                             rowset_meta->tablet_id(),
5557
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5558
0
                return -1;
5559
0
            }
5560
1.61k
        } else {
5561
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5562
1.61k
            worker_pool->submit(
5563
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5564
                        // The load & compact rowset keys are recycled during recycling operation logs.
5565
1.61k
                        RowsetDeleteTask task;
5566
1.61k
                        task.rowset_meta = rowset_meta;
5567
1.61k
                        task.recycle_rowset_key = k;
5568
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5569
1.60k
                            return;
5570
1.60k
                        }
5571
13
                        num_compacted += is_compacted;
5572
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5573
13
                        if (rowset_meta.num_segments() == 0) {
5574
0
                            ++num_empty_rowset;
5575
0
                        }
5576
13
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
5563
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5564
                        // The load & compact rowset keys are recycled during recycling operation logs.
5565
1.61k
                        RowsetDeleteTask task;
5566
1.61k
                        task.rowset_meta = rowset_meta;
5567
1.61k
                        task.recycle_rowset_key = k;
5568
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5569
1.60k
                            return;
5570
1.60k
                        }
5571
13
                        num_compacted += is_compacted;
5572
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5573
13
                        if (rowset_meta.num_segments() == 0) {
5574
0
                            ++num_empty_rowset;
5575
0
                        }
5576
13
                    });
5577
1.61k
        }
5578
2.01k
        return 0;
5579
2.01k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5484
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5485
2.01k
        ++num_scanned;
5486
2.01k
        total_rowset_key_size += k.size();
5487
2.01k
        total_rowset_value_size += v.size();
5488
2.01k
        RecycleRowsetPB rowset;
5489
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5490
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5491
0
            return -1;
5492
0
        }
5493
5494
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5495
5496
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5497
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5498
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5499
2.01k
        int64_t current_time = ::time(nullptr);
5500
2.01k
        if (current_time < final_expiration) { // not expired
5501
0
            return 0;
5502
0
        }
5503
2.01k
        ++num_expired;
5504
2.01k
        expired_rowset_size += v.size();
5505
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5506
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5507
                // in old version, keep this key-value pair and it needs to be checked manually
5508
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5509
0
                return -1;
5510
0
            }
5511
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5512
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5513
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5514
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5515
0
                orphan_rowset_keys.emplace_back(k);
5516
0
                return -1;
5517
0
            }
5518
            // decode rowset_id
5519
0
            auto k1 = k;
5520
0
            k1.remove_prefix(1);
5521
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5522
0
            decode_key(&k1, &out);
5523
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5524
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5525
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5526
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5527
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5528
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5529
0
                return -1;
5530
0
            }
5531
0
            return 0;
5532
0
        }
5533
        // TODO(plat1ko): check rowset not referenced
5534
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5535
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5536
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5537
0
                LOG_INFO("recycle rowset that has empty resource id");
5538
0
            } else {
5539
                // other situations, keep this key-value pair and it needs to be checked manually
5540
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5541
0
                return -1;
5542
0
            }
5543
0
        }
5544
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5545
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5546
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5547
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5548
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5549
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5550
2.01k
                  << " rowset_meta_size=" << v.size()
5551
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5552
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5553
            // unable to calculate file path, can only be deleted by rowset id prefix
5554
400
            num_prepare += 1;
5555
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5556
400
                                             rowset_meta->tablet_id(),
5557
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5558
0
                return -1;
5559
0
            }
5560
1.61k
        } else {
5561
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5562
1.61k
            worker_pool->submit(
5563
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5564
                        // The load & compact rowset keys are recycled during recycling operation logs.
5565
1.61k
                        RowsetDeleteTask task;
5566
1.61k
                        task.rowset_meta = rowset_meta;
5567
1.61k
                        task.recycle_rowset_key = k;
5568
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5569
1.61k
                            return;
5570
1.61k
                        }
5571
1.61k
                        num_compacted += is_compacted;
5572
1.61k
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5573
1.61k
                        if (rowset_meta.num_segments() == 0) {
5574
1.61k
                            ++num_empty_rowset;
5575
1.61k
                        }
5576
1.61k
                    });
5577
1.61k
        }
5578
2.01k
        return 0;
5579
2.01k
    };
5580
5581
10
    if (config::enable_recycler_stats_metrics) {
5582
0
        scan_and_statistics_rowsets();
5583
0
    }
5584
5585
10
    auto loop_done = [&]() -> int {
5586
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5587
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5588
0
        }
5589
6
        orphan_rowset_keys.clear();
5590
6
        return 0;
5591
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
5585
6
    auto loop_done = [&]() -> int {
5586
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5587
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5588
0
        }
5589
6
        orphan_rowset_keys.clear();
5590
6
        return 0;
5591
6
    };
5592
5593
    // recycle_func and loop_done for scan and recycle
5594
10
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
5595
10
                               std::move(loop_done));
5596
5597
10
    worker_pool->stop();
5598
5599
10
    if (!async_recycled_rowset_keys.empty()) {
5600
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
5601
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5602
0
            return -1;
5603
0
        } else {
5604
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
5605
0
        }
5606
0
    }
5607
5608
    // Report final metrics after all concurrent tasks completed
5609
10
    segment_metrics_context_.report();
5610
10
    metrics_context.report();
5611
5612
10
    return ret;
5613
10
}
5614
5615
1.61k
int InstanceRecycler::recycle_rowset_meta_and_data(const RowsetDeleteTask& task) {
5616
1.61k
    constexpr int MAX_RETRY = 10;
5617
1.61k
    const RowsetMetaCloudPB& rowset_meta = task.rowset_meta;
5618
1.61k
    int64_t tablet_id = rowset_meta.tablet_id();
5619
1.61k
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
5620
1.61k
    std::string_view reference_instance_id = instance_id_;
5621
1.61k
    if (rowset_meta.has_reference_instance_id()) {
5622
8
        reference_instance_id = rowset_meta.reference_instance_id();
5623
8
    }
5624
5625
1.61k
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
5626
1.61k
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
5627
1.61k
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(task.recycle_rowset_key));
5628
1.61k
    AnnotateTag instance_id_tag("instance_id", instance_id_);
5629
1.61k
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
5630
1.61k
    for (int i = 0; i < MAX_RETRY; ++i) {
5631
1.61k
        std::unique_ptr<Transaction> txn;
5632
1.61k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5633
1.61k
        if (err != TxnErrorCode::TXN_OK) {
5634
0
            LOG_WARNING("failed to create txn").tag("err", err);
5635
0
            return -1;
5636
0
        }
5637
5638
1.61k
        std::string rowset_ref_count_key =
5639
1.61k
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
5640
1.61k
        int64_t ref_count = 0;
5641
1.61k
        {
5642
1.61k
            std::string value;
5643
1.61k
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
5644
1.61k
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5645
                // This is the old version rowset, we could recycle it directly.
5646
1.60k
                ref_count = 1;
5647
1.60k
            } else if (err != TxnErrorCode::TXN_OK) {
5648
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
5649
0
                return -1;
5650
11
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
5651
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
5652
0
                return -1;
5653
0
            }
5654
1.61k
        }
5655
5656
1.61k
        if (ref_count == 1) {
5657
            // It would not be added since it is recycling.
5658
1.61k
            if (delete_rowset_data(rowset_meta) != 0) {
5659
1.60k
                LOG_WARNING("failed to delete rowset data");
5660
1.60k
                return -1;
5661
1.60k
            }
5662
5663
            // Reset the transaction to avoid timeout.
5664
10
            err = txn_kv_->create_txn(&txn);
5665
10
            if (err != TxnErrorCode::TXN_OK) {
5666
0
                LOG_WARNING("failed to create txn").tag("err", err);
5667
0
                return -1;
5668
0
            }
5669
10
            txn->remove(rowset_ref_count_key);
5670
10
            LOG_INFO("delete rowset data ref count key")
5671
10
                    .tag("txn_id", rowset_meta.txn_id())
5672
10
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5673
5674
10
            std::string dbm_start_key =
5675
10
                    meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
5676
10
            std::string dbm_end_key = meta_delete_bitmap_key(
5677
10
                    {reference_instance_id, tablet_id, rowset_id,
5678
10
                     std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
5679
10
            txn->remove(dbm_start_key, dbm_end_key);
5680
10
            LOG_INFO("remove delete bitmap kv")
5681
10
                    .tag("begin", hex(dbm_start_key))
5682
10
                    .tag("end", hex(dbm_end_key));
5683
5684
10
            std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key(
5685
10
                    {reference_instance_id, tablet_id, rowset_id});
5686
10
            std::string versioned_dbm_end_key = versioned_dbm_start_key;
5687
10
            encode_int64(INT64_MAX, &versioned_dbm_end_key);
5688
10
            txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
5689
10
            LOG_INFO("remove versioned delete bitmap kv")
5690
10
                    .tag("begin", hex(versioned_dbm_start_key))
5691
10
                    .tag("end", hex(versioned_dbm_end_key));
5692
10
        } else {
5693
            // Decrease the rowset ref count.
5694
            //
5695
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
5696
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
5697
3
            txn->atomic_add(rowset_ref_count_key, -1);
5698
3
            LOG_INFO("decrease rowset data ref count")
5699
3
                    .tag("txn_id", rowset_meta.txn_id())
5700
3
                    .tag("ref_count", ref_count - 1)
5701
3
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5702
3
        }
5703
5704
13
        if (!task.versioned_rowset_key.empty()) {
5705
0
            versioned::document_remove<RowsetMetaCloudPB>(txn.get(), task.versioned_rowset_key,
5706
0
                                                          task.versionstamp);
5707
0
            LOG_INFO("remove versioned meta rowset key").tag("key", hex(task.versioned_rowset_key));
5708
0
        }
5709
5710
13
        if (!task.non_versioned_rowset_key.empty()) {
5711
0
            txn->remove(task.non_versioned_rowset_key);
5712
0
            LOG_INFO("remove non versioned rowset key")
5713
0
                    .tag("key", hex(task.non_versioned_rowset_key));
5714
0
        }
5715
5716
        // empty when recycle ref rowsets for deleted instance
5717
13
        if (!task.recycle_rowset_key.empty()) {
5718
13
            txn->remove(task.recycle_rowset_key);
5719
13
            LOG_INFO("remove recycle rowset key").tag("key", hex(task.recycle_rowset_key));
5720
13
        }
5721
5722
13
        err = txn->commit();
5723
13
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
5724
            // The rowset ref count key has been changed, we need to retry.
5725
0
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
5726
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
5727
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
5728
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5729
0
            continue;
5730
13
        } else if (err != TxnErrorCode::TXN_OK) {
5731
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
5732
0
            return -1;
5733
0
        }
5734
13
        LOG_INFO("recycle rowset meta and data success");
5735
13
        return 0;
5736
13
    }
5737
0
    LOG_WARNING("failed to recycle rowset meta and data after retry")
5738
0
            .tag("tablet_id", tablet_id)
5739
0
            .tag("rowset_id", rowset_id)
5740
0
            .tag("retry", MAX_RETRY);
5741
0
    return -1;
5742
1.61k
}
5743
5744
39
int InstanceRecycler::recycle_tmp_rowsets() {
5745
39
    const std::string task_name = "recycle_tmp_rowsets";
5746
39
    int64_t num_scanned = 0;
5747
39
    int64_t num_expired = 0;
5748
39
    std::atomic_long num_recycled = 0;
5749
39
    size_t expired_rowset_size = 0;
5750
39
    size_t total_rowset_key_size = 0;
5751
39
    size_t total_rowset_value_size = 0;
5752
39
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5753
5754
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
5755
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
5756
39
    std::string tmp_rs_key0;
5757
39
    std::string tmp_rs_key1;
5758
39
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
5759
39
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
5760
5761
39
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
5762
5763
39
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5764
39
    register_recycle_task(task_name, start_time);
5765
5766
39
    DORIS_CLOUD_DEFER {
5767
39
        unregister_recycle_task(task_name);
5768
39
        int64_t cost =
5769
39
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5770
39
        metrics_context.finish_report();
5771
39
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5772
39
                .tag("instance_id", instance_id_)
5773
39
                .tag("num_scanned", num_scanned)
5774
39
                .tag("num_expired", num_expired)
5775
39
                .tag("num_recycled", num_recycled)
5776
39
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5777
39
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5778
39
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5779
39
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5766
12
    DORIS_CLOUD_DEFER {
5767
12
        unregister_recycle_task(task_name);
5768
12
        int64_t cost =
5769
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5770
12
        metrics_context.finish_report();
5771
12
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5772
12
                .tag("instance_id", instance_id_)
5773
12
                .tag("num_scanned", num_scanned)
5774
12
                .tag("num_expired", num_expired)
5775
12
                .tag("num_recycled", num_recycled)
5776
12
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5777
12
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5778
12
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5779
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5766
27
    DORIS_CLOUD_DEFER {
5767
27
        unregister_recycle_task(task_name);
5768
27
        int64_t cost =
5769
27
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5770
27
        metrics_context.finish_report();
5771
27
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5772
27
                .tag("instance_id", instance_id_)
5773
27
                .tag("num_scanned", num_scanned)
5774
27
                .tag("num_expired", num_expired)
5775
27
                .tag("num_recycled", num_recycled)
5776
27
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5777
27
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5778
27
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5779
27
    };
5780
5781
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
5782
5783
39
    std::vector<std::string> tmp_rowset_keys;
5784
39
    std::vector<std::string> tmp_rowset_ref_count_keys;
5785
39
    std::vector<std::string> tmp_rowset_keys_to_mark_recycled;
5786
39
    std::vector<std::string> tmp_rowset_keys_to_abort;
5787
5788
    // rowset_id -> rowset_meta
5789
    // store tmp_rowset id and meta for statistics rs size when delete
5790
39
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
5791
39
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5792
39
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
5793
39
    worker_pool->start();
5794
5795
39
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5796
5797
39
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
5798
39
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
5799
39
                             &earlest_ts, &tmp_rowset_ref_count_keys,
5800
39
                             &tmp_rowset_keys_to_mark_recycled, &tmp_rowset_keys_to_abort, this,
5801
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5802
106k
        ++num_scanned;
5803
106k
        total_rowset_key_size += k.size();
5804
106k
        total_rowset_value_size += v.size();
5805
106k
        doris::RowsetMetaCloudPB rowset;
5806
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5807
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5808
0
            return -1;
5809
0
        }
5810
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5811
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5812
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5813
0
                   << " txn_expiration=" << rowset.txn_expiration()
5814
0
                   << " rowset_creation_time=" << rowset.creation_time();
5815
106k
        int64_t current_time = ::time(nullptr);
5816
106k
        if (current_time < expiration) { // not expired
5817
0
            return 0;
5818
0
        }
5819
5820
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5821
106k
            if (need_mark_rowset_as_recycled(rowset)) {
5822
52.0k
                tmp_rowset_keys_to_mark_recycled.emplace_back(k);
5823
52.0k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5824
52.0k
                             "at next turn, instance_id="
5825
52.0k
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5826
52.0k
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5827
52.0k
                return 0;
5828
52.0k
            }
5829
106k
        }
5830
5831
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5832
54.0k
            if (make_deferred_abort_task(rowset).has_value()) {
5833
3
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5834
3
                             "instance_id="
5835
3
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5836
3
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5837
3
                tmp_rowset_keys_to_abort.emplace_back(k);
5838
3
            }
5839
54.0k
        }
5840
5841
54.0k
        ++num_expired;
5842
54.0k
        expired_rowset_size += v.size();
5843
54.0k
        if (!rowset.has_resource_id()) {
5844
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5845
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5846
0
                return -1;
5847
0
            }
5848
            // might be a delete pred rowset
5849
0
            tmp_rowset_keys.emplace_back(k);
5850
0
            return 0;
5851
0
        }
5852
        // TODO(plat1ko): check rowset not referenced
5853
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5854
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5855
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5856
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5857
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5858
54.0k
                  << " num_expired=" << num_expired
5859
54.0k
                  << " task_type=" << metrics_context.operation_type;
5860
5861
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5862
        // Remove the rowset ref count key directly since it has not been used.
5863
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5864
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5865
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5866
54.0k
                  << "key=" << hex(rowset_ref_count_key);
5867
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5868
5869
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5870
54.0k
        return 0;
5871
54.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5801
16
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5802
16
        ++num_scanned;
5803
16
        total_rowset_key_size += k.size();
5804
16
        total_rowset_value_size += v.size();
5805
16
        doris::RowsetMetaCloudPB rowset;
5806
16
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5807
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5808
0
            return -1;
5809
0
        }
5810
16
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5811
16
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5812
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5813
0
                   << " txn_expiration=" << rowset.txn_expiration()
5814
0
                   << " rowset_creation_time=" << rowset.creation_time();
5815
16
        int64_t current_time = ::time(nullptr);
5816
16
        if (current_time < expiration) { // not expired
5817
0
            return 0;
5818
0
        }
5819
5820
16
        if (config::enable_mark_delete_rowset_before_recycle) {
5821
16
            if (need_mark_rowset_as_recycled(rowset)) {
5822
9
                tmp_rowset_keys_to_mark_recycled.emplace_back(k);
5823
9
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5824
9
                             "at next turn, instance_id="
5825
9
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5826
9
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5827
9
                return 0;
5828
9
            }
5829
16
        }
5830
5831
7
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5832
7
            if (make_deferred_abort_task(rowset).has_value()) {
5833
3
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5834
3
                             "instance_id="
5835
3
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5836
3
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5837
3
                tmp_rowset_keys_to_abort.emplace_back(k);
5838
3
            }
5839
7
        }
5840
5841
7
        ++num_expired;
5842
7
        expired_rowset_size += v.size();
5843
7
        if (!rowset.has_resource_id()) {
5844
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5845
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5846
0
                return -1;
5847
0
            }
5848
            // might be a delete pred rowset
5849
0
            tmp_rowset_keys.emplace_back(k);
5850
0
            return 0;
5851
0
        }
5852
        // TODO(plat1ko): check rowset not referenced
5853
7
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5854
7
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5855
7
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5856
7
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5857
7
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5858
7
                  << " num_expired=" << num_expired
5859
7
                  << " task_type=" << metrics_context.operation_type;
5860
5861
7
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5862
        // Remove the rowset ref count key directly since it has not been used.
5863
7
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5864
7
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5865
7
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5866
7
                  << "key=" << hex(rowset_ref_count_key);
5867
7
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5868
5869
7
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5870
7
        return 0;
5871
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5801
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5802
106k
        ++num_scanned;
5803
106k
        total_rowset_key_size += k.size();
5804
106k
        total_rowset_value_size += v.size();
5805
106k
        doris::RowsetMetaCloudPB rowset;
5806
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5807
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5808
0
            return -1;
5809
0
        }
5810
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5811
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5812
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5813
0
                   << " txn_expiration=" << rowset.txn_expiration()
5814
0
                   << " rowset_creation_time=" << rowset.creation_time();
5815
106k
        int64_t current_time = ::time(nullptr);
5816
106k
        if (current_time < expiration) { // not expired
5817
0
            return 0;
5818
0
        }
5819
5820
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5821
106k
            if (need_mark_rowset_as_recycled(rowset)) {
5822
52.0k
                tmp_rowset_keys_to_mark_recycled.emplace_back(k);
5823
52.0k
                LOG(INFO) << "rowset queued to mark as recycled, recycler will delete data and kv "
5824
52.0k
                             "at next turn, instance_id="
5825
52.0k
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5826
52.0k
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5827
52.0k
                return 0;
5828
52.0k
            }
5829
106k
        }
5830
5831
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5832
54.0k
            if (make_deferred_abort_task(rowset).has_value()) {
5833
0
                LOG(INFO) << "rowset queued to abort related txn or job after current scan batch, "
5834
0
                             "instance_id="
5835
0
                          << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5836
0
                          << rowset.start_version() << '-' << rowset.end_version() << "]";
5837
0
                tmp_rowset_keys_to_abort.emplace_back(k);
5838
0
            }
5839
54.0k
        }
5840
5841
54.0k
        ++num_expired;
5842
54.0k
        expired_rowset_size += v.size();
5843
54.0k
        if (!rowset.has_resource_id()) {
5844
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5845
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5846
0
                return -1;
5847
0
            }
5848
            // might be a delete pred rowset
5849
0
            tmp_rowset_keys.emplace_back(k);
5850
0
            return 0;
5851
0
        }
5852
        // TODO(plat1ko): check rowset not referenced
5853
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5854
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5855
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5856
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5857
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5858
54.0k
                  << " num_expired=" << num_expired
5859
54.0k
                  << " task_type=" << metrics_context.operation_type;
5860
5861
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5862
        // Remove the rowset ref count key directly since it has not been used.
5863
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5864
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5865
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5866
54.0k
                  << "key=" << hex(rowset_ref_count_key);
5867
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5868
5869
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5870
54.0k
        return 0;
5871
54.0k
    };
5872
5873
    // TODO bacth delete
5874
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5875
51.0k
        std::string dbm_start_key =
5876
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5877
51.0k
        std::string dbm_end_key = dbm_start_key;
5878
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5879
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5880
51.0k
        if (ret != 0) {
5881
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5882
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5883
0
                         << ", rowset_id=" << rowset_id;
5884
0
        }
5885
51.0k
        return ret;
5886
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5874
7
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5875
7
        std::string dbm_start_key =
5876
7
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5877
7
        std::string dbm_end_key = dbm_start_key;
5878
7
        encode_int64(INT64_MAX, &dbm_end_key);
5879
7
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5880
7
        if (ret != 0) {
5881
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5882
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5883
0
                         << ", rowset_id=" << rowset_id;
5884
0
        }
5885
7
        return ret;
5886
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5874
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5875
51.0k
        std::string dbm_start_key =
5876
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5877
51.0k
        std::string dbm_end_key = dbm_start_key;
5878
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5879
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5880
51.0k
        if (ret != 0) {
5881
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5882
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5883
0
                         << ", rowset_id=" << rowset_id;
5884
0
        }
5885
51.0k
        return ret;
5886
51.0k
    };
5887
5888
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5889
51.0k
        auto delete_bitmap_start =
5890
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5891
51.0k
        auto delete_bitmap_end =
5892
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5893
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5894
51.0k
        if (ret != 0) {
5895
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5896
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5897
0
        }
5898
51.0k
        return ret;
5899
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5888
7
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5889
7
        auto delete_bitmap_start =
5890
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5891
7
        auto delete_bitmap_end =
5892
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5893
7
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5894
7
        if (ret != 0) {
5895
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5896
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5897
0
        }
5898
7
        return ret;
5899
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5888
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5889
51.0k
        auto delete_bitmap_start =
5890
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5891
51.0k
        auto delete_bitmap_end =
5892
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5893
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5894
51.0k
        if (ret != 0) {
5895
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5896
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5897
0
        }
5898
51.0k
        return ret;
5899
51.0k
    };
5900
5901
39
    auto loop_done = [&]() -> int {
5902
32
        std::vector<std::string> tmp_rowset_keys_to_delete;
5903
32
        std::vector<std::string> tmp_rowset_ref_count_keys_to_delete;
5904
32
        std::vector<std::string> mark_keys_to_process;
5905
32
        std::vector<std::string> abort_keys_to_process;
5906
32
        std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete;
5907
32
        tmp_rowset_keys_to_delete.swap(tmp_rowset_keys);
5908
32
        tmp_rowsets_to_delete.swap(tmp_rowsets);
5909
32
        tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys);
5910
32
        mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled);
5911
32
        abort_keys_to_process.swap(tmp_rowset_keys_to_abort);
5912
32
        worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete),
5913
32
                             tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete),
5914
32
                             tmp_rowset_ref_count_keys_to_delete =
5915
32
                                     std::move(tmp_rowset_ref_count_keys_to_delete),
5916
32
                             mark_keys_to_process = std::move(mark_keys_to_process),
5917
32
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5918
32
            if (!mark_keys_to_process.empty() &&
5919
32
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5920
16
                                                                  mark_keys_to_process) != 0) {
5921
0
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5922
0
                             << instance_id_;
5923
0
                return;
5924
0
            }
5925
32
            if (!abort_keys_to_process.empty() &&
5926
32
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5927
3
                                                                      false) != 0) {
5928
0
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5929
0
                             << instance_id_;
5930
0
                return;
5931
0
            }
5932
32
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5933
32
                                   metrics_context) != 0) {
5934
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5935
3
                return;
5936
3
            }
5937
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5938
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5939
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5940
0
                                 << rs.ShortDebugString();
5941
0
                    return;
5942
0
                }
5943
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5944
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5945
0
                                 << rs.ShortDebugString();
5946
0
                    return;
5947
0
                }
5948
51.0k
            }
5949
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5950
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5951
0
                return;
5952
0
            }
5953
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5954
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5955
0
                return;
5956
0
            }
5957
29
            num_recycled += tmp_rowset_keys_to_delete.size();
5958
29
            return;
5959
29
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
5917
12
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5918
12
            if (!mark_keys_to_process.empty() &&
5919
12
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5920
7
                                                                  mark_keys_to_process) != 0) {
5921
0
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5922
0
                             << instance_id_;
5923
0
                return;
5924
0
            }
5925
12
            if (!abort_keys_to_process.empty() &&
5926
12
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5927
3
                                                                      false) != 0) {
5928
0
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5929
0
                             << instance_id_;
5930
0
                return;
5931
0
            }
5932
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5933
12
                                   metrics_context) != 0) {
5934
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5935
0
                return;
5936
0
            }
5937
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5938
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5939
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5940
0
                                 << rs.ShortDebugString();
5941
0
                    return;
5942
0
                }
5943
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5944
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5945
0
                                 << rs.ShortDebugString();
5946
0
                    return;
5947
0
                }
5948
7
            }
5949
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5950
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5951
0
                return;
5952
0
            }
5953
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5954
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5955
0
                return;
5956
0
            }
5957
12
            num_recycled += tmp_rowset_keys_to_delete.size();
5958
12
            return;
5959
12
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEvENUlvE_clEv
Line
Count
Source
5917
20
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5918
20
            if (!mark_keys_to_process.empty() &&
5919
20
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5920
9
                                                                  mark_keys_to_process) != 0) {
5921
0
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5922
0
                             << instance_id_;
5923
0
                return;
5924
0
            }
5925
20
            if (!abort_keys_to_process.empty() &&
5926
20
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5927
0
                                                                      false) != 0) {
5928
0
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5929
0
                             << instance_id_;
5930
0
                return;
5931
0
            }
5932
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5933
20
                                   metrics_context) != 0) {
5934
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5935
3
                return;
5936
3
            }
5937
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5938
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5939
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5940
0
                                 << rs.ShortDebugString();
5941
0
                    return;
5942
0
                }
5943
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5944
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5945
0
                                 << rs.ShortDebugString();
5946
0
                    return;
5947
0
                }
5948
51.0k
            }
5949
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5950
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5951
0
                return;
5952
0
            }
5953
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5954
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5955
0
                return;
5956
0
            }
5957
17
            num_recycled += tmp_rowset_keys_to_delete.size();
5958
17
            return;
5959
17
        });
5960
32
        return 0;
5961
32
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
Line
Count
Source
5901
12
    auto loop_done = [&]() -> int {
5902
12
        std::vector<std::string> tmp_rowset_keys_to_delete;
5903
12
        std::vector<std::string> tmp_rowset_ref_count_keys_to_delete;
5904
12
        std::vector<std::string> mark_keys_to_process;
5905
12
        std::vector<std::string> abort_keys_to_process;
5906
12
        std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete;
5907
12
        tmp_rowset_keys_to_delete.swap(tmp_rowset_keys);
5908
12
        tmp_rowsets_to_delete.swap(tmp_rowsets);
5909
12
        tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys);
5910
12
        mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled);
5911
12
        abort_keys_to_process.swap(tmp_rowset_keys_to_abort);
5912
12
        worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete),
5913
12
                             tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete),
5914
12
                             tmp_rowset_ref_count_keys_to_delete =
5915
12
                                     std::move(tmp_rowset_ref_count_keys_to_delete),
5916
12
                             mark_keys_to_process = std::move(mark_keys_to_process),
5917
12
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5918
12
            if (!mark_keys_to_process.empty() &&
5919
12
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5920
12
                                                                  mark_keys_to_process) != 0) {
5921
12
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5922
12
                             << instance_id_;
5923
12
                return;
5924
12
            }
5925
12
            if (!abort_keys_to_process.empty() &&
5926
12
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5927
12
                                                                      false) != 0) {
5928
12
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5929
12
                             << instance_id_;
5930
12
                return;
5931
12
            }
5932
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5933
12
                                   metrics_context) != 0) {
5934
12
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5935
12
                return;
5936
12
            }
5937
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5938
12
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5939
12
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5940
12
                                 << rs.ShortDebugString();
5941
12
                    return;
5942
12
                }
5943
12
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5944
12
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5945
12
                                 << rs.ShortDebugString();
5946
12
                    return;
5947
12
                }
5948
12
            }
5949
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5950
12
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5951
12
                return;
5952
12
            }
5953
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5954
12
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5955
12
                return;
5956
12
            }
5957
12
            num_recycled += tmp_rowset_keys_to_delete.size();
5958
12
            return;
5959
12
        });
5960
12
        return 0;
5961
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clEv
Line
Count
Source
5901
20
    auto loop_done = [&]() -> int {
5902
20
        std::vector<std::string> tmp_rowset_keys_to_delete;
5903
20
        std::vector<std::string> tmp_rowset_ref_count_keys_to_delete;
5904
20
        std::vector<std::string> mark_keys_to_process;
5905
20
        std::vector<std::string> abort_keys_to_process;
5906
20
        std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets_to_delete;
5907
20
        tmp_rowset_keys_to_delete.swap(tmp_rowset_keys);
5908
20
        tmp_rowsets_to_delete.swap(tmp_rowsets);
5909
20
        tmp_rowset_ref_count_keys_to_delete.swap(tmp_rowset_ref_count_keys);
5910
20
        mark_keys_to_process.swap(tmp_rowset_keys_to_mark_recycled);
5911
20
        abort_keys_to_process.swap(tmp_rowset_keys_to_abort);
5912
20
        worker_pool->submit([&, tmp_rowset_keys_to_delete = std::move(tmp_rowset_keys_to_delete),
5913
20
                             tmp_rowsets_to_delete = std::move(tmp_rowsets_to_delete),
5914
20
                             tmp_rowset_ref_count_keys_to_delete =
5915
20
                                     std::move(tmp_rowset_ref_count_keys_to_delete),
5916
20
                             mark_keys_to_process = std::move(mark_keys_to_process),
5917
20
                             abort_keys_to_process = std::move(abort_keys_to_process)]() mutable {
5918
20
            if (!mark_keys_to_process.empty() &&
5919
20
                batch_mark_rowsets_as_recycled<RowsetMetaCloudPB>(txn_kv_.get(), instance_id_,
5920
20
                                                                  mark_keys_to_process) != 0) {
5921
20
                LOG(WARNING) << "failed to batch mark tmp rowsets as recycled, instance_id="
5922
20
                             << instance_id_;
5923
20
                return;
5924
20
            }
5925
20
            if (!abort_keys_to_process.empty() &&
5926
20
                batch_abort_txn_or_job_for_recycle<RowsetMetaCloudPB>(abort_keys_to_process,
5927
20
                                                                      false) != 0) {
5928
20
                LOG(WARNING) << "failed to batch abort txn or job for releated rowset, instance_id="
5929
20
                             << instance_id_;
5930
20
                return;
5931
20
            }
5932
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5933
20
                                   metrics_context) != 0) {
5934
20
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5935
20
                return;
5936
20
            }
5937
20
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5938
20
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5939
20
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5940
20
                                 << rs.ShortDebugString();
5941
20
                    return;
5942
20
                }
5943
20
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5944
20
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5945
20
                                 << rs.ShortDebugString();
5946
20
                    return;
5947
20
                }
5948
20
            }
5949
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5950
20
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5951
20
                return;
5952
20
            }
5953
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5954
20
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5955
20
                return;
5956
20
            }
5957
20
            num_recycled += tmp_rowset_keys_to_delete.size();
5958
20
            return;
5959
20
        });
5960
20
        return 0;
5961
20
    };
5962
5963
39
    if (config::enable_recycler_stats_metrics) {
5964
0
        scan_and_statistics_tmp_rowsets();
5965
0
    }
5966
    // recycle_func and loop_done for scan and recycle
5967
39
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
5968
39
                               std::move(loop_done));
5969
5970
39
    worker_pool->stop();
5971
5972
    // Report final metrics after all concurrent tasks completed
5973
39
    segment_metrics_context_.report();
5974
39
    metrics_context.report();
5975
5976
39
    return ret;
5977
39
}
5978
5979
int InstanceRecycler::scan_and_recycle(
5980
        std::string begin, std::string_view end,
5981
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
5982
268
        std::function<int()> loop_done) {
5983
268
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
5984
268
    int ret = 0;
5985
268
    int64_t cnt = 0;
5986
268
    int get_range_retried = 0;
5987
268
    std::string err;
5988
268
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5989
268
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5990
268
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5991
268
                  << " ret=" << ret << " err=" << err;
5992
268
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5988
31
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5989
31
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5990
31
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5991
31
                  << " ret=" << ret << " err=" << err;
5992
31
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5988
237
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5989
237
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5990
237
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5991
237
                  << " ret=" << ret << " err=" << err;
5992
237
    };
5993
5994
268
    std::unique_ptr<RangeGetIterator> it;
5995
449
    while (it == nullptr /* may be not init */ || (it->more() && !stopped())) {
5996
321
        if (get_range_retried > 1000) {
5997
0
            err = "txn_get exceeds max retry(1000), may not scan all keys";
5998
0
            ret = -3;
5999
0
            return ret;
6000
0
        }
6001
321
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
6002
321
        if (get_ret != 0) { // txn kv may complain "Request for future version"
6003
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
6004
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
6005
0
                         << " get_range_retried=" << get_range_retried;
6006
0
            ++get_range_retried;
6007
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
6008
0
            continue; // try again
6009
0
        }
6010
321
        if (!it->has_next()) {
6011
140
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
6012
140
            break; // scan finished
6013
140
        }
6014
154k
        while (it->has_next()) {
6015
154k
            ++cnt;
6016
            // recycle corresponding resources
6017
154k
            auto [k, v] = it->next();
6018
154k
            if (!it->has_next()) {
6019
181
                begin = k;
6020
181
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
6021
181
            }
6022
            // FIXME(gavin): if we want to continue scanning, the recycle_func should not return non-zero
6023
154k
            if (recycle_func(k, v) != 0) {
6024
4.00k
                err = "recycle_func error";
6025
4.00k
                ret = -1;
6026
4.00k
            }
6027
154k
        }
6028
181
        begin.push_back('\x00'); // Update to next smallest key for iteration
6029
        // FIXME(gavin): if we want to continue scanning, the loop_done should not return non-zero
6030
181
        if (loop_done && loop_done() != 0) {
6031
4
            err = "loop_done error";
6032
4
            ret = -1;
6033
4
        }
6034
181
    }
6035
268
    return ret;
6036
268
}
6037
6038
19
int InstanceRecycler::abort_timeout_txn() {
6039
19
    const std::string task_name = "abort_timeout_txn";
6040
19
    int64_t num_scanned = 0;
6041
19
    int64_t num_timeout = 0;
6042
19
    int64_t num_abort = 0;
6043
19
    int64_t num_advance = 0;
6044
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6045
6046
19
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
6047
19
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6048
19
    std::string begin_txn_running_key;
6049
19
    std::string end_txn_running_key;
6050
19
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
6051
19
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
6052
6053
19
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
6054
6055
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6056
19
    register_recycle_task(task_name, start_time);
6057
6058
19
    DORIS_CLOUD_DEFER {
6059
19
        unregister_recycle_task(task_name);
6060
19
        int64_t cost =
6061
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6062
19
        metrics_context.finish_report();
6063
19
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
6064
19
                .tag("instance_id", instance_id_)
6065
19
                .tag("num_scanned", num_scanned)
6066
19
                .tag("num_timeout", num_timeout)
6067
19
                .tag("num_abort", num_abort)
6068
19
                .tag("num_advance", num_advance);
6069
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
6058
3
    DORIS_CLOUD_DEFER {
6059
3
        unregister_recycle_task(task_name);
6060
3
        int64_t cost =
6061
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6062
3
        metrics_context.finish_report();
6063
3
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
6064
3
                .tag("instance_id", instance_id_)
6065
3
                .tag("num_scanned", num_scanned)
6066
3
                .tag("num_timeout", num_timeout)
6067
3
                .tag("num_abort", num_abort)
6068
3
                .tag("num_advance", num_advance);
6069
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
6058
16
    DORIS_CLOUD_DEFER {
6059
16
        unregister_recycle_task(task_name);
6060
16
        int64_t cost =
6061
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6062
16
        metrics_context.finish_report();
6063
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
6064
16
                .tag("instance_id", instance_id_)
6065
16
                .tag("num_scanned", num_scanned)
6066
16
                .tag("num_timeout", num_timeout)
6067
16
                .tag("num_abort", num_abort)
6068
16
                .tag("num_advance", num_advance);
6069
16
    };
6070
6071
19
    int64_t current_time =
6072
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6073
6074
19
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
6075
19
                                  &current_time, &metrics_context,
6076
19
                                  this](std::string_view k, std::string_view v) -> int {
6077
9
        ++num_scanned;
6078
6079
9
        std::unique_ptr<Transaction> txn;
6080
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6081
9
        if (err != TxnErrorCode::TXN_OK) {
6082
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6083
0
            return -1;
6084
0
        }
6085
9
        std::string_view k1 = k;
6086
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
6087
9
        k1.remove_prefix(1); // Remove key space
6088
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6089
9
        if (decode_key(&k1, &out) != 0) {
6090
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
6091
0
            return -1;
6092
0
        }
6093
9
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6094
9
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6095
9
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6096
        // Update txn_info
6097
9
        std::string txn_inf_key, txn_inf_val;
6098
9
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6099
9
        err = txn->get(txn_inf_key, &txn_inf_val);
6100
9
        if (err != TxnErrorCode::TXN_OK) {
6101
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
6102
0
            return -1;
6103
0
        }
6104
9
        TxnInfoPB txn_info;
6105
9
        if (!txn_info.ParseFromString(txn_inf_val)) {
6106
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
6107
0
            return -1;
6108
0
        }
6109
6110
9
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
6111
3
            txn.reset();
6112
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
6113
3
            std::shared_ptr<TxnLazyCommitTask> task =
6114
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
6115
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
6116
3
            if (ret.first != MetaServiceCode::OK) {
6117
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
6118
0
                             << "msg=" << ret.second;
6119
0
                return -1;
6120
0
            }
6121
3
            ++num_advance;
6122
3
            return 0;
6123
6
        } else {
6124
6
            TxnRunningPB txn_running_pb;
6125
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6126
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6127
0
                return -1;
6128
0
            }
6129
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6130
4
                return 0;
6131
4
            }
6132
2
            ++num_timeout;
6133
6134
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
6135
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
6136
2
            txn_info.set_finish_time(current_time);
6137
2
            txn_info.set_reason("timeout");
6138
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
6139
2
            txn_inf_val.clear();
6140
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
6141
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
6142
0
                return -1;
6143
0
            }
6144
2
            txn->put(txn_inf_key, txn_inf_val);
6145
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
6146
            // Put recycle txn key
6147
2
            std::string recyc_txn_key, recyc_txn_val;
6148
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
6149
2
            RecycleTxnPB recycle_txn_pb;
6150
2
            recycle_txn_pb.set_creation_time(current_time);
6151
2
            recycle_txn_pb.set_label(txn_info.label());
6152
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
6153
0
                LOG_WARNING("failed to serialize txn recycle info")
6154
0
                        .tag("key", hex(k))
6155
0
                        .tag("db_id", db_id)
6156
0
                        .tag("txn_id", txn_id);
6157
0
                return -1;
6158
0
            }
6159
2
            txn->put(recyc_txn_key, recyc_txn_val);
6160
            // Remove txn running key
6161
2
            txn->remove(k);
6162
2
            err = txn->commit();
6163
2
            if (err != TxnErrorCode::TXN_OK) {
6164
0
                LOG_WARNING("failed to commit txn err={}", err)
6165
0
                        .tag("key", hex(k))
6166
0
                        .tag("db_id", db_id)
6167
0
                        .tag("txn_id", txn_id);
6168
0
                return -1;
6169
0
            }
6170
2
            metrics_context.total_recycled_num = ++num_abort;
6171
2
            metrics_context.report();
6172
2
        }
6173
6174
2
        return 0;
6175
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6076
3
                                  this](std::string_view k, std::string_view v) -> int {
6077
3
        ++num_scanned;
6078
6079
3
        std::unique_ptr<Transaction> txn;
6080
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6081
3
        if (err != TxnErrorCode::TXN_OK) {
6082
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6083
0
            return -1;
6084
0
        }
6085
3
        std::string_view k1 = k;
6086
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
6087
3
        k1.remove_prefix(1); // Remove key space
6088
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6089
3
        if (decode_key(&k1, &out) != 0) {
6090
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
6091
0
            return -1;
6092
0
        }
6093
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6094
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6095
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6096
        // Update txn_info
6097
3
        std::string txn_inf_key, txn_inf_val;
6098
3
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6099
3
        err = txn->get(txn_inf_key, &txn_inf_val);
6100
3
        if (err != TxnErrorCode::TXN_OK) {
6101
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
6102
0
            return -1;
6103
0
        }
6104
3
        TxnInfoPB txn_info;
6105
3
        if (!txn_info.ParseFromString(txn_inf_val)) {
6106
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
6107
0
            return -1;
6108
0
        }
6109
6110
3
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
6111
3
            txn.reset();
6112
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
6113
3
            std::shared_ptr<TxnLazyCommitTask> task =
6114
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
6115
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
6116
3
            if (ret.first != MetaServiceCode::OK) {
6117
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
6118
0
                             << "msg=" << ret.second;
6119
0
                return -1;
6120
0
            }
6121
3
            ++num_advance;
6122
3
            return 0;
6123
3
        } else {
6124
0
            TxnRunningPB txn_running_pb;
6125
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6126
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6127
0
                return -1;
6128
0
            }
6129
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6130
0
                return 0;
6131
0
            }
6132
0
            ++num_timeout;
6133
6134
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
6135
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
6136
0
            txn_info.set_finish_time(current_time);
6137
0
            txn_info.set_reason("timeout");
6138
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
6139
0
            txn_inf_val.clear();
6140
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
6141
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
6142
0
                return -1;
6143
0
            }
6144
0
            txn->put(txn_inf_key, txn_inf_val);
6145
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
6146
            // Put recycle txn key
6147
0
            std::string recyc_txn_key, recyc_txn_val;
6148
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
6149
0
            RecycleTxnPB recycle_txn_pb;
6150
0
            recycle_txn_pb.set_creation_time(current_time);
6151
0
            recycle_txn_pb.set_label(txn_info.label());
6152
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
6153
0
                LOG_WARNING("failed to serialize txn recycle info")
6154
0
                        .tag("key", hex(k))
6155
0
                        .tag("db_id", db_id)
6156
0
                        .tag("txn_id", txn_id);
6157
0
                return -1;
6158
0
            }
6159
0
            txn->put(recyc_txn_key, recyc_txn_val);
6160
            // Remove txn running key
6161
0
            txn->remove(k);
6162
0
            err = txn->commit();
6163
0
            if (err != TxnErrorCode::TXN_OK) {
6164
0
                LOG_WARNING("failed to commit txn err={}", err)
6165
0
                        .tag("key", hex(k))
6166
0
                        .tag("db_id", db_id)
6167
0
                        .tag("txn_id", txn_id);
6168
0
                return -1;
6169
0
            }
6170
0
            metrics_context.total_recycled_num = ++num_abort;
6171
0
            metrics_context.report();
6172
0
        }
6173
6174
0
        return 0;
6175
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6076
6
                                  this](std::string_view k, std::string_view v) -> int {
6077
6
        ++num_scanned;
6078
6079
6
        std::unique_ptr<Transaction> txn;
6080
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6081
6
        if (err != TxnErrorCode::TXN_OK) {
6082
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6083
0
            return -1;
6084
0
        }
6085
6
        std::string_view k1 = k;
6086
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
6087
6
        k1.remove_prefix(1); // Remove key space
6088
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6089
6
        if (decode_key(&k1, &out) != 0) {
6090
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
6091
0
            return -1;
6092
0
        }
6093
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6094
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6095
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6096
        // Update txn_info
6097
6
        std::string txn_inf_key, txn_inf_val;
6098
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6099
6
        err = txn->get(txn_inf_key, &txn_inf_val);
6100
6
        if (err != TxnErrorCode::TXN_OK) {
6101
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
6102
0
            return -1;
6103
0
        }
6104
6
        TxnInfoPB txn_info;
6105
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
6106
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
6107
0
            return -1;
6108
0
        }
6109
6110
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
6111
0
            txn.reset();
6112
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
6113
0
            std::shared_ptr<TxnLazyCommitTask> task =
6114
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
6115
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
6116
0
            if (ret.first != MetaServiceCode::OK) {
6117
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
6118
0
                             << "msg=" << ret.second;
6119
0
                return -1;
6120
0
            }
6121
0
            ++num_advance;
6122
0
            return 0;
6123
6
        } else {
6124
6
            TxnRunningPB txn_running_pb;
6125
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6126
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6127
0
                return -1;
6128
0
            }
6129
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6130
4
                return 0;
6131
4
            }
6132
2
            ++num_timeout;
6133
6134
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
6135
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
6136
2
            txn_info.set_finish_time(current_time);
6137
2
            txn_info.set_reason("timeout");
6138
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
6139
2
            txn_inf_val.clear();
6140
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
6141
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
6142
0
                return -1;
6143
0
            }
6144
2
            txn->put(txn_inf_key, txn_inf_val);
6145
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
6146
            // Put recycle txn key
6147
2
            std::string recyc_txn_key, recyc_txn_val;
6148
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
6149
2
            RecycleTxnPB recycle_txn_pb;
6150
2
            recycle_txn_pb.set_creation_time(current_time);
6151
2
            recycle_txn_pb.set_label(txn_info.label());
6152
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
6153
0
                LOG_WARNING("failed to serialize txn recycle info")
6154
0
                        .tag("key", hex(k))
6155
0
                        .tag("db_id", db_id)
6156
0
                        .tag("txn_id", txn_id);
6157
0
                return -1;
6158
0
            }
6159
2
            txn->put(recyc_txn_key, recyc_txn_val);
6160
            // Remove txn running key
6161
2
            txn->remove(k);
6162
2
            err = txn->commit();
6163
2
            if (err != TxnErrorCode::TXN_OK) {
6164
0
                LOG_WARNING("failed to commit txn err={}", err)
6165
0
                        .tag("key", hex(k))
6166
0
                        .tag("db_id", db_id)
6167
0
                        .tag("txn_id", txn_id);
6168
0
                return -1;
6169
0
            }
6170
2
            metrics_context.total_recycled_num = ++num_abort;
6171
2
            metrics_context.report();
6172
2
        }
6173
6174
2
        return 0;
6175
6
    };
6176
6177
19
    if (config::enable_recycler_stats_metrics) {
6178
0
        scan_and_statistics_abort_timeout_txn();
6179
0
    }
6180
    // recycle_func and loop_done for scan and recycle
6181
19
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
6182
19
                            std::move(handle_txn_running_kv));
6183
19
}
6184
6185
19
int InstanceRecycler::recycle_expired_txn_label() {
6186
19
    const std::string task_name = "recycle_expired_txn_label";
6187
19
    int64_t num_scanned = 0;
6188
19
    int64_t num_expired = 0;
6189
19
    std::atomic_long num_recycled = 0;
6190
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6191
19
    int ret = 0;
6192
6193
19
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
6194
19
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6195
19
    std::string begin_recycle_txn_key;
6196
19
    std::string end_recycle_txn_key;
6197
19
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
6198
19
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
6199
19
    std::vector<std::string> recycle_txn_info_keys;
6200
6201
19
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
6202
6203
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6204
19
    register_recycle_task(task_name, start_time);
6205
19
    DORIS_CLOUD_DEFER {
6206
19
        unregister_recycle_task(task_name);
6207
19
        int64_t cost =
6208
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6209
19
        metrics_context.finish_report();
6210
19
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
6211
19
                .tag("instance_id", instance_id_)
6212
19
                .tag("num_scanned", num_scanned)
6213
19
                .tag("num_expired", num_expired)
6214
19
                .tag("num_recycled", num_recycled);
6215
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
6205
1
    DORIS_CLOUD_DEFER {
6206
1
        unregister_recycle_task(task_name);
6207
1
        int64_t cost =
6208
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6209
1
        metrics_context.finish_report();
6210
1
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
6211
1
                .tag("instance_id", instance_id_)
6212
1
                .tag("num_scanned", num_scanned)
6213
1
                .tag("num_expired", num_expired)
6214
1
                .tag("num_recycled", num_recycled);
6215
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
6205
18
    DORIS_CLOUD_DEFER {
6206
18
        unregister_recycle_task(task_name);
6207
18
        int64_t cost =
6208
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6209
18
        metrics_context.finish_report();
6210
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
6211
18
                .tag("instance_id", instance_id_)
6212
18
                .tag("num_scanned", num_scanned)
6213
18
                .tag("num_expired", num_expired)
6214
18
                .tag("num_recycled", num_recycled);
6215
18
    };
6216
6217
19
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6218
6219
19
    SyncExecutor<int> concurrent_delete_executor(
6220
19
            _thread_pool_group.s3_producer_pool,
6221
19
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
6222
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
6222
1
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
6222
23.0k
            [](const int& ret) { return ret != 0; });
6223
6224
19
    int64_t current_time_ms =
6225
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6226
6227
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6228
30.0k
        ++num_scanned;
6229
30.0k
        RecycleTxnPB recycle_txn_pb;
6230
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6231
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6232
0
            return -1;
6233
0
        }
6234
30.0k
        if ((config::force_immediate_recycle) ||
6235
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6236
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6237
30.0k
             current_time_ms)) {
6238
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6239
23.0k
            num_expired++;
6240
23.0k
            recycle_txn_info_keys.emplace_back(k);
6241
23.0k
        }
6242
30.0k
        return 0;
6243
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6227
1
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6228
1
        ++num_scanned;
6229
1
        RecycleTxnPB recycle_txn_pb;
6230
1
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6231
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6232
0
            return -1;
6233
0
        }
6234
1
        if ((config::force_immediate_recycle) ||
6235
1
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6236
1
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6237
1
             current_time_ms)) {
6238
1
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6239
1
            num_expired++;
6240
1
            recycle_txn_info_keys.emplace_back(k);
6241
1
        }
6242
1
        return 0;
6243
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6227
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6228
30.0k
        ++num_scanned;
6229
30.0k
        RecycleTxnPB recycle_txn_pb;
6230
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6231
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6232
0
            return -1;
6233
0
        }
6234
30.0k
        if ((config::force_immediate_recycle) ||
6235
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6236
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6237
30.0k
             current_time_ms)) {
6238
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6239
23.0k
            num_expired++;
6240
23.0k
            recycle_txn_info_keys.emplace_back(k);
6241
23.0k
        }
6242
30.0k
        return 0;
6243
30.0k
    };
6244
6245
    // int 0 for success, 1 for conflict, -1 for error
6246
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6247
23.0k
        std::string_view k1 = k;
6248
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6249
23.0k
        k1.remove_prefix(1); // Remove key space
6250
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6251
23.0k
        int ret = decode_key(&k1, &out);
6252
23.0k
        if (ret != 0) {
6253
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6254
0
            return -1;
6255
0
        }
6256
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6257
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6258
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6259
23.0k
        std::unique_ptr<Transaction> txn;
6260
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6261
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6262
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6263
0
            return -1;
6264
0
        }
6265
        // Remove txn index kv
6266
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6267
23.0k
        txn->remove(index_key);
6268
        // Remove txn info kv
6269
23.0k
        std::string info_key, info_val;
6270
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6271
23.0k
        err = txn->get(info_key, &info_val);
6272
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6273
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6274
0
            return -1;
6275
0
        }
6276
23.0k
        TxnInfoPB txn_info;
6277
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6278
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6279
0
            return -1;
6280
0
        }
6281
23.0k
        txn->remove(info_key);
6282
        // Remove sub txn index kvs
6283
23.0k
        std::vector<std::string> sub_txn_index_keys;
6284
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6285
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6286
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6287
22.9k
        }
6288
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6289
22.9k
            txn->remove(sub_txn_index_key);
6290
22.9k
        }
6291
        // Update txn label
6292
23.0k
        std::string label_key, label_val;
6293
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6294
23.0k
        err = txn->get(label_key, &label_val);
6295
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6296
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6297
0
                         << " err=" << err;
6298
0
            return -1;
6299
0
        }
6300
23.0k
        TxnLabelPB txn_label;
6301
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6302
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6303
0
            return -1;
6304
0
        }
6305
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6306
23.0k
        if (it != txn_label.txn_ids().end()) {
6307
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6308
23.0k
        }
6309
23.0k
        if (txn_label.txn_ids().empty()) {
6310
23.0k
            txn->remove(label_key);
6311
23.0k
            TEST_SYNC_POINT_CALLBACK(
6312
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6313
23.0k
        } else {
6314
72
            if (!txn_label.SerializeToString(&label_val)) {
6315
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6316
0
                return -1;
6317
0
            }
6318
72
            TEST_SYNC_POINT_CALLBACK(
6319
72
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6320
72
            txn->atomic_set_ver_value(label_key, label_val);
6321
72
            TEST_SYNC_POINT_CALLBACK(
6322
72
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6323
72
        }
6324
        // Remove recycle txn kv
6325
23.0k
        txn->remove(k);
6326
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6327
23.0k
        err = txn->commit();
6328
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6329
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6330
62
                TEST_SYNC_POINT_CALLBACK(
6331
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6332
                // log the txn_id and label
6333
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6334
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6335
62
                             << " txn_label=" << txn_info.label();
6336
62
                return 1;
6337
62
            }
6338
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6339
0
            return -1;
6340
62
        }
6341
23.0k
        ++num_recycled;
6342
6343
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6344
23.0k
        return 0;
6345
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6246
1
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6247
1
        std::string_view k1 = k;
6248
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6249
1
        k1.remove_prefix(1); // Remove key space
6250
1
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6251
1
        int ret = decode_key(&k1, &out);
6252
1
        if (ret != 0) {
6253
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6254
0
            return -1;
6255
0
        }
6256
1
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6257
1
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6258
1
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6259
1
        std::unique_ptr<Transaction> txn;
6260
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6261
1
        if (err != TxnErrorCode::TXN_OK) {
6262
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6263
0
            return -1;
6264
0
        }
6265
        // Remove txn index kv
6266
1
        auto index_key = txn_index_key({instance_id_, txn_id});
6267
1
        txn->remove(index_key);
6268
        // Remove txn info kv
6269
1
        std::string info_key, info_val;
6270
1
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6271
1
        err = txn->get(info_key, &info_val);
6272
1
        if (err != TxnErrorCode::TXN_OK) {
6273
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6274
0
            return -1;
6275
0
        }
6276
1
        TxnInfoPB txn_info;
6277
1
        if (!txn_info.ParseFromString(info_val)) {
6278
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6279
0
            return -1;
6280
0
        }
6281
1
        txn->remove(info_key);
6282
        // Remove sub txn index kvs
6283
1
        std::vector<std::string> sub_txn_index_keys;
6284
1
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6285
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6286
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
6287
0
        }
6288
1
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6289
0
            txn->remove(sub_txn_index_key);
6290
0
        }
6291
        // Update txn label
6292
1
        std::string label_key, label_val;
6293
1
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6294
1
        err = txn->get(label_key, &label_val);
6295
1
        if (err != TxnErrorCode::TXN_OK) {
6296
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6297
0
                         << " err=" << err;
6298
0
            return -1;
6299
0
        }
6300
1
        TxnLabelPB txn_label;
6301
1
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6302
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6303
0
            return -1;
6304
0
        }
6305
1
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6306
1
        if (it != txn_label.txn_ids().end()) {
6307
1
            txn_label.mutable_txn_ids()->erase(it);
6308
1
        }
6309
1
        if (txn_label.txn_ids().empty()) {
6310
1
            txn->remove(label_key);
6311
1
            TEST_SYNC_POINT_CALLBACK(
6312
1
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6313
1
        } else {
6314
0
            if (!txn_label.SerializeToString(&label_val)) {
6315
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6316
0
                return -1;
6317
0
            }
6318
0
            TEST_SYNC_POINT_CALLBACK(
6319
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6320
0
            txn->atomic_set_ver_value(label_key, label_val);
6321
0
            TEST_SYNC_POINT_CALLBACK(
6322
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6323
0
        }
6324
        // Remove recycle txn kv
6325
1
        txn->remove(k);
6326
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6327
1
        err = txn->commit();
6328
1
        if (err != TxnErrorCode::TXN_OK) {
6329
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
6330
0
                TEST_SYNC_POINT_CALLBACK(
6331
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6332
                // log the txn_id and label
6333
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6334
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6335
0
                             << " txn_label=" << txn_info.label();
6336
0
                return 1;
6337
0
            }
6338
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6339
0
            return -1;
6340
0
        }
6341
1
        ++num_recycled;
6342
6343
1
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6344
1
        return 0;
6345
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6246
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6247
23.0k
        std::string_view k1 = k;
6248
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6249
23.0k
        k1.remove_prefix(1); // Remove key space
6250
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6251
23.0k
        int ret = decode_key(&k1, &out);
6252
23.0k
        if (ret != 0) {
6253
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6254
0
            return -1;
6255
0
        }
6256
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6257
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6258
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6259
23.0k
        std::unique_ptr<Transaction> txn;
6260
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6261
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6262
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6263
0
            return -1;
6264
0
        }
6265
        // Remove txn index kv
6266
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6267
23.0k
        txn->remove(index_key);
6268
        // Remove txn info kv
6269
23.0k
        std::string info_key, info_val;
6270
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6271
23.0k
        err = txn->get(info_key, &info_val);
6272
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6273
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6274
0
            return -1;
6275
0
        }
6276
23.0k
        TxnInfoPB txn_info;
6277
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6278
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6279
0
            return -1;
6280
0
        }
6281
23.0k
        txn->remove(info_key);
6282
        // Remove sub txn index kvs
6283
23.0k
        std::vector<std::string> sub_txn_index_keys;
6284
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6285
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6286
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6287
22.9k
        }
6288
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6289
22.9k
            txn->remove(sub_txn_index_key);
6290
22.9k
        }
6291
        // Update txn label
6292
23.0k
        std::string label_key, label_val;
6293
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6294
23.0k
        err = txn->get(label_key, &label_val);
6295
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6296
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6297
0
                         << " err=" << err;
6298
0
            return -1;
6299
0
        }
6300
23.0k
        TxnLabelPB txn_label;
6301
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6302
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6303
0
            return -1;
6304
0
        }
6305
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6306
23.0k
        if (it != txn_label.txn_ids().end()) {
6307
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6308
23.0k
        }
6309
23.0k
        if (txn_label.txn_ids().empty()) {
6310
23.0k
            txn->remove(label_key);
6311
23.0k
            TEST_SYNC_POINT_CALLBACK(
6312
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6313
23.0k
        } else {
6314
72
            if (!txn_label.SerializeToString(&label_val)) {
6315
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6316
0
                return -1;
6317
0
            }
6318
72
            TEST_SYNC_POINT_CALLBACK(
6319
72
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6320
72
            txn->atomic_set_ver_value(label_key, label_val);
6321
72
            TEST_SYNC_POINT_CALLBACK(
6322
72
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6323
72
        }
6324
        // Remove recycle txn kv
6325
23.0k
        txn->remove(k);
6326
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6327
23.0k
        err = txn->commit();
6328
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6329
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6330
62
                TEST_SYNC_POINT_CALLBACK(
6331
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6332
                // log the txn_id and label
6333
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6334
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6335
62
                             << " txn_label=" << txn_info.label();
6336
62
                return 1;
6337
62
            }
6338
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6339
0
            return -1;
6340
62
        }
6341
23.0k
        ++num_recycled;
6342
6343
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6344
23.0k
        return 0;
6345
23.0k
    };
6346
6347
19
    auto loop_done = [&]() -> int {
6348
10
        DORIS_CLOUD_DEFER {
6349
10
            recycle_txn_info_keys.clear();
6350
10
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6348
1
        DORIS_CLOUD_DEFER {
6349
1
            recycle_txn_info_keys.clear();
6350
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6348
9
        DORIS_CLOUD_DEFER {
6349
9
            recycle_txn_info_keys.clear();
6350
9
        };
6351
10
        TEST_SYNC_POINT_CALLBACK(
6352
10
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6353
10
                &recycle_txn_info_keys);
6354
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6355
23.0k
            concurrent_delete_executor.add([&]() {
6356
23.0k
                int ret = delete_recycle_txn_kv(k);
6357
23.0k
                if (ret == 1) {
6358
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6359
54
                    for (int i = 1; i <= max_retry; ++i) {
6360
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6361
54
                        ret = delete_recycle_txn_kv(k);
6362
                        // clang-format off
6363
54
                        TEST_SYNC_POINT_CALLBACK(
6364
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6365
                        // clang-format off
6366
54
                        if (ret != 1) {
6367
18
                            break;
6368
18
                        }
6369
                        // random sleep 0-100 ms to retry
6370
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6371
36
                    }
6372
18
                }
6373
23.0k
                if (ret != 0) {
6374
9
                    LOG_WARNING("failed to delete recycle txn kv")
6375
9
                            .tag("instance id", instance_id_)
6376
9
                            .tag("key", hex(k));
6377
9
                    return -1;
6378
9
                }
6379
23.0k
                return 0;
6380
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6355
1
            concurrent_delete_executor.add([&]() {
6356
1
                int ret = delete_recycle_txn_kv(k);
6357
1
                if (ret == 1) {
6358
0
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6359
0
                    for (int i = 1; i <= max_retry; ++i) {
6360
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6361
0
                        ret = delete_recycle_txn_kv(k);
6362
                        // clang-format off
6363
0
                        TEST_SYNC_POINT_CALLBACK(
6364
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6365
                        // clang-format off
6366
0
                        if (ret != 1) {
6367
0
                            break;
6368
0
                        }
6369
                        // random sleep 0-100 ms to retry
6370
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6371
0
                    }
6372
0
                }
6373
1
                if (ret != 0) {
6374
0
                    LOG_WARNING("failed to delete recycle txn kv")
6375
0
                            .tag("instance id", instance_id_)
6376
0
                            .tag("key", hex(k));
6377
0
                    return -1;
6378
0
                }
6379
1
                return 0;
6380
1
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6355
23.0k
            concurrent_delete_executor.add([&]() {
6356
23.0k
                int ret = delete_recycle_txn_kv(k);
6357
23.0k
                if (ret == 1) {
6358
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6359
54
                    for (int i = 1; i <= max_retry; ++i) {
6360
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6361
54
                        ret = delete_recycle_txn_kv(k);
6362
                        // clang-format off
6363
54
                        TEST_SYNC_POINT_CALLBACK(
6364
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6365
                        // clang-format off
6366
54
                        if (ret != 1) {
6367
18
                            break;
6368
18
                        }
6369
                        // random sleep 0-100 ms to retry
6370
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6371
36
                    }
6372
18
                }
6373
23.0k
                if (ret != 0) {
6374
9
                    LOG_WARNING("failed to delete recycle txn kv")
6375
9
                            .tag("instance id", instance_id_)
6376
9
                            .tag("key", hex(k));
6377
9
                    return -1;
6378
9
                }
6379
23.0k
                return 0;
6380
23.0k
            });
6381
23.0k
        }
6382
10
        bool finished = true;
6383
10
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6384
23.0k
        for (int r : rets) {
6385
23.0k
            if (r != 0) {
6386
9
                ret = -1;
6387
9
            }
6388
23.0k
        }
6389
6390
10
        ret = finished ? ret : -1;
6391
6392
        // Update metrics after all concurrent tasks completed
6393
10
        metrics_context.total_recycled_num = num_recycled.load();
6394
10
        metrics_context.report();
6395
6396
10
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6397
6398
10
        if (ret != 0) {
6399
3
            LOG_WARNING("recycle txn kv ret!=0")
6400
3
                    .tag("finished", finished)
6401
3
                    .tag("ret", ret)
6402
3
                    .tag("instance_id", instance_id_);
6403
3
            return ret;
6404
3
        }
6405
7
        return ret;
6406
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6347
1
    auto loop_done = [&]() -> int {
6348
1
        DORIS_CLOUD_DEFER {
6349
1
            recycle_txn_info_keys.clear();
6350
1
        };
6351
1
        TEST_SYNC_POINT_CALLBACK(
6352
1
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6353
1
                &recycle_txn_info_keys);
6354
1
        for (const auto& k : recycle_txn_info_keys) {
6355
1
            concurrent_delete_executor.add([&]() {
6356
1
                int ret = delete_recycle_txn_kv(k);
6357
1
                if (ret == 1) {
6358
1
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6359
1
                    for (int i = 1; i <= max_retry; ++i) {
6360
1
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6361
1
                        ret = delete_recycle_txn_kv(k);
6362
                        // clang-format off
6363
1
                        TEST_SYNC_POINT_CALLBACK(
6364
1
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6365
                        // clang-format off
6366
1
                        if (ret != 1) {
6367
1
                            break;
6368
1
                        }
6369
                        // random sleep 0-100 ms to retry
6370
1
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6371
1
                    }
6372
1
                }
6373
1
                if (ret != 0) {
6374
1
                    LOG_WARNING("failed to delete recycle txn kv")
6375
1
                            .tag("instance id", instance_id_)
6376
1
                            .tag("key", hex(k));
6377
1
                    return -1;
6378
1
                }
6379
1
                return 0;
6380
1
            });
6381
1
        }
6382
1
        bool finished = true;
6383
1
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6384
1
        for (int r : rets) {
6385
1
            if (r != 0) {
6386
0
                ret = -1;
6387
0
            }
6388
1
        }
6389
6390
1
        ret = finished ? ret : -1;
6391
6392
        // Update metrics after all concurrent tasks completed
6393
1
        metrics_context.total_recycled_num = num_recycled.load();
6394
1
        metrics_context.report();
6395
6396
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6397
6398
1
        if (ret != 0) {
6399
0
            LOG_WARNING("recycle txn kv ret!=0")
6400
0
                    .tag("finished", finished)
6401
0
                    .tag("ret", ret)
6402
0
                    .tag("instance_id", instance_id_);
6403
0
            return ret;
6404
0
        }
6405
1
        return ret;
6406
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6347
9
    auto loop_done = [&]() -> int {
6348
9
        DORIS_CLOUD_DEFER {
6349
9
            recycle_txn_info_keys.clear();
6350
9
        };
6351
9
        TEST_SYNC_POINT_CALLBACK(
6352
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6353
9
                &recycle_txn_info_keys);
6354
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6355
23.0k
            concurrent_delete_executor.add([&]() {
6356
23.0k
                int ret = delete_recycle_txn_kv(k);
6357
23.0k
                if (ret == 1) {
6358
23.0k
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6359
23.0k
                    for (int i = 1; i <= max_retry; ++i) {
6360
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6361
23.0k
                        ret = delete_recycle_txn_kv(k);
6362
                        // clang-format off
6363
23.0k
                        TEST_SYNC_POINT_CALLBACK(
6364
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6365
                        // clang-format off
6366
23.0k
                        if (ret != 1) {
6367
23.0k
                            break;
6368
23.0k
                        }
6369
                        // random sleep 0-100 ms to retry
6370
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6371
23.0k
                    }
6372
23.0k
                }
6373
23.0k
                if (ret != 0) {
6374
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
6375
23.0k
                            .tag("instance id", instance_id_)
6376
23.0k
                            .tag("key", hex(k));
6377
23.0k
                    return -1;
6378
23.0k
                }
6379
23.0k
                return 0;
6380
23.0k
            });
6381
23.0k
        }
6382
9
        bool finished = true;
6383
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6384
23.0k
        for (int r : rets) {
6385
23.0k
            if (r != 0) {
6386
9
                ret = -1;
6387
9
            }
6388
23.0k
        }
6389
6390
9
        ret = finished ? ret : -1;
6391
6392
        // Update metrics after all concurrent tasks completed
6393
9
        metrics_context.total_recycled_num = num_recycled.load();
6394
9
        metrics_context.report();
6395
6396
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6397
6398
9
        if (ret != 0) {
6399
3
            LOG_WARNING("recycle txn kv ret!=0")
6400
3
                    .tag("finished", finished)
6401
3
                    .tag("ret", ret)
6402
3
                    .tag("instance_id", instance_id_);
6403
3
            return ret;
6404
3
        }
6405
6
        return ret;
6406
9
    };
6407
6408
19
    if (config::enable_recycler_stats_metrics) {
6409
0
        scan_and_statistics_expired_txn_label();
6410
0
    }
6411
    // recycle_func and loop_done for scan and recycle
6412
19
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
6413
19
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
6414
19
}
6415
6416
struct CopyJobIdTuple {
6417
    std::string instance_id;
6418
    std::string stage_id;
6419
    long table_id;
6420
    std::string copy_id;
6421
    std::string stage_path;
6422
};
6423
struct BatchObjStoreAccessor {
6424
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
6425
                          TxnKv* txn_kv)
6426
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
6427
3
    ~BatchObjStoreAccessor() {
6428
3
        if (!paths_.empty()) {
6429
3
            consume();
6430
3
        }
6431
3
    }
6432
6433
    /**
6434
    * To implicitely do batch work and submit the batch delete task to s3
6435
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
6436
    *
6437
    * @param copy_job The protubuf struct consists of the copy job files.
6438
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
6439
    *            it would last until we finish the delete task, here we need pass one string value
6440
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
6441
    */
6442
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
6443
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
6444
5
        auto& file_keys = copy_file_keys_[key];
6445
5
        file_keys.log_trace =
6446
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
6447
5
                            instance_id, stage_id, table_id, copy_id, path);
6448
5
        std::string_view log_trace = file_keys.log_trace;
6449
2.03k
        for (const auto& file : copy_job.object_files()) {
6450
2.03k
            auto relative_path = file.relative_path();
6451
2.03k
            paths_.push_back(relative_path);
6452
2.03k
            file_keys.keys.push_back(copy_file_key(
6453
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
6454
2.03k
            LOG_INFO(log_trace)
6455
2.03k
                    .tag("relative_path", relative_path)
6456
2.03k
                    .tag("batch_count", batch_count_);
6457
2.03k
        }
6458
5
        LOG_INFO(log_trace)
6459
5
                .tag("objects_num", copy_job.object_files().size())
6460
5
                .tag("batch_count", batch_count_);
6461
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
6462
        // recommend using delete objects when objects num is less than 10)
6463
5
        if (paths_.size() < 1000) {
6464
3
            return;
6465
3
        }
6466
2
        consume();
6467
2
    }
6468
6469
private:
6470
5
    void consume() {
6471
5
        DORIS_CLOUD_DEFER {
6472
5
            paths_.clear();
6473
5
            copy_file_keys_.clear();
6474
5
            batch_count_++;
6475
6476
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
6477
5
                        batch_count_);
6478
5
        };
6479
6480
5
        StopWatch sw;
6481
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
6482
5
        if (0 != accessor_->delete_files(paths_)) {
6483
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
6484
2
                        paths_.size(), batch_count_, sw.elapsed_us());
6485
2
            return;
6486
2
        }
6487
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
6488
3
                    paths_.size(), batch_count_, sw.elapsed_us());
6489
        // delete fdb's keys
6490
3
        for (auto& file_keys : copy_file_keys_) {
6491
3
            auto& [log_trace, keys] = file_keys.second;
6492
3
            std::unique_ptr<Transaction> txn;
6493
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
6494
0
                LOG(WARNING) << "failed to create txn";
6495
0
                continue;
6496
0
            }
6497
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6498
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6499
            // limited, should not cause the txn commit failed.
6500
1.02k
            for (const auto& key : keys) {
6501
1.02k
                txn->remove(key);
6502
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
6503
1.02k
            }
6504
3
            txn->remove(file_keys.first);
6505
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
6506
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
6507
0
                continue;
6508
0
            }
6509
3
        }
6510
3
    }
6511
    std::shared_ptr<StorageVaultAccessor> accessor_;
6512
    // the path of the s3 files to be deleted
6513
    std::vector<std::string> paths_;
6514
    struct CopyFiles {
6515
        std::string log_trace;
6516
        std::vector<std::string> keys;
6517
    };
6518
    // pair<std::string, std::vector<std::string>>
6519
    // first: instance_id_ stage_id table_id query_id
6520
    // second: keys to be deleted
6521
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
6522
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
6523
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
6524
    // which can together uniquely identifies different tasks for tracing log
6525
    uint64_t& batch_count_;
6526
    TxnKv* txn_kv_;
6527
};
6528
6529
13
int InstanceRecycler::recycle_copy_jobs() {
6530
13
    int64_t num_scanned = 0;
6531
13
    int64_t num_finished = 0;
6532
13
    int64_t num_expired = 0;
6533
13
    int64_t num_recycled = 0;
6534
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
6535
13
    uint64_t batch_count = 0;
6536
13
    const std::string task_name = "recycle_copy_jobs";
6537
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6538
6539
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
6540
6541
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6542
13
    register_recycle_task(task_name, start_time);
6543
6544
13
    DORIS_CLOUD_DEFER {
6545
13
        unregister_recycle_task(task_name);
6546
13
        int64_t cost =
6547
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6548
13
        metrics_context.finish_report();
6549
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6550
13
                .tag("instance_id", instance_id_)
6551
13
                .tag("num_scanned", num_scanned)
6552
13
                .tag("num_finished", num_finished)
6553
13
                .tag("num_expired", num_expired)
6554
13
                .tag("num_recycled", num_recycled);
6555
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
6544
13
    DORIS_CLOUD_DEFER {
6545
13
        unregister_recycle_task(task_name);
6546
13
        int64_t cost =
6547
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6548
13
        metrics_context.finish_report();
6549
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6550
13
                .tag("instance_id", instance_id_)
6551
13
                .tag("num_scanned", num_scanned)
6552
13
                .tag("num_finished", num_finished)
6553
13
                .tag("num_expired", num_expired)
6554
13
                .tag("num_recycled", num_recycled);
6555
13
    };
6556
6557
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
6558
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
6559
13
    std::string key0;
6560
13
    std::string key1;
6561
13
    copy_job_key(key_info0, &key0);
6562
13
    copy_job_key(key_info1, &key1);
6563
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
6564
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
6565
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
6566
16
                         this](std::string_view k, std::string_view v) -> int {
6567
16
        ++num_scanned;
6568
16
        CopyJobPB copy_job;
6569
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6570
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6571
0
            return -1;
6572
0
        }
6573
6574
        // decode copy job key
6575
16
        auto k1 = k;
6576
16
        k1.remove_prefix(1);
6577
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6578
16
        decode_key(&k1, &out);
6579
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6580
        // -> CopyJobPB
6581
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6582
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6583
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6584
6585
16
        bool check_storage = true;
6586
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6587
12
            ++num_finished;
6588
6589
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6590
7
                auto it = stage_accessor_map.find(stage_id);
6591
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6592
7
                std::string_view path;
6593
7
                if (it != stage_accessor_map.end()) {
6594
2
                    accessor = it->second;
6595
5
                } else {
6596
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6597
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6598
5
                                                      &inner_accessor);
6599
5
                    if (ret < 0) { // error
6600
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6601
0
                        return -1;
6602
5
                    } else if (ret == 0) {
6603
3
                        path = inner_accessor->uri();
6604
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6605
3
                                inner_accessor, batch_count, txn_kv_.get());
6606
3
                        stage_accessor_map.emplace(stage_id, accessor);
6607
3
                    } else { // stage not found, skip check storage
6608
2
                        check_storage = false;
6609
2
                    }
6610
5
                }
6611
7
                if (check_storage) {
6612
                    // TODO delete objects with key and etag is not supported
6613
5
                    accessor->add(std::move(copy_job), std::string(k),
6614
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6615
5
                    return 0;
6616
5
                }
6617
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6618
5
                int64_t current_time =
6619
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6620
5
                if (copy_job.finish_time_ms() > 0) {
6621
2
                    if (!config::force_immediate_recycle &&
6622
2
                        current_time < copy_job.finish_time_ms() +
6623
2
                                               config::copy_job_max_retention_second * 1000) {
6624
1
                        return 0;
6625
1
                    }
6626
3
                } else {
6627
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6628
3
                    if (!config::force_immediate_recycle &&
6629
3
                        current_time < copy_job.start_time_ms() +
6630
3
                                               config::copy_job_max_retention_second * 1000) {
6631
1
                        return 0;
6632
1
                    }
6633
3
                }
6634
5
            }
6635
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6636
4
            int64_t current_time =
6637
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6638
            // if copy job is timeout: delete all copy file kvs and copy job kv
6639
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6640
2
                return 0;
6641
2
            }
6642
2
            ++num_expired;
6643
2
        }
6644
6645
        // delete all copy files
6646
7
        std::vector<std::string> copy_file_keys;
6647
70
        for (auto& file : copy_job.object_files()) {
6648
70
            copy_file_keys.push_back(copy_file_key(
6649
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6650
70
        }
6651
7
        std::unique_ptr<Transaction> txn;
6652
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6653
0
            LOG(WARNING) << "failed to create txn";
6654
0
            return -1;
6655
0
        }
6656
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6657
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6658
        // limited, should not cause the txn commit failed.
6659
70
        for (const auto& key : copy_file_keys) {
6660
70
            txn->remove(key);
6661
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6662
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6663
70
                      << ", query_id=" << copy_id;
6664
70
        }
6665
7
        txn->remove(k);
6666
7
        TxnErrorCode err = txn->commit();
6667
7
        if (err != TxnErrorCode::TXN_OK) {
6668
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6669
0
            return -1;
6670
0
        }
6671
6672
7
        metrics_context.total_recycled_num = ++num_recycled;
6673
7
        metrics_context.report();
6674
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6675
7
        return 0;
6676
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6566
16
                         this](std::string_view k, std::string_view v) -> int {
6567
16
        ++num_scanned;
6568
16
        CopyJobPB copy_job;
6569
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6570
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6571
0
            return -1;
6572
0
        }
6573
6574
        // decode copy job key
6575
16
        auto k1 = k;
6576
16
        k1.remove_prefix(1);
6577
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6578
16
        decode_key(&k1, &out);
6579
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6580
        // -> CopyJobPB
6581
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6582
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6583
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6584
6585
16
        bool check_storage = true;
6586
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6587
12
            ++num_finished;
6588
6589
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6590
7
                auto it = stage_accessor_map.find(stage_id);
6591
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6592
7
                std::string_view path;
6593
7
                if (it != stage_accessor_map.end()) {
6594
2
                    accessor = it->second;
6595
5
                } else {
6596
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6597
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6598
5
                                                      &inner_accessor);
6599
5
                    if (ret < 0) { // error
6600
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6601
0
                        return -1;
6602
5
                    } else if (ret == 0) {
6603
3
                        path = inner_accessor->uri();
6604
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6605
3
                                inner_accessor, batch_count, txn_kv_.get());
6606
3
                        stage_accessor_map.emplace(stage_id, accessor);
6607
3
                    } else { // stage not found, skip check storage
6608
2
                        check_storage = false;
6609
2
                    }
6610
5
                }
6611
7
                if (check_storage) {
6612
                    // TODO delete objects with key and etag is not supported
6613
5
                    accessor->add(std::move(copy_job), std::string(k),
6614
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6615
5
                    return 0;
6616
5
                }
6617
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6618
5
                int64_t current_time =
6619
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6620
5
                if (copy_job.finish_time_ms() > 0) {
6621
2
                    if (!config::force_immediate_recycle &&
6622
2
                        current_time < copy_job.finish_time_ms() +
6623
2
                                               config::copy_job_max_retention_second * 1000) {
6624
1
                        return 0;
6625
1
                    }
6626
3
                } else {
6627
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6628
3
                    if (!config::force_immediate_recycle &&
6629
3
                        current_time < copy_job.start_time_ms() +
6630
3
                                               config::copy_job_max_retention_second * 1000) {
6631
1
                        return 0;
6632
1
                    }
6633
3
                }
6634
5
            }
6635
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6636
4
            int64_t current_time =
6637
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6638
            // if copy job is timeout: delete all copy file kvs and copy job kv
6639
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6640
2
                return 0;
6641
2
            }
6642
2
            ++num_expired;
6643
2
        }
6644
6645
        // delete all copy files
6646
7
        std::vector<std::string> copy_file_keys;
6647
70
        for (auto& file : copy_job.object_files()) {
6648
70
            copy_file_keys.push_back(copy_file_key(
6649
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6650
70
        }
6651
7
        std::unique_ptr<Transaction> txn;
6652
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6653
0
            LOG(WARNING) << "failed to create txn";
6654
0
            return -1;
6655
0
        }
6656
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6657
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6658
        // limited, should not cause the txn commit failed.
6659
70
        for (const auto& key : copy_file_keys) {
6660
70
            txn->remove(key);
6661
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6662
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6663
70
                      << ", query_id=" << copy_id;
6664
70
        }
6665
7
        txn->remove(k);
6666
7
        TxnErrorCode err = txn->commit();
6667
7
        if (err != TxnErrorCode::TXN_OK) {
6668
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6669
0
            return -1;
6670
0
        }
6671
6672
7
        metrics_context.total_recycled_num = ++num_recycled;
6673
7
        metrics_context.report();
6674
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6675
7
        return 0;
6676
7
    };
6677
6678
13
    if (config::enable_recycler_stats_metrics) {
6679
0
        scan_and_statistics_copy_jobs();
6680
0
    }
6681
    // recycle_func and loop_done for scan and recycle
6682
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
6683
13
}
6684
6685
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
6686
                                             const StagePB::StageType& stage_type,
6687
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
6688
5
#ifdef UNIT_TEST
6689
    // In unit test, external use the same accessor as the internal stage
6690
5
    auto it = accessor_map_.find(stage_id);
6691
5
    if (it != accessor_map_.end()) {
6692
3
        *accessor = it->second;
6693
3
    } else {
6694
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
6695
2
        return 1;
6696
2
    }
6697
#else
6698
    // init s3 accessor and add to accessor map
6699
    auto stage_it =
6700
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
6701
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
6702
6703
    if (stage_it == instance_info_.stages().end()) {
6704
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
6705
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
6706
        return 1;
6707
    }
6708
6709
    const auto& object_store_info = stage_it->obj_info();
6710
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
6711
6712
    S3Conf s3_conf;
6713
    if (stage_type == StagePB::EXTERNAL) {
6714
        if (stage_access_type == StagePB::AKSK) {
6715
            auto conf = S3Conf::from_obj_store_info(object_store_info);
6716
            if (!conf) {
6717
                return -1;
6718
            }
6719
6720
            s3_conf = std::move(*conf);
6721
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
6722
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
6723
            if (!conf) {
6724
                return -1;
6725
            }
6726
6727
            s3_conf = std::move(*conf);
6728
            if (instance_info_.ram_user().has_encryption_info()) {
6729
                AkSkPair plain_ak_sk_pair;
6730
                int ret = decrypt_ak_sk_helper(
6731
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
6732
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
6733
                if (ret != 0) {
6734
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
6735
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
6736
                    return -1;
6737
                }
6738
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
6739
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
6740
            } else {
6741
                s3_conf.ak = instance_info_.ram_user().ak();
6742
                s3_conf.sk = instance_info_.ram_user().sk();
6743
            }
6744
        } else {
6745
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
6746
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
6747
            return -1;
6748
        }
6749
    } else if (stage_type == StagePB::INTERNAL) {
6750
        int idx = stoi(object_store_info.id());
6751
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6752
            LOG(WARNING) << "invalid idx: " << idx;
6753
            return -1;
6754
        }
6755
6756
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6757
        auto conf = S3Conf::from_obj_store_info(old_obj);
6758
        if (!conf) {
6759
            return -1;
6760
        }
6761
6762
        s3_conf = std::move(*conf);
6763
        s3_conf.prefix = object_store_info.prefix();
6764
    } else {
6765
        LOG(WARNING) << "unknown stage type " << stage_type;
6766
        return -1;
6767
    }
6768
6769
    std::shared_ptr<S3Accessor> s3_accessor;
6770
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
6771
    if (ret != 0) {
6772
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
6773
        return -1;
6774
    }
6775
6776
    *accessor = std::move(s3_accessor);
6777
#endif
6778
3
    return 0;
6779
5
}
6780
6781
11
int InstanceRecycler::recycle_stage() {
6782
11
    int64_t num_scanned = 0;
6783
11
    int64_t num_recycled = 0;
6784
11
    const std::string task_name = "recycle_stage";
6785
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6786
6787
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
6788
6789
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6790
11
    register_recycle_task(task_name, start_time);
6791
6792
11
    DORIS_CLOUD_DEFER {
6793
11
        unregister_recycle_task(task_name);
6794
11
        int64_t cost =
6795
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6796
11
        metrics_context.finish_report();
6797
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6798
11
                .tag("instance_id", instance_id_)
6799
11
                .tag("num_scanned", num_scanned)
6800
11
                .tag("num_recycled", num_recycled);
6801
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
6792
11
    DORIS_CLOUD_DEFER {
6793
11
        unregister_recycle_task(task_name);
6794
11
        int64_t cost =
6795
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6796
11
        metrics_context.finish_report();
6797
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6798
11
                .tag("instance_id", instance_id_)
6799
11
                .tag("num_scanned", num_scanned)
6800
11
                .tag("num_recycled", num_recycled);
6801
11
    };
6802
6803
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6804
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6805
11
    std::string key0 = recycle_stage_key(key_info0);
6806
11
    std::string key1 = recycle_stage_key(key_info1);
6807
6808
11
    std::vector<std::string_view> stage_keys;
6809
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
6810
11
                         this](std::string_view k, std::string_view v) -> int {
6811
1
        ++num_scanned;
6812
1
        RecycleStagePB recycle_stage;
6813
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6814
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6815
0
            return -1;
6816
0
        }
6817
6818
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6819
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6820
0
            LOG(WARNING) << "invalid idx: " << idx;
6821
0
            return -1;
6822
0
        }
6823
6824
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6825
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6826
1
                [&] {
6827
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6828
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6829
1
                    if (!s3_conf) {
6830
1
                        return -1;
6831
1
                    }
6832
6833
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6834
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6835
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6836
1
                    if (ret != 0) {
6837
1
                        return -1;
6838
1
                    }
6839
6840
1
                    accessor = std::move(s3_accessor);
6841
1
                    return 0;
6842
1
                }(),
6843
1
                "recycle_stage:get_accessor", &accessor);
6844
6845
1
        if (ret != 0) {
6846
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6847
0
            return ret;
6848
0
        }
6849
6850
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6851
1
                .tag("instance_id", instance_id_)
6852
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6853
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6854
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6855
1
                .tag("obj_info_id", idx)
6856
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6857
1
        ret = accessor->delete_all();
6858
1
        if (ret != 0) {
6859
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6860
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6861
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6862
0
                         << ", ret=" << ret;
6863
0
            return -1;
6864
0
        }
6865
1
        metrics_context.total_recycled_num = ++num_recycled;
6866
1
        metrics_context.report();
6867
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6868
1
        stage_keys.push_back(k);
6869
1
        return 0;
6870
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6810
1
                         this](std::string_view k, std::string_view v) -> int {
6811
1
        ++num_scanned;
6812
1
        RecycleStagePB recycle_stage;
6813
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6814
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6815
0
            return -1;
6816
0
        }
6817
6818
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6819
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6820
0
            LOG(WARNING) << "invalid idx: " << idx;
6821
0
            return -1;
6822
0
        }
6823
6824
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6825
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6826
1
                [&] {
6827
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6828
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6829
1
                    if (!s3_conf) {
6830
1
                        return -1;
6831
1
                    }
6832
6833
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6834
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6835
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6836
1
                    if (ret != 0) {
6837
1
                        return -1;
6838
1
                    }
6839
6840
1
                    accessor = std::move(s3_accessor);
6841
1
                    return 0;
6842
1
                }(),
6843
1
                "recycle_stage:get_accessor", &accessor);
6844
6845
1
        if (ret != 0) {
6846
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6847
0
            return ret;
6848
0
        }
6849
6850
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6851
1
                .tag("instance_id", instance_id_)
6852
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6853
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6854
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6855
1
                .tag("obj_info_id", idx)
6856
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6857
1
        ret = accessor->delete_all();
6858
1
        if (ret != 0) {
6859
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6860
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6861
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6862
0
                         << ", ret=" << ret;
6863
0
            return -1;
6864
0
        }
6865
1
        metrics_context.total_recycled_num = ++num_recycled;
6866
1
        metrics_context.report();
6867
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6868
1
        stage_keys.push_back(k);
6869
1
        return 0;
6870
1
    };
6871
6872
11
    auto loop_done = [&stage_keys, this]() -> int {
6873
1
        if (stage_keys.empty()) return 0;
6874
1
        DORIS_CLOUD_DEFER {
6875
1
            stage_keys.clear();
6876
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6874
1
        DORIS_CLOUD_DEFER {
6875
1
            stage_keys.clear();
6876
1
        };
6877
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6878
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6879
0
            return -1;
6880
0
        }
6881
1
        return 0;
6882
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
6872
1
    auto loop_done = [&stage_keys, this]() -> int {
6873
1
        if (stage_keys.empty()) return 0;
6874
1
        DORIS_CLOUD_DEFER {
6875
1
            stage_keys.clear();
6876
1
        };
6877
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6878
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6879
0
            return -1;
6880
0
        }
6881
1
        return 0;
6882
1
    };
6883
11
    if (config::enable_recycler_stats_metrics) {
6884
0
        scan_and_statistics_stage();
6885
0
    }
6886
    // recycle_func and loop_done for scan and recycle
6887
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
6888
11
}
6889
6890
10
int InstanceRecycler::recycle_expired_stage_objects() {
6891
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
6892
6893
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6894
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
6895
6896
10
    DORIS_CLOUD_DEFER {
6897
10
        int64_t cost =
6898
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6899
10
        metrics_context.finish_report();
6900
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6901
10
                .tag("instance_id", instance_id_);
6902
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
6896
10
    DORIS_CLOUD_DEFER {
6897
10
        int64_t cost =
6898
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6899
10
        metrics_context.finish_report();
6900
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6901
10
                .tag("instance_id", instance_id_);
6902
10
    };
6903
6904
10
    int ret = 0;
6905
6906
10
    if (config::enable_recycler_stats_metrics) {
6907
0
        scan_and_statistics_expired_stage_objects();
6908
0
    }
6909
6910
10
    for (const auto& stage : instance_info_.stages()) {
6911
0
        std::stringstream ss;
6912
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
6913
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
6914
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
6915
0
           << ", prefix=" << stage.obj_info().prefix();
6916
6917
0
        if (stopped()) {
6918
0
            break;
6919
0
        }
6920
0
        if (stage.type() == StagePB::EXTERNAL) {
6921
0
            continue;
6922
0
        }
6923
0
        int idx = stoi(stage.obj_info().id());
6924
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6925
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
6926
0
            continue;
6927
0
        }
6928
6929
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6930
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6931
0
        if (!s3_conf) {
6932
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
6933
0
            continue;
6934
0
        }
6935
6936
0
        s3_conf->prefix = stage.obj_info().prefix();
6937
0
        std::shared_ptr<S3Accessor> accessor;
6938
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
6939
0
        if (ret1 != 0) {
6940
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
6941
0
            ret = -1;
6942
0
            continue;
6943
0
        }
6944
6945
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
6946
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
6947
0
            ret = -1;
6948
0
            continue;
6949
0
        }
6950
6951
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
6952
0
        int64_t expiration_time =
6953
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
6954
0
                config::internal_stage_objects_expire_time_second;
6955
0
        if (config::force_immediate_recycle) {
6956
0
            expiration_time = INT64_MAX;
6957
0
        }
6958
0
        ret1 = accessor->delete_all(expiration_time);
6959
0
        if (ret1 != 0) {
6960
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
6961
0
                         << ss.str();
6962
0
            ret = -1;
6963
0
            continue;
6964
0
        }
6965
0
        metrics_context.total_recycled_num++;
6966
0
        metrics_context.report();
6967
0
    }
6968
10
    return ret;
6969
10
}
6970
6971
193
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
6972
193
    std::lock_guard lock(recycle_tasks_mutex);
6973
193
    running_recycle_tasks[task_name] = start_time;
6974
193
}
6975
6976
193
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
6977
193
    std::lock_guard lock(recycle_tasks_mutex);
6978
193
    DCHECK(running_recycle_tasks[task_name] > 0);
6979
193
    running_recycle_tasks.erase(task_name);
6980
193
}
6981
6982
21
bool InstanceRecycler::check_recycle_tasks() {
6983
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
6984
21
    {
6985
21
        std::lock_guard lock(recycle_tasks_mutex);
6986
21
        tmp_running_recycle_tasks = running_recycle_tasks;
6987
21
    }
6988
6989
21
    bool found = false;
6990
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6991
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
6992
20
        int64_t cost = now - start_time;
6993
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
6994
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
6995
20
                    .tag("instance_id", instance_id_)
6996
20
                    .tag("task", task_name);
6997
20
            found = true;
6998
20
        }
6999
20
    }
7000
7001
21
    return found;
7002
21
}
7003
7004
// Scan and statistics indexes that need to be recycled
7005
0
int InstanceRecycler::scan_and_statistics_indexes() {
7006
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
7007
7008
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
7009
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
7010
0
    std::string index_key0;
7011
0
    std::string index_key1;
7012
0
    recycle_index_key(index_key_info0, &index_key0);
7013
0
    recycle_index_key(index_key_info1, &index_key1);
7014
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7015
7016
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
7017
0
        RecycleIndexPB index_pb;
7018
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
7019
0
            return 0;
7020
0
        }
7021
0
        int64_t current_time = ::time(nullptr);
7022
0
        if (current_time <
7023
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
7024
0
            return 0;
7025
0
        }
7026
        // decode index_id
7027
0
        auto k1 = k;
7028
0
        k1.remove_prefix(1);
7029
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7030
0
        decode_key(&k1, &out);
7031
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
7032
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
7033
0
        std::unique_ptr<Transaction> txn;
7034
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7035
0
        if (err != TxnErrorCode::TXN_OK) {
7036
0
            return 0;
7037
0
        }
7038
0
        std::string val;
7039
0
        err = txn->get(k, &val);
7040
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7041
0
            return 0;
7042
0
        }
7043
0
        if (err != TxnErrorCode::TXN_OK) {
7044
0
            return 0;
7045
0
        }
7046
0
        index_pb.Clear();
7047
0
        if (!index_pb.ParseFromString(val)) {
7048
0
            return 0;
7049
0
        }
7050
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
7051
0
            return 0;
7052
0
        }
7053
0
        metrics_context.total_need_recycle_num++;
7054
0
        return 0;
7055
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7056
7057
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
7058
0
    metrics_context.report(true);
7059
0
    segment_metrics_context_.report(true);
7060
0
    tablet_metrics_context_.report(true);
7061
0
    return ret;
7062
0
}
7063
7064
// Scan and statistics partitions that need to be recycled
7065
0
int InstanceRecycler::scan_and_statistics_partitions() {
7066
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
7067
7068
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
7069
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
7070
0
    std::string part_key0;
7071
0
    std::string part_key1;
7072
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7073
7074
0
    recycle_partition_key(part_key_info0, &part_key0);
7075
0
    recycle_partition_key(part_key_info1, &part_key1);
7076
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
7077
0
        RecyclePartitionPB part_pb;
7078
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
7079
0
            return 0;
7080
0
        }
7081
0
        int64_t current_time = ::time(nullptr);
7082
0
        if (current_time <
7083
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
7084
0
            return 0;
7085
0
        }
7086
        // decode partition_id
7087
0
        auto k1 = k;
7088
0
        k1.remove_prefix(1);
7089
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7090
0
        decode_key(&k1, &out);
7091
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
7092
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
7093
        // Change state to RECYCLING
7094
0
        std::unique_ptr<Transaction> txn;
7095
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7096
0
        if (err != TxnErrorCode::TXN_OK) {
7097
0
            return 0;
7098
0
        }
7099
0
        std::string val;
7100
0
        err = txn->get(k, &val);
7101
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7102
0
            return 0;
7103
0
        }
7104
0
        if (err != TxnErrorCode::TXN_OK) {
7105
0
            return 0;
7106
0
        }
7107
0
        part_pb.Clear();
7108
0
        if (!part_pb.ParseFromString(val)) {
7109
0
            return 0;
7110
0
        }
7111
        // Partitions with PREPARED state MUST have no data
7112
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
7113
0
        int ret = 0;
7114
0
        for (int64_t index_id : part_pb.index_id()) {
7115
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
7116
0
                                            partition_id, is_empty_tablet) != 0) {
7117
0
                ret = 0;
7118
0
            }
7119
0
        }
7120
0
        metrics_context.total_need_recycle_num++;
7121
0
        return ret;
7122
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7123
7124
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
7125
0
    metrics_context.report(true);
7126
0
    segment_metrics_context_.report(true);
7127
0
    tablet_metrics_context_.report(true);
7128
0
    return ret;
7129
0
}
7130
7131
// Scan and statistics rowsets that need to be recycled
7132
0
int InstanceRecycler::scan_and_statistics_rowsets() {
7133
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
7134
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
7135
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
7136
0
    std::string recyc_rs_key0;
7137
0
    std::string recyc_rs_key1;
7138
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
7139
0
                recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
7140
0
       int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7141
7142
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
7143
0
        RecycleRowsetPB rowset;
7144
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
7145
0
            return 0;
7146
0
        }
7147
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
7148
0
        int64_t current_time = ::time(nullptr);
7149
0
        if (current_time <
7150
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
7151
0
            return 0;
7152
0
        }
7153
7154
0
        if (!rowset.has_type()) {
7155
0
            if (!rowset.has_resource_id()) [[unlikely]] {
7156
0
                return 0;
7157
0
            }
7158
0
            if (rowset.resource_id().empty()) [[unlikely]] {
7159
0
                return 0;
7160
0
            }
7161
0
            metrics_context.total_need_recycle_num++;
7162
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
7163
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
7164
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
7165
0
            return 0;
7166
0
        }
7167
7168
0
        if(!rowset_meta->has_is_recycled() || !rowset_meta->is_recycled()) {
7169
0
            return 0;
7170
0
        }
7171
7172
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
7173
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
7174
0
                return 0;
7175
0
            }
7176
0
        }
7177
0
        metrics_context.total_need_recycle_num++;
7178
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
7179
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
7180
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
7181
0
        return 0;
7182
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7183
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
7184
0
    metrics_context.report(true);
7185
0
    segment_metrics_context_.report(true);
7186
0
    return ret;
7187
0
}
7188
7189
// Scan and statistics tmp_rowsets that need to be recycled
7190
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
7191
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
7192
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
7193
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
7194
0
    std::string tmp_rs_key0;
7195
0
    std::string tmp_rs_key1;
7196
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
7197
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
7198
7199
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7200
7201
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
7202
0
        doris::RowsetMetaCloudPB rowset;
7203
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
7204
0
            return 0;
7205
0
        }
7206
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
7207
0
        int64_t current_time = ::time(nullptr);
7208
0
        if (current_time < expiration) {
7209
0
            return 0;
7210
0
        }
7211
7212
0
        DCHECK_GT(rowset.txn_id(), 0)
7213
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
7214
7215
0
        if(!rowset.has_is_recycled() || !rowset.is_recycled()) {
7216
0
            return 0;
7217
0
        }
7218
7219
0
        if (!rowset.has_resource_id()) {
7220
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
7221
0
                return 0;
7222
0
            }
7223
0
            return 0;
7224
0
        }
7225
7226
0
        metrics_context.total_need_recycle_num++;
7227
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
7228
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
7229
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
7230
0
        return 0;
7231
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7232
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
7233
0
    metrics_context.report(true);
7234
0
    segment_metrics_context_.report(true);
7235
0
    return ret;
7236
0
}
7237
7238
// Scan and statistics abort_timeout_txn that need to be recycled
7239
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
7240
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
7241
7242
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
7243
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
7244
0
    std::string begin_txn_running_key;
7245
0
    std::string end_txn_running_key;
7246
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
7247
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
7248
7249
0
    int64_t current_time =
7250
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7251
7252
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
7253
0
                                               std::string_view k, std::string_view v) -> int {
7254
0
        std::unique_ptr<Transaction> txn;
7255
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7256
0
        if (err != TxnErrorCode::TXN_OK) {
7257
0
            return 0;
7258
0
        }
7259
0
        std::string_view k1 = k;
7260
0
        k1.remove_prefix(1);
7261
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7262
0
        if (decode_key(&k1, &out) != 0) {
7263
0
            return 0;
7264
0
        }
7265
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
7266
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
7267
        // Update txn_info
7268
0
        std::string txn_inf_key, txn_inf_val;
7269
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
7270
0
        err = txn->get(txn_inf_key, &txn_inf_val);
7271
0
        if (err != TxnErrorCode::TXN_OK) {
7272
0
            return 0;
7273
0
        }
7274
0
        TxnInfoPB txn_info;
7275
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
7276
0
            return 0;
7277
0
        }
7278
7279
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
7280
0
            TxnRunningPB txn_running_pb;
7281
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
7282
0
                return 0;
7283
0
            }
7284
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
7285
0
                return 0;
7286
0
            }
7287
0
            metrics_context.total_need_recycle_num++;
7288
0
        }
7289
0
        return 0;
7290
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7291
7292
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
7293
0
    metrics_context.report(true);
7294
0
    return ret;
7295
0
}
7296
7297
// Scan and statistics expired_txn_label that need to be recycled
7298
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
7299
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
7300
7301
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
7302
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
7303
0
    std::string begin_recycle_txn_key;
7304
0
    std::string end_recycle_txn_key;
7305
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
7306
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
7307
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7308
0
    int64_t current_time_ms =
7309
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7310
7311
    // for calculate the total num or bytes of recyled objects
7312
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
7313
0
        RecycleTxnPB recycle_txn_pb;
7314
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
7315
0
            return 0;
7316
0
        }
7317
0
        if ((config::force_immediate_recycle) ||
7318
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
7319
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
7320
0
             current_time_ms)) {
7321
0
            metrics_context.total_need_recycle_num++;
7322
0
        }
7323
0
        return 0;
7324
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7325
7326
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
7327
0
    metrics_context.report(true);
7328
0
    return ret;
7329
0
}
7330
7331
// Scan and statistics copy_jobs that need to be recycled
7332
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
7333
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
7334
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
7335
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
7336
0
    std::string key0;
7337
0
    std::string key1;
7338
0
    copy_job_key(key_info0, &key0);
7339
0
    copy_job_key(key_info1, &key1);
7340
7341
    // for calculate the total num or bytes of recyled objects
7342
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
7343
0
        CopyJobPB copy_job;
7344
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
7345
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
7346
0
            return 0;
7347
0
        }
7348
7349
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
7350
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
7351
0
                int64_t current_time =
7352
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7353
0
                if (copy_job.finish_time_ms() > 0) {
7354
0
                    if (!config::force_immediate_recycle &&
7355
0
                        current_time < copy_job.finish_time_ms() +
7356
0
                                               config::copy_job_max_retention_second * 1000) {
7357
0
                        return 0;
7358
0
                    }
7359
0
                } else {
7360
0
                    if (!config::force_immediate_recycle &&
7361
0
                        current_time < copy_job.start_time_ms() +
7362
0
                                               config::copy_job_max_retention_second * 1000) {
7363
0
                        return 0;
7364
0
                    }
7365
0
                }
7366
0
            }
7367
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
7368
0
            int64_t current_time =
7369
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7370
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
7371
0
                return 0;
7372
0
            }
7373
0
        }
7374
0
        metrics_context.total_need_recycle_num++;
7375
0
        return 0;
7376
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7377
7378
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7379
0
    metrics_context.report(true);
7380
0
    return ret;
7381
0
}
7382
7383
// Scan and statistics stage that need to be recycled
7384
0
int InstanceRecycler::scan_and_statistics_stage() {
7385
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
7386
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
7387
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
7388
0
    std::string key0 = recycle_stage_key(key_info0);
7389
0
    std::string key1 = recycle_stage_key(key_info1);
7390
7391
    // for calculate the total num or bytes of recyled objects
7392
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
7393
0
                                                        std::string_view v) -> int {
7394
0
        RecycleStagePB recycle_stage;
7395
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
7396
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
7397
0
            return 0;
7398
0
        }
7399
7400
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
7401
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
7402
0
            LOG(WARNING) << "invalid idx: " << idx;
7403
0
            return 0;
7404
0
        }
7405
7406
0
        std::shared_ptr<StorageVaultAccessor> accessor;
7407
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
7408
0
                [&] {
7409
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
7410
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7411
0
                    if (!s3_conf) {
7412
0
                        return 0;
7413
0
                    }
7414
7415
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
7416
0
                    std::shared_ptr<S3Accessor> s3_accessor;
7417
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
7418
0
                    if (ret != 0) {
7419
0
                        return 0;
7420
0
                    }
7421
7422
0
                    accessor = std::move(s3_accessor);
7423
0
                    return 0;
7424
0
                }(),
7425
0
                "recycle_stage:get_accessor", &accessor);
7426
7427
0
        if (ret != 0) {
7428
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
7429
0
            return 0;
7430
0
        }
7431
7432
0
        metrics_context.total_need_recycle_num++;
7433
0
        return 0;
7434
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7435
7436
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7437
0
    metrics_context.report(true);
7438
0
    return ret;
7439
0
}
7440
7441
// Scan and statistics expired_stage_objects that need to be recycled
7442
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
7443
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
7444
7445
    // for calculate the total num or bytes of recyled objects
7446
0
    auto scan_and_statistics = [&metrics_context, this]() {
7447
0
        for (const auto& stage : instance_info_.stages()) {
7448
0
            if (stopped()) {
7449
0
                break;
7450
0
            }
7451
0
            if (stage.type() == StagePB::EXTERNAL) {
7452
0
                continue;
7453
0
            }
7454
0
            int idx = stoi(stage.obj_info().id());
7455
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
7456
0
                continue;
7457
0
            }
7458
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
7459
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7460
0
            if (!s3_conf) {
7461
0
                continue;
7462
0
            }
7463
0
            s3_conf->prefix = stage.obj_info().prefix();
7464
0
            std::shared_ptr<S3Accessor> accessor;
7465
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
7466
0
            if (ret1 != 0) {
7467
0
                continue;
7468
0
            }
7469
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
7470
0
                continue;
7471
0
            }
7472
0
            metrics_context.total_need_recycle_num++;
7473
0
        }
7474
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
7475
7476
0
    scan_and_statistics();
7477
0
    metrics_context.report(true);
7478
0
    return 0;
7479
0
}
7480
7481
// Scan and statistics versions that need to be recycled
7482
0
int InstanceRecycler::scan_and_statistics_versions() {
7483
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
7484
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
7485
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
7486
7487
0
    int64_t last_scanned_table_id = 0;
7488
0
    bool is_recycled = false; // Is last scanned kv recycled
7489
    // for calculate the total num or bytes of recyled objects
7490
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
7491
0
                                       std::string_view k, std::string_view) {
7492
0
        auto k1 = k;
7493
0
        k1.remove_prefix(1);
7494
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
7495
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7496
0
        decode_key(&k1, &out);
7497
0
        DCHECK_EQ(out.size(), 6) << k;
7498
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
7499
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
7500
0
            metrics_context.total_need_recycle_num +=
7501
0
                    is_recycled; // Version kv of this table has been recycled
7502
0
            return 0;
7503
0
        }
7504
0
        last_scanned_table_id = table_id;
7505
0
        is_recycled = false;
7506
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
7507
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
7508
0
        std::unique_ptr<Transaction> txn;
7509
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7510
0
        if (err != TxnErrorCode::TXN_OK) {
7511
0
            return 0;
7512
0
        }
7513
0
        std::unique_ptr<RangeGetIterator> iter;
7514
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
7515
0
        if (err != TxnErrorCode::TXN_OK) {
7516
0
            return 0;
7517
0
        }
7518
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
7519
0
            return 0;
7520
0
        }
7521
0
        metrics_context.total_need_recycle_num++;
7522
0
        is_recycled = true;
7523
0
        return 0;
7524
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7525
7526
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
7527
0
    metrics_context.report(true);
7528
0
    return ret;
7529
0
}
7530
7531
// Scan and statistics restore jobs that need to be recycled
7532
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
7533
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
7534
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
7535
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
7536
0
    std::string restore_job_key0;
7537
0
    std::string restore_job_key1;
7538
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
7539
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
7540
7541
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7542
7543
    // for calculate the total num or bytes of recyled objects
7544
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
7545
0
        RestoreJobCloudPB restore_job_pb;
7546
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
7547
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
7548
0
            return 0;
7549
0
        }
7550
0
        int64_t expiration =
7551
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
7552
0
        int64_t current_time = ::time(nullptr);
7553
0
        if (current_time < expiration) { // not expired
7554
0
            return 0;
7555
0
        }
7556
0
        metrics_context.total_need_recycle_num++;
7557
0
        if(restore_job_pb.need_recycle_data()) {
7558
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
7559
0
        }
7560
0
        return 0;
7561
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7562
7563
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
7564
0
    metrics_context.report(true);
7565
0
    return ret;
7566
0
}
7567
7568
3
void InstanceRecycler::scan_and_statistics_operation_logs() {
7569
3
    if (!should_recycle_versioned_keys()) {
7570
0
        return;
7571
0
    }
7572
7573
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_operation_logs");
7574
7575
3
    OperationLogRecycleChecker recycle_checker(instance_id_, txn_kv_.get(), instance_info_);
7576
3
    if (recycle_checker.init() != 0) {
7577
0
        return;
7578
0
    }
7579
7580
3
    std::string log_key_prefix = versioned::log_key(instance_id_);
7581
3
    std::string begin_key = encode_versioned_key(log_key_prefix, Versionstamp::min());
7582
3
    std::string end_key = encode_versioned_key(log_key_prefix, Versionstamp::max());
7583
7584
3
    std::unique_ptr<BlobIterator> iter = blob_get_range(txn_kv_, begin_key, end_key);
7585
8
    for (; iter->valid(); iter->next()) {
7586
5
        OperationLogPB operation_log;
7587
5
        if (!iter->parse_value(&operation_log)) {
7588
0
            continue;
7589
0
        }
7590
7591
5
        std::string_view key = iter->key();
7592
5
        Versionstamp log_versionstamp;
7593
5
        if (!decode_versioned_key(&key, &log_versionstamp)) {
7594
0
            continue;
7595
0
        }
7596
7597
5
        OperationLogReferenceInfo ref_info;
7598
5
        if (recycle_checker.can_recycle(log_versionstamp, operation_log.min_timestamp(),
7599
5
                                         &ref_info)) {
7600
4
            metrics_context.total_need_recycle_num++;
7601
4
            metrics_context.total_need_recycle_data_size += operation_log.ByteSizeLong();
7602
4
        }
7603
5
    }
7604
7605
3
    metrics_context.report(true);
7606
3
}
7607
7608
int InstanceRecycler::classify_rowset_task_by_ref_count(
7609
60
        RowsetDeleteTask& task, std::vector<RowsetDeleteTask>& batch_delete_tasks) {
7610
60
    constexpr int MAX_RETRY = 10;
7611
60
    const auto& rowset_meta = task.rowset_meta;
7612
60
    int64_t tablet_id = rowset_meta.tablet_id();
7613
60
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
7614
60
    std::string_view reference_instance_id = instance_id_;
7615
60
    if (rowset_meta.has_reference_instance_id()) {
7616
5
        reference_instance_id = rowset_meta.reference_instance_id();
7617
5
    }
7618
7619
61
    for (int i = 0; i < MAX_RETRY; ++i) {
7620
61
        std::unique_ptr<Transaction> txn;
7621
61
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7622
61
        if (err != TxnErrorCode::TXN_OK) {
7623
0
            LOG_WARNING("failed to create txn when classifying rowset task")
7624
0
                    .tag("instance_id", instance_id_)
7625
0
                    .tag("tablet_id", tablet_id)
7626
0
                    .tag("rowset_id", rowset_id)
7627
0
                    .tag("err", err);
7628
0
            return -1;
7629
0
        }
7630
7631
61
        std::string rowset_ref_count_key =
7632
61
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
7633
61
        task.rowset_ref_count_key = rowset_ref_count_key;
7634
7635
61
        int64_t ref_count = 0;
7636
61
        {
7637
61
            std::string value;
7638
61
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
7639
61
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7640
0
                ref_count = 1;
7641
61
            } else if (err != TxnErrorCode::TXN_OK) {
7642
0
                LOG_WARNING("failed to get rowset ref count key when classifying")
7643
0
                        .tag("instance_id", instance_id_)
7644
0
                        .tag("tablet_id", tablet_id)
7645
0
                        .tag("rowset_id", rowset_id)
7646
0
                        .tag("err", err);
7647
0
                return -1;
7648
61
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
7649
0
                LOG_WARNING("failed to decode rowset data ref count when classifying")
7650
0
                        .tag("instance_id", instance_id_)
7651
0
                        .tag("tablet_id", tablet_id)
7652
0
                        .tag("rowset_id", rowset_id)
7653
0
                        .tag("value", hex(value));
7654
0
                return -1;
7655
0
            }
7656
61
        }
7657
7658
61
        if (ref_count > 1) {
7659
            // ref_count > 1: decrement count, remove recycle keys, don't add to batch delete
7660
12
            txn->atomic_add(rowset_ref_count_key, -1);
7661
12
            LOG_INFO("decrease rowset data ref count in classification phase")
7662
12
                    .tag("instance_id", instance_id_)
7663
12
                    .tag("tablet_id", tablet_id)
7664
12
                    .tag("rowset_id", rowset_id)
7665
12
                    .tag("ref_count", ref_count - 1)
7666
12
                    .tag("ref_count_key", hex(rowset_ref_count_key));
7667
7668
12
            if (!task.recycle_rowset_key.empty()) {
7669
0
                txn->remove(task.recycle_rowset_key);
7670
0
                LOG_INFO("remove recycle rowset key in classification phase")
7671
0
                        .tag("key", hex(task.recycle_rowset_key));
7672
0
            }
7673
12
            if (!task.non_versioned_rowset_key.empty()) {
7674
12
                txn->remove(task.non_versioned_rowset_key);
7675
12
                LOG_INFO("remove non versioned rowset key in classification phase")
7676
12
                        .tag("key", hex(task.non_versioned_rowset_key));
7677
12
            }
7678
7679
12
            err = txn->commit();
7680
12
            if (err == TxnErrorCode::TXN_CONFLICT) {
7681
1
                VLOG_DEBUG << "decrease rowset ref count but txn conflict in classification, retry"
7682
0
                           << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
7683
0
                           << ", ref_count=" << ref_count << ", retry=" << i;
7684
1
                std::this_thread::sleep_for(std::chrono::milliseconds(500));
7685
1
                continue;
7686
11
            } else if (err != TxnErrorCode::TXN_OK) {
7687
0
                LOG_WARNING("failed to commit txn when classifying rowset task")
7688
0
                        .tag("instance_id", instance_id_)
7689
0
                        .tag("tablet_id", tablet_id)
7690
0
                        .tag("rowset_id", rowset_id)
7691
0
                        .tag("err", err);
7692
0
                return -1;
7693
0
            }
7694
11
            return 1; // handled, not added to batch delete
7695
49
        } else {
7696
            // ref_count == 1: Add to batch delete plan without modifying any KV.
7697
            // Keep recycle_rowset_key as "pending recycle" marker until data is actually deleted.
7698
49
            LOG_INFO("add rowset to batch delete plan")
7699
49
                    .tag("instance_id", instance_id_)
7700
49
                    .tag("tablet_id", tablet_id)
7701
49
                    .tag("rowset_id", rowset_id)
7702
49
                    .tag("resource_id", rowset_meta.resource_id())
7703
49
                    .tag("ref_count", ref_count);
7704
7705
49
            batch_delete_tasks.push_back(std::move(task));
7706
49
            return 0; // added to batch delete
7707
49
        }
7708
61
    }
7709
7710
0
    LOG_WARNING("failed to classify rowset task after retry")
7711
0
            .tag("instance_id", instance_id_)
7712
0
            .tag("tablet_id", tablet_id)
7713
0
            .tag("rowset_id", rowset_id)
7714
0
            .tag("retry", MAX_RETRY);
7715
0
    return -1;
7716
60
}
7717
7718
10
int InstanceRecycler::cleanup_rowset_metadata(const std::vector<RowsetDeleteTask>& tasks) {
7719
10
    int ret = 0;
7720
49
    for (const auto& task : tasks) {
7721
49
        int64_t tablet_id = task.rowset_meta.tablet_id();
7722
49
        const std::string& rowset_id = task.rowset_meta.rowset_id_v2();
7723
7724
        // Note: decrement_packed_file_ref_counts is already called in delete_rowset_data,
7725
        // so we don't need to call it again here.
7726
7727
        // Remove all metadata keys in one transaction
7728
49
        std::unique_ptr<Transaction> txn;
7729
49
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7730
49
        if (err != TxnErrorCode::TXN_OK) {
7731
0
            LOG_WARNING("failed to create txn when cleaning up metadata")
7732
0
                    .tag("instance_id", instance_id_)
7733
0
                    .tag("tablet_id", tablet_id)
7734
0
                    .tag("rowset_id", rowset_id)
7735
0
                    .tag("err", err);
7736
0
            ret = -1;
7737
0
            continue;
7738
0
        }
7739
7740
49
        std::string_view reference_instance_id = instance_id_;
7741
49
        if (task.rowset_meta.has_reference_instance_id()) {
7742
5
            reference_instance_id = task.rowset_meta.reference_instance_id();
7743
5
        }
7744
7745
49
        txn->remove(task.rowset_ref_count_key);
7746
49
        LOG_INFO("delete rowset data ref count key in cleanup phase")
7747
49
                .tag("instance_id", instance_id_)
7748
49
                .tag("tablet_id", tablet_id)
7749
49
                .tag("rowset_id", rowset_id)
7750
49
                .tag("ref_count_key", hex(task.rowset_ref_count_key));
7751
7752
49
        std::string dbm_start_key =
7753
49
                meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
7754
49
        std::string dbm_end_key = meta_delete_bitmap_key(
7755
49
                {reference_instance_id, tablet_id, rowset_id,
7756
49
                 std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
7757
49
        txn->remove(dbm_start_key, dbm_end_key);
7758
49
        LOG_INFO("remove delete bitmap kv in cleanup phase")
7759
49
                .tag("instance_id", instance_id_)
7760
49
                .tag("tablet_id", tablet_id)
7761
49
                .tag("rowset_id", rowset_id)
7762
49
                .tag("begin", hex(dbm_start_key))
7763
49
                .tag("end", hex(dbm_end_key));
7764
7765
49
        std::string versioned_dbm_start_key =
7766
49
                versioned::meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id});
7767
49
        std::string versioned_dbm_end_key = versioned_dbm_start_key;
7768
49
        encode_int64(INT64_MAX, &versioned_dbm_end_key);
7769
49
        txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
7770
49
        LOG_INFO("remove versioned delete bitmap kv in cleanup phase")
7771
49
                .tag("instance_id", instance_id_)
7772
49
                .tag("tablet_id", tablet_id)
7773
49
                .tag("rowset_id", rowset_id)
7774
49
                .tag("begin", hex(versioned_dbm_start_key))
7775
49
                .tag("end", hex(versioned_dbm_end_key));
7776
7777
        // Remove versioned meta rowset key
7778
49
        if (!task.versioned_rowset_key.empty()) {
7779
49
            versioned::document_remove<RowsetMetaCloudPB>(
7780
49
                txn.get(), task.versioned_rowset_key, task.versionstamp);
7781
49
            LOG_INFO("remove versioned meta rowset key in cleanup phase")
7782
49
                    .tag("instance_id", instance_id_)
7783
49
                    .tag("tablet_id", tablet_id)
7784
49
                    .tag("rowset_id", rowset_id)
7785
49
                    .tag("key_prefix", hex(task.versioned_rowset_key));
7786
49
        }
7787
7788
49
        if (!task.non_versioned_rowset_key.empty()) {
7789
49
            txn->remove(task.non_versioned_rowset_key);
7790
49
            LOG_INFO("remove non versioned rowset key in cleanup phase")
7791
49
                    .tag("instance_id", instance_id_)
7792
49
                    .tag("tablet_id", tablet_id)
7793
49
                    .tag("rowset_id", rowset_id)
7794
49
                    .tag("key", hex(task.non_versioned_rowset_key));
7795
49
        }
7796
7797
        // Remove recycle_rowset_key last to ensure retry safety:
7798
        // if cleanup fails, this key remains and triggers next round retry.
7799
49
        if (!task.recycle_rowset_key.empty()) {
7800
0
            txn->remove(task.recycle_rowset_key);
7801
0
            LOG_INFO("remove recycle rowset key in cleanup phase")
7802
0
                    .tag("instance_id", instance_id_)
7803
0
                    .tag("tablet_id", tablet_id)
7804
0
                    .tag("rowset_id", rowset_id)
7805
0
                    .tag("key", hex(task.recycle_rowset_key));
7806
0
        }
7807
7808
49
        err = txn->commit();
7809
49
        if (err != TxnErrorCode::TXN_OK) {
7810
            // Metadata cleanup failed. recycle_rowset_key remains, next round will retry.
7811
0
            LOG_WARNING("failed to commit cleanup metadata txn, will retry next round")
7812
0
                    .tag("instance_id", instance_id_)
7813
0
                    .tag("tablet_id", tablet_id)
7814
0
                    .tag("rowset_id", rowset_id)
7815
0
                    .tag("err", err);
7816
0
            ret = -1;
7817
0
            continue;
7818
0
        }
7819
7820
49
        LOG_INFO("cleanup rowset metadata success")
7821
49
                .tag("instance_id", instance_id_)
7822
49
                .tag("tablet_id", tablet_id)
7823
49
                .tag("rowset_id", rowset_id);
7824
49
    }
7825
10
    return ret;
7826
10
}
7827
7828
} // namespace doris::cloud