Coverage Report

Created: 2026-03-11 11:45

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <functional>
36
#include <initializer_list>
37
#include <memory>
38
#include <numeric>
39
#include <random>
40
#include <string>
41
#include <string_view>
42
#include <thread>
43
#include <unordered_map>
44
#include <utility>
45
#include <variant>
46
47
#include "common/defer.h"
48
#include "common/stopwatch.h"
49
#include "meta-service/meta_service.h"
50
#include "meta-service/meta_service_helper.h"
51
#include "meta-service/meta_service_schema.h"
52
#include "meta-store/blob_message.h"
53
#include "meta-store/meta_reader.h"
54
#include "meta-store/txn_kv.h"
55
#include "meta-store/txn_kv_error.h"
56
#include "meta-store/versioned_value.h"
57
#include "recycler/checker.h"
58
#ifdef ENABLE_HDFS_STORAGE_VAULT
59
#include "recycler/hdfs_accessor.h"
60
#endif
61
#include "recycler/s3_accessor.h"
62
#include "recycler/storage_vault_accessor.h"
63
#ifdef UNIT_TEST
64
#include "../test/mock_accessor.h"
65
#endif
66
#include "common/bvars.h"
67
#include "common/config.h"
68
#include "common/encryption_util.h"
69
#include "common/logging.h"
70
#include "common/simple_thread_pool.h"
71
#include "common/util.h"
72
#include "cpp/sync_point.h"
73
#include "meta-store/codec.h"
74
#include "meta-store/document_message.h"
75
#include "meta-store/keys.h"
76
#include "recycler/recycler_service.h"
77
#include "recycler/sync_executor.h"
78
#include "recycler/util.h"
79
80
namespace doris::cloud {
81
82
using namespace std::chrono;
83
84
namespace {
85
86
0
int64_t packed_file_retry_sleep_ms() {
87
0
    const int64_t min_ms = std::max<int64_t>(0, config::packed_file_txn_retry_sleep_min_ms);
88
0
    const int64_t max_ms = std::max<int64_t>(min_ms, config::packed_file_txn_retry_sleep_max_ms);
89
0
    thread_local std::mt19937_64 gen(std::random_device {}());
90
0
    std::uniform_int_distribution<int64_t> dist(min_ms, max_ms);
91
0
    return dist(gen);
92
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
93
94
0
void sleep_for_packed_file_retry() {
95
0
    std::this_thread::sleep_for(std::chrono::milliseconds(packed_file_retry_sleep_ms()));
96
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
97
98
} // namespace
99
100
// return 0 for success get a key, 1 for key not found, negative for error
101
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
102
0
    std::unique_ptr<Transaction> txn;
103
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
104
0
    if (err != TxnErrorCode::TXN_OK) {
105
0
        return -1;
106
0
    }
107
0
    switch (txn->get(key, &val, true)) {
108
0
    case TxnErrorCode::TXN_OK:
109
0
        return 0;
110
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
111
0
        return 1;
112
0
    default:
113
0
        return -1;
114
0
    };
115
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
116
117
// 0 for success, negative for error
118
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
119
337
                   std::unique_ptr<RangeGetIterator>& it) {
120
337
    std::unique_ptr<Transaction> txn;
121
337
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
337
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
337
    switch (txn->get(begin, end, &it, true)) {
126
337
    case TxnErrorCode::TXN_OK:
127
337
        return 0;
128
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
129
0
        return 1;
130
0
    default:
131
0
        return -1;
132
337
    };
133
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
119
31
                   std::unique_ptr<RangeGetIterator>& it) {
120
31
    std::unique_ptr<Transaction> txn;
121
31
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
31
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
31
    switch (txn->get(begin, end, &it, true)) {
126
31
    case TxnErrorCode::TXN_OK:
127
31
        return 0;
128
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
129
0
        return 1;
130
0
    default:
131
0
        return -1;
132
31
    };
133
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
119
306
                   std::unique_ptr<RangeGetIterator>& it) {
120
306
    std::unique_ptr<Transaction> txn;
121
306
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
306
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
306
    switch (txn->get(begin, end, &it, true)) {
126
306
    case TxnErrorCode::TXN_OK:
127
306
        return 0;
128
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
129
0
        return 1;
130
0
    default:
131
0
        return -1;
132
306
    };
133
0
}
134
135
// return 0 for success otherwise error
136
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
137
6
    std::unique_ptr<Transaction> txn;
138
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
139
6
    if (err != TxnErrorCode::TXN_OK) {
140
0
        return -1;
141
0
    }
142
10
    for (auto k : keys) {
143
10
        txn->remove(k);
144
10
    }
145
6
    switch (txn->commit()) {
146
6
    case TxnErrorCode::TXN_OK:
147
6
        return 0;
148
0
    case TxnErrorCode::TXN_CONFLICT:
149
0
        return -1;
150
0
    default:
151
0
        return -1;
152
6
    }
153
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
136
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
137
1
    std::unique_ptr<Transaction> txn;
138
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
139
1
    if (err != TxnErrorCode::TXN_OK) {
140
0
        return -1;
141
0
    }
142
1
    for (auto k : keys) {
143
1
        txn->remove(k);
144
1
    }
145
1
    switch (txn->commit()) {
146
1
    case TxnErrorCode::TXN_OK:
147
1
        return 0;
148
0
    case TxnErrorCode::TXN_CONFLICT:
149
0
        return -1;
150
0
    default:
151
0
        return -1;
152
1
    }
153
1
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
136
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
137
5
    std::unique_ptr<Transaction> txn;
138
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
139
5
    if (err != TxnErrorCode::TXN_OK) {
140
0
        return -1;
141
0
    }
142
9
    for (auto k : keys) {
143
9
        txn->remove(k);
144
9
    }
145
5
    switch (txn->commit()) {
146
5
    case TxnErrorCode::TXN_OK:
147
5
        return 0;
148
0
    case TxnErrorCode::TXN_CONFLICT:
149
0
        return -1;
150
0
    default:
151
0
        return -1;
152
5
    }
153
5
}
154
155
// return 0 for success otherwise error
156
125
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
157
125
    std::unique_ptr<Transaction> txn;
158
125
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
125
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
106k
    for (auto& k : keys) {
163
106k
        txn->remove(k);
164
106k
    }
165
125
    switch (txn->commit()) {
166
125
    case TxnErrorCode::TXN_OK:
167
125
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
125
    }
173
125
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
156
33
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
157
33
    std::unique_ptr<Transaction> txn;
158
33
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
33
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
33
    for (auto& k : keys) {
163
16
        txn->remove(k);
164
16
    }
165
33
    switch (txn->commit()) {
166
33
    case TxnErrorCode::TXN_OK:
167
33
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
33
    }
173
33
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
156
92
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
157
92
    std::unique_ptr<Transaction> txn;
158
92
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
92
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
106k
    for (auto& k : keys) {
163
106k
        txn->remove(k);
164
106k
    }
165
92
    switch (txn->commit()) {
166
92
    case TxnErrorCode::TXN_OK:
167
92
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
92
    }
173
92
}
174
175
// return 0 for success otherwise error
176
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
177
106k
                                       std::string_view end) {
178
106k
    std::unique_ptr<Transaction> txn;
179
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
180
106k
    if (err != TxnErrorCode::TXN_OK) {
181
0
        return -1;
182
0
    }
183
106k
    txn->remove(begin, end);
184
106k
    switch (txn->commit()) {
185
106k
    case TxnErrorCode::TXN_OK:
186
106k
        return 0;
187
0
    case TxnErrorCode::TXN_CONFLICT:
188
0
        return -1;
189
0
    default:
190
0
        return -1;
191
106k
    }
192
106k
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
177
16
                                       std::string_view end) {
178
16
    std::unique_ptr<Transaction> txn;
179
16
    TxnErrorCode err = txn_kv->create_txn(&txn);
180
16
    if (err != TxnErrorCode::TXN_OK) {
181
0
        return -1;
182
0
    }
183
16
    txn->remove(begin, end);
184
16
    switch (txn->commit()) {
185
16
    case TxnErrorCode::TXN_OK:
186
16
        return 0;
187
0
    case TxnErrorCode::TXN_CONFLICT:
188
0
        return -1;
189
0
    default:
190
0
        return -1;
191
16
    }
192
16
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
177
106k
                                       std::string_view end) {
178
106k
    std::unique_ptr<Transaction> txn;
179
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
180
106k
    if (err != TxnErrorCode::TXN_OK) {
181
0
        return -1;
182
0
    }
183
106k
    txn->remove(begin, end);
184
106k
    switch (txn->commit()) {
185
106k
    case TxnErrorCode::TXN_OK:
186
106k
        return 0;
187
0
    case TxnErrorCode::TXN_CONFLICT:
188
0
        return -1;
189
0
    default:
190
0
        return -1;
191
106k
    }
192
106k
}
193
194
void scan_restore_job_rowset(
195
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
196
        std::string& msg,
197
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
198
199
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
200
                                      int64_t num_scanned, int64_t num_recycled,
201
52
                                      int64_t start_time) {
202
52
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
203
0
        int64_t cost =
204
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
205
0
        if (cost > config::recycle_task_threshold_seconds) {
206
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
207
0
                    .tag("instance_id", instance_id)
208
0
                    .tag("task", task_name)
209
0
                    .tag("num_scanned", num_scanned)
210
0
                    .tag("num_recycled", num_recycled);
211
0
        }
212
0
    }
213
52
    return;
214
52
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
201
2
                                      int64_t start_time) {
202
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
203
0
        int64_t cost =
204
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
205
0
        if (cost > config::recycle_task_threshold_seconds) {
206
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
207
0
                    .tag("instance_id", instance_id)
208
0
                    .tag("task", task_name)
209
0
                    .tag("num_scanned", num_scanned)
210
0
                    .tag("num_recycled", num_recycled);
211
0
        }
212
0
    }
213
2
    return;
214
2
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
201
50
                                      int64_t start_time) {
202
50
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
203
0
        int64_t cost =
204
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
205
0
        if (cost > config::recycle_task_threshold_seconds) {
206
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
207
0
                    .tag("instance_id", instance_id)
208
0
                    .tag("task", task_name)
209
0
                    .tag("num_scanned", num_scanned)
210
0
                    .tag("num_recycled", num_recycled);
211
0
        }
212
0
    }
213
50
    return;
214
50
}
215
216
4
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
217
4
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);
218
219
4
    auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
220
4
                                                               "s3_producer_pool");
221
4
    s3_producer_pool->start();
222
4
    auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
223
4
                                                                  "recycle_tablet_pool");
224
4
    recycle_tablet_pool->start();
225
4
    auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>(
226
4
            config::recycle_pool_parallelism, "group_recycle_function_pool");
227
4
    group_recycle_function_pool->start();
228
4
    _thread_pool_group =
229
4
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
230
4
                                    std::move(group_recycle_function_pool));
231
232
4
    auto resource_mgr = std::make_shared<ResourceManager>(txn_kv_);
233
4
    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_, std::move(resource_mgr));
234
4
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);
235
4
}
236
237
4
Recycler::~Recycler() {
238
4
    if (!stopped()) {
239
0
        stop();
240
0
    }
241
4
}
242
243
4
void Recycler::instance_scanner_callback() {
244
    // sleep 60 seconds before scheduling for the launch procedure to complete:
245
    // some bad hdfs connection may cause some log to stdout stderr
246
    // which may pollute .out file and affect the script to check success
247
4
    std::this_thread::sleep_for(
248
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
249
8
    while (!stopped()) {
250
4
        std::vector<InstanceInfoPB> instances;
251
4
        get_all_instances(txn_kv_.get(), instances);
252
        // TODO(plat1ko): delete job recycle kv of non-existent instances
253
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
254
4
            std::stringstream ss;
255
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
256
4
            return ss.str();
257
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
253
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
254
4
            std::stringstream ss;
255
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
256
4
            return ss.str();
257
4
        }();
258
4
        if (!instances.empty()) {
259
            // enqueue instances
260
3
            std::lock_guard lock(mtx_);
261
30
            for (auto& instance : instances) {
262
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
263
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
264
                // skip instance already in pending queue
265
30
                if (success) {
266
30
                    pending_instance_queue_.push_back(std::move(instance));
267
30
                }
268
30
            }
269
3
            pending_instance_cond_.notify_all();
270
3
        }
271
4
        {
272
4
            std::unique_lock lock(mtx_);
273
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
274
7
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
274
7
                               [&]() { return stopped(); });
275
4
        }
276
4
    }
277
4
}
278
279
8
void Recycler::recycle_callback() {
280
38
    while (!stopped()) {
281
37
        InstanceInfoPB instance;
282
37
        {
283
37
            std::unique_lock lock(mtx_);
284
37
            pending_instance_cond_.wait(
285
50
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
285
50
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
286
37
            if (stopped()) {
287
7
                return;
288
7
            }
289
30
            instance = std::move(pending_instance_queue_.front());
290
30
            pending_instance_queue_.pop_front();
291
30
            pending_instance_set_.erase(instance.instance_id());
292
30
        }
293
0
        auto& instance_id = instance.instance_id();
294
30
        {
295
30
            std::lock_guard lock(mtx_);
296
            // skip instance in recycling
297
30
            if (recycling_instance_map_.count(instance_id)) continue;
298
30
        }
299
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
300
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
301
302
30
        if (int r = instance_recycler->init(); r != 0) {
303
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
304
0
                         << " ret=" << r;
305
0
            continue;
306
0
        }
307
30
        std::string recycle_job_key;
308
30
        job_recycle_key({instance_id}, &recycle_job_key);
309
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
310
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
311
30
        if (ret != 0) { // Prepare failed
312
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
313
20
                         << " ret=" << ret;
314
20
            continue;
315
20
        } else {
316
10
            std::lock_guard lock(mtx_);
317
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
318
10
        }
319
10
        if (stopped()) return;
320
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
321
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
322
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
323
10
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
324
10
        ret = instance_recycler->do_recycle();
325
        // If instance recycler has been aborted, don't finish this job
326
327
10
        if (!instance_recycler->stopped()) {
328
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
329
10
                                        ret == 0, ctime_ms);
330
10
        }
331
10
        if (instance_recycler->stopped() || ret != 0) {
332
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
333
0
        }
334
10
        {
335
10
            std::lock_guard lock(mtx_);
336
10
            recycling_instance_map_.erase(instance_id);
337
10
        }
338
339
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
340
10
        auto elpased_ms = now - ctime_ms;
341
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
342
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
343
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
344
10
                                             now + config::recycle_interval_seconds * 1000);
345
10
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
346
10
        LOG(INFO) << "recycle instance done, "
347
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
348
10
                  << " now: " << now;
349
350
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
351
352
10
        LOG_WARNING("finish recycle instance")
353
10
                .tag("instance_id", instance_id)
354
10
                .tag("cost_ms", elpased_ms);
355
10
    }
356
8
}
357
358
4
void Recycler::lease_recycle_jobs() {
359
54
    while (!stopped()) {
360
50
        std::vector<std::string> instances;
361
50
        instances.reserve(recycling_instance_map_.size());
362
50
        {
363
50
            std::lock_guard lock(mtx_);
364
50
            for (auto& [id, _] : recycling_instance_map_) {
365
30
                instances.push_back(id);
366
30
            }
367
50
        }
368
50
        for (auto& i : instances) {
369
30
            std::string recycle_job_key;
370
30
            job_recycle_key({i}, &recycle_job_key);
371
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
372
30
            if (ret == 1) {
373
0
                std::lock_guard lock(mtx_);
374
0
                if (auto it = recycling_instance_map_.find(i);
375
0
                    it != recycling_instance_map_.end()) {
376
0
                    it->second->stop();
377
0
                }
378
0
            }
379
30
        }
380
50
        {
381
50
            std::unique_lock lock(mtx_);
382
50
            notifier_.wait_for(lock,
383
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
384
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
384
100
                               [&]() { return stopped(); });
385
50
        }
386
50
    }
387
4
}
388
389
4
void Recycler::check_recycle_tasks() {
390
7
    while (!stopped()) {
391
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
392
3
        {
393
3
            std::lock_guard lock(mtx_);
394
3
            recycling_instance_map = recycling_instance_map_;
395
3
        }
396
3
        for (auto& entry : recycling_instance_map) {
397
0
            entry.second->check_recycle_tasks();
398
0
        }
399
400
3
        std::unique_lock lock(mtx_);
401
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
402
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
402
6
                           [&]() { return stopped(); });
403
3
    }
404
4
}
405
406
4
int Recycler::start(brpc::Server* server) {
407
4
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
408
4
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
409
4
    S3Environment::getInstance();
410
411
4
    if (config::enable_checker) {
412
0
        checker_ = std::make_unique<Checker>(txn_kv_);
413
0
        int ret = checker_->start();
414
0
        std::string msg;
415
0
        if (ret != 0) {
416
0
            msg = "failed to start checker";
417
0
            LOG(ERROR) << msg;
418
0
            std::cerr << msg << std::endl;
419
0
            return ret;
420
0
        }
421
0
        msg = "checker started";
422
0
        LOG(INFO) << msg;
423
0
        std::cout << msg << std::endl;
424
0
    }
425
426
4
    if (server) {
427
        // Add service
428
1
        auto recycler_service =
429
1
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
430
1
        server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE);
431
1
    }
432
433
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
Line
Count
Source
433
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
434
12
    for (int i = 0; i < config::recycle_concurrency; ++i) {
435
8
        workers_.emplace_back([this] { recycle_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
Line
Count
Source
435
8
        workers_.emplace_back([this] { recycle_callback(); });
436
8
    }
437
438
4
    workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this);
439
4
    workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this);
440
441
4
    if (config::enable_snapshot_data_migrator) {
442
0
        snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_);
443
0
        int ret = snapshot_data_migrator_->start();
444
0
        if (ret != 0) {
445
0
            LOG(ERROR) << "failed to start snapshot data migrator";
446
0
            return ret;
447
0
        }
448
0
        LOG(INFO) << "snapshot data migrator started";
449
0
    }
450
451
4
    if (config::enable_snapshot_chain_compactor) {
452
0
        snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_);
453
0
        int ret = snapshot_chain_compactor_->start();
454
0
        if (ret != 0) {
455
0
            LOG(ERROR) << "failed to start snapshot chain compactor";
456
0
            return ret;
457
0
        }
458
0
        LOG(INFO) << "snapshot chain compactor started";
459
0
    }
460
461
4
    return 0;
462
4
}
463
464
4
void Recycler::stop() {
465
4
    stopped_ = true;
466
4
    notifier_.notify_all();
467
4
    pending_instance_cond_.notify_all();
468
4
    {
469
4
        std::lock_guard lock(mtx_);
470
4
        for (auto& [_, recycler] : recycling_instance_map_) {
471
0
            recycler->stop();
472
0
        }
473
4
    }
474
20
    for (auto& w : workers_) {
475
20
        if (w.joinable()) w.join();
476
20
    }
477
4
    if (checker_) {
478
0
        checker_->stop();
479
0
    }
480
4
    if (snapshot_data_migrator_) {
481
0
        snapshot_data_migrator_->stop();
482
0
    }
483
4
    if (snapshot_chain_compactor_) {
484
0
        snapshot_chain_compactor_->stop();
485
0
    }
486
4
}
487
488
class InstanceRecycler::InvertedIndexIdCache {
489
public:
490
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
491
131
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}
492
493
    // Return 0 if success, 1 if schema kv not found, negative for error
494
    // For the same index_id, schema_version, res, since `get` is not completely atomic
495
    // one thread has not finished inserting, and another thread has not get the index_id and schema_version,
496
    // resulting in repeated addition and inaccuracy.
497
    // however, this approach can reduce the lock range and sacrifice a bit of meta repeated get to improve concurrency performance.
498
    // repeated addition does not affect correctness.
499
28.4k
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
500
28.4k
        {
501
28.4k
            std::lock_guard lock(mtx_);
502
28.4k
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
503
3.88k
                return 0;
504
3.88k
            }
505
24.5k
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
506
24.5k
                it != inverted_index_id_map_.end()) {
507
16.3k
                res = it->second;
508
16.3k
                return 0;
509
16.3k
            }
510
24.5k
        }
511
        // Get schema from kv
512
        // TODO(plat1ko): Single flight
513
8.12k
        std::unique_ptr<Transaction> txn;
514
8.12k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
515
8.12k
        if (err != TxnErrorCode::TXN_OK) {
516
0
            LOG(WARNING) << "failed to create txn, err=" << err;
517
0
            return -1;
518
0
        }
519
8.12k
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
520
8.12k
        ValueBuf val_buf;
521
8.12k
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
522
8.12k
        if (err != TxnErrorCode::TXN_OK) {
523
504
            LOG(WARNING) << "failed to get schema, err=" << err;
524
504
            return static_cast<int>(err);
525
504
        }
526
7.62k
        doris::TabletSchemaCloudPB schema;
527
7.62k
        if (!parse_schema_value(val_buf, &schema)) {
528
0
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
529
0
            return -1;
530
0
        }
531
7.62k
        if (schema.index_size() > 0) {
532
5.80k
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
533
5.80k
            if (schema.has_inverted_index_storage_format()) {
534
5.80k
                index_format = schema.inverted_index_storage_format();
535
5.80k
            }
536
5.80k
            res.first = index_format;
537
5.80k
            res.second.reserve(schema.index_size());
538
13.6k
            for (auto& i : schema.index()) {
539
13.6k
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
540
13.6k
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
541
13.6k
                }
542
13.6k
            }
543
5.80k
        }
544
7.62k
        insert(index_id, schema_version, res);
545
7.62k
        return 0;
546
7.62k
    }
547
548
    // Empty `ids` means this schema has no inverted index
549
7.62k
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
550
7.62k
        if (index_info.second.empty()) {
551
1.81k
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
552
1.81k
            std::lock_guard lock(mtx_);
553
1.81k
            schemas_without_inverted_index_.emplace(index_id, schema_version);
554
5.80k
        } else {
555
5.80k
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
556
5.80k
            std::lock_guard lock(mtx_);
557
5.80k
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
558
5.80k
        }
559
7.62k
    }
560
561
private:
562
    std::string instance_id_;
563
    std::shared_ptr<TxnKv> txn_kv_;
564
565
    std::mutex mtx_;
566
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
567
    struct HashOfKey {
568
60.5k
        size_t operator()(const Key& key) const {
569
60.5k
            size_t seed = 0;
570
60.5k
            seed = std::hash<int64_t> {}(key.first);
571
60.5k
            seed = std::hash<int32_t> {}(key.second);
572
60.5k
            return seed;
573
60.5k
        }
574
    };
575
    // <index_id, schema_version> -> inverted_index_ids
576
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
577
    // Store <index_id, schema_version> of schema which doesn't have inverted index
578
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
579
};
580
581
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
582
                                   RecyclerThreadPoolGroup thread_pool_group,
583
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
584
        : txn_kv_(std::move(txn_kv)),
585
          instance_id_(instance.instance_id()),
586
          instance_info_(instance),
587
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
588
          _thread_pool_group(std::move(thread_pool_group)),
589
          txn_lazy_committer_(std::move(txn_lazy_committer)),
590
          delete_bitmap_lock_white_list_(std::make_shared<DeleteBitmapLockWhiteList>()),
591
131
          resource_mgr_(std::make_shared<ResourceManager>(txn_kv_)) {
592
131
    delete_bitmap_lock_white_list_->init();
593
131
    resource_mgr_->init();
594
131
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);
595
596
    // Since the recycler's resource manager could not be notified when instance info changes,
597
    // we need to refresh the instance info here to ensure the resource manager has the latest info.
598
131
    txn_lazy_committer_->resource_manager()->refresh_instance(instance_id_, instance);
599
131
};
600
601
131
InstanceRecycler::~InstanceRecycler() = default;
602
603
115
int InstanceRecycler::init_obj_store_accessors() {
604
115
    for (const auto& obj_info : instance_info_.obj_info()) {
605
75
#ifdef UNIT_TEST
606
75
        auto accessor = std::make_shared<MockAccessor>();
607
#else
608
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
609
        if (!s3_conf) {
610
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
611
            return -1;
612
        }
613
614
        std::shared_ptr<S3Accessor> accessor;
615
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
616
        if (ret != 0) {
617
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
618
                         << " resource_id=" << obj_info.id();
619
            return ret;
620
        }
621
#endif
622
75
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
623
75
    }
624
625
115
    return 0;
626
115
}
627
628
115
int InstanceRecycler::init_storage_vault_accessors() {
629
115
    if (instance_info_.resource_ids().empty()) {
630
108
        return 0;
631
108
    }
632
633
7
    FullRangeGetOptions opts(txn_kv_);
634
7
    opts.prefetch = true;
635
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
636
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
637
638
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
639
18
        auto [k, v] = *kv;
640
18
        StorageVaultPB vault;
641
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
642
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
643
0
            return -1;
644
0
        }
645
18
        std::string recycler_storage_vault_white_list = accumulate(
646
18
                config::recycler_storage_vault_white_list.begin(),
647
18
                config::recycler_storage_vault_white_list.end(), std::string(),
648
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
648
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
649
18
        LOG_INFO("config::recycler_storage_vault_white_list")
650
18
                .tag("", recycler_storage_vault_white_list);
651
18
        if (!config::recycler_storage_vault_white_list.empty()) {
652
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
653
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
654
8
                it == config::recycler_storage_vault_white_list.end()) {
655
2
                LOG_WARNING(
656
2
                        "failed to init accessor for vault because this vault is not in "
657
2
                        "config::recycler_storage_vault_white_list. ")
658
2
                        .tag(" vault name:", vault.name())
659
2
                        .tag(" config::recycler_storage_vault_white_list:",
660
2
                             recycler_storage_vault_white_list);
661
2
                continue;
662
2
            }
663
8
        }
664
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
665
16
                                 &accessor_map_, &vault);
666
16
        if (vault.has_hdfs_info()) {
667
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
668
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
669
9
            int ret = accessor->init();
670
9
            if (ret != 0) {
671
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
672
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
673
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
674
4
                continue;
675
4
            }
676
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
677
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
678
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
679
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
680
#else
681
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
682
                       << "but HDFS storage vaults were detected";
683
#endif
684
7
        } else if (vault.has_obj_info()) {
685
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
686
7
            if (!s3_conf) {
687
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
688
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
689
1
                continue;
690
1
            }
691
692
6
            std::shared_ptr<S3Accessor> accessor;
693
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
694
6
            if (ret != 0) {
695
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
696
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
697
0
                             << " ret=" << ret
698
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
699
0
                continue;
700
0
            }
701
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
702
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
703
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
704
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
705
6
        }
706
16
    }
707
708
7
    if (!it->is_valid()) {
709
0
        LOG_WARNING("failed to get storage vault kv");
710
0
        return -1;
711
0
    }
712
713
7
    if (accessor_map_.empty()) {
714
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
715
1
        return -2;
716
1
    }
717
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
718
6
             instance_id_);
719
720
6
    return 0;
721
7
}
722
723
115
int InstanceRecycler::init() {
724
115
    int ret = init_obj_store_accessors();
725
115
    if (ret != 0) {
726
0
        return ret;
727
0
    }
728
729
115
    return init_storage_vault_accessors();
730
115
}
731
732
template <typename... Func>
733
120
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
120
    return [funcs...]() {
735
120
        return [](std::initializer_list<int> ret_vals) {
736
120
            int i = 0;
737
140
            for (int ret : ret_vals) {
738
140
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
140
            }
742
120
            return i;
743
120
        }({funcs()...});
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
20
            for (int ret : ret_vals) {
738
20
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
20
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
20
            for (int ret : ret_vals) {
738
20
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
20
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
0
                    i = ret;
740
0
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
120
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEv
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
120
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
746
747
10
int InstanceRecycler::do_recycle() {
748
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
749
10
    tablet_metrics_context_.reset();
750
10
    segment_metrics_context_.reset();
751
10
    DORIS_CLOUD_DEFER {
752
10
        tablet_metrics_context_.finish_report();
753
10
        segment_metrics_context_.finish_report();
754
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
751
10
    DORIS_CLOUD_DEFER {
752
10
        tablet_metrics_context_.finish_report();
753
10
        segment_metrics_context_.finish_report();
754
10
    };
755
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
756
0
        int res = recycle_cluster_snapshots();
757
0
        if (res != 0) {
758
0
            return -1;
759
0
        }
760
0
        return recycle_deleted_instance();
761
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
762
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
763
10
                                        fmt::format("instance id {}", instance_id_),
764
120
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
764
120
                                        [](int r) { return r != 0; });
765
10
        sync_executor
766
10
                .add(task_wrapper(
767
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
767
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
768
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
768
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
769
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
770
                                   // becase they may both recycle the same set of tablets
771
                        // recycle dropped table or idexes(mv, rollup)
772
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
772
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
773
                        // recycle dropped partitions
774
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
774
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
775
10
                .add(task_wrapper(
776
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
776
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
777
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
777
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
778
10
                .add(task_wrapper(
779
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
779
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
780
10
                .add(task_wrapper(
781
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
781
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
782
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
782
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
783
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
783
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
784
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
784
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
785
10
                .add(task_wrapper(
786
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
786
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
787
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
787
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
788
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
Line
Count
Source
788
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
789
10
        bool finished = true;
790
10
        std::vector<int> rets = sync_executor.when_all(&finished);
791
120
        for (int ret : rets) {
792
120
            if (ret != 0) {
793
0
                return ret;
794
0
            }
795
120
        }
796
10
        return finished ? 0 : -1;
797
10
    } else {
798
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
799
0
                     << " instance_id=" << instance_id_;
800
0
        return -1;
801
0
    }
802
10
}
803
804
/**
805
* 1. delete all remote data
806
* 2. delete all kv
807
* 3. remove instance kv
808
*/
809
5
int InstanceRecycler::recycle_deleted_instance() {
810
5
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
811
812
5
    int ret = 0;
813
5
    auto start_time = steady_clock::now();
814
815
5
    DORIS_CLOUD_DEFER {
816
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
817
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
818
5
                     << " recycle deleted instance, cost=" << cost
819
5
                     << "s, instance_id=" << instance_id_;
820
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
815
5
    DORIS_CLOUD_DEFER {
816
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
817
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
818
5
                     << " recycle deleted instance, cost=" << cost
819
5
                     << "s, instance_id=" << instance_id_;
820
5
    };
821
822
    // Step 1: Recycle tmp rowsets (contains ref count but txn is not committed)
823
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
824
5
        int res = recycle_tmp_rowsets();
825
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
826
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
827
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
828
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
829
            // and cannot be recycled.
830
5
            res = recycle_tmp_rowsets();
831
5
        }
832
5
        return res;
833
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
Line
Count
Source
823
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
824
5
        int res = recycle_tmp_rowsets();
825
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
826
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
827
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
828
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
829
            // and cannot be recycled.
830
5
            res = recycle_tmp_rowsets();
831
5
        }
832
5
        return res;
833
5
    };
834
5
    if (recycle_tmp_rowsets_with_mark_delete_enabled() != 0) {
835
0
        LOG_WARNING("failed to recycle tmp rowsets").tag("instance_id", instance_id_);
836
0
        return -1;
837
0
    }
838
839
    // Step 2: Recycle versioned rowsets in recycle space (already marked for deletion)
840
5
    if (recycle_versioned_rowsets() != 0) {
841
0
        LOG_WARNING("failed to recycle versioned rowsets").tag("instance_id", instance_id_);
842
0
        return -1;
843
0
    }
844
845
    // Step 3: Recycle operation logs (can recycle logs not referenced by snapshots)
846
5
    if (recycle_operation_logs() != 0) {
847
0
        LOG_WARNING("failed to recycle operation logs").tag("instance_id", instance_id_);
848
0
        return -1;
849
0
    }
850
851
    // Step 4: Check if there are still cluster snapshots
852
5
    bool has_snapshots = false;
853
5
    if (has_cluster_snapshots(&has_snapshots) != 0) {
854
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
855
0
        return -1;
856
5
    } else if (has_snapshots) {
857
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
858
1
        return 0;
859
1
    }
860
861
4
    bool snapshot_enabled = instance_info().has_snapshot_switch_status() &&
862
4
                            instance_info().snapshot_switch_status() !=
863
1
                                    SnapshotSwitchStatus::SNAPSHOT_SWITCH_DISABLED;
864
4
    if (snapshot_enabled) {
865
1
        bool has_unrecycled_rowsets = false;
866
1
        if (recycle_ref_rowsets(&has_unrecycled_rowsets) != 0) {
867
0
            LOG_WARNING("failed to recycle ref rowsets").tag("instance_id", instance_id_);
868
0
            return -1;
869
1
        } else if (has_unrecycled_rowsets) {
870
0
            LOG_INFO("instance has referenced rowsets, skip recycling")
871
0
                    .tag("instance_id", instance_id_);
872
0
            return ret;
873
0
        }
874
3
    } else { // delete all remote data if snapshot is disabled
875
3
        for (auto& [_, accessor] : accessor_map_) {
876
3
            if (stopped()) {
877
0
                return ret;
878
0
            }
879
880
3
            LOG(INFO) << "begin to delete all objects in " << accessor->uri();
881
3
            int del_ret = accessor->delete_all();
882
3
            if (del_ret == 0) {
883
3
                LOG(INFO) << "successfully delete all objects in " << accessor->uri();
884
3
            } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
885
                // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
886
                // so the recycling has been successful.
887
0
                ret = -1;
888
0
            }
889
3
        }
890
891
3
        if (ret != 0) {
892
0
            LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
893
0
            return ret;
894
0
        }
895
3
    }
896
897
    // delete all kv
898
4
    std::unique_ptr<Transaction> txn;
899
4
    TxnErrorCode err = txn_kv_->create_txn(&txn);
900
4
    if (err != TxnErrorCode::TXN_OK) {
901
0
        LOG(WARNING) << "failed to create txn";
902
0
        ret = -1;
903
0
        return -1;
904
0
    }
905
4
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
906
    // delete kv before deleting objects to prevent the checker from misjudging data loss
907
4
    std::string start_txn_key = txn_key_prefix(instance_id_);
908
4
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
909
4
    txn->remove(start_txn_key, end_txn_key);
910
4
    std::string start_version_key = version_key_prefix(instance_id_);
911
4
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
912
4
    txn->remove(start_version_key, end_version_key);
913
4
    std::string start_meta_key = meta_key_prefix(instance_id_);
914
4
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
915
4
    txn->remove(start_meta_key, end_meta_key);
916
4
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
917
4
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
918
4
    txn->remove(start_recycle_key, end_recycle_key);
919
4
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
920
4
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
921
4
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
922
4
    std::string start_copy_key = copy_key_prefix(instance_id_);
923
4
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
924
4
    txn->remove(start_copy_key, end_copy_key);
925
    // should not remove job key range, because we need to reserve job recycle kv
926
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
927
4
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
928
4
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
929
4
    txn->remove(start_job_tablet_key, end_job_tablet_key);
930
4
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
931
4
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
932
4
    std::string start_vault_key = storage_vault_key(key_info0);
933
4
    std::string end_vault_key = storage_vault_key(key_info1);
934
4
    txn->remove(start_vault_key, end_vault_key);
935
4
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
936
4
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
937
4
    txn->remove(versioned_version_key_start, versioned_version_key_end);
938
4
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
939
4
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
940
4
    txn->remove(versioned_index_key_start, versioned_index_key_end);
941
4
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
942
4
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
943
4
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
944
4
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
945
4
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
946
4
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
947
4
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
948
4
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
949
4
    txn->remove(versioned_data_key_start, versioned_data_key_end);
950
4
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
951
4
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
952
4
    txn->remove(versioned_log_key_start, versioned_log_key_end);
953
4
    err = txn->commit();
954
4
    if (err != TxnErrorCode::TXN_OK) {
955
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
956
0
        ret = -1;
957
0
    }
958
959
4
    if (ret == 0) {
960
        // remove instance kv
961
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
962
4
        err = txn_kv_->create_txn(&txn);
963
4
        if (err != TxnErrorCode::TXN_OK) {
964
0
            LOG(WARNING) << "failed to create txn";
965
0
            ret = -1;
966
0
            return ret;
967
0
        }
968
4
        std::string key;
969
4
        instance_key({instance_id_}, &key);
970
4
        txn->atomic_add(system_meta_service_instance_update_key(), 1);
971
4
        txn->remove(key);
972
4
        err = txn->commit();
973
4
        if (err != TxnErrorCode::TXN_OK) {
974
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
975
0
                         << " err=" << err;
976
0
            ret = -1;
977
0
        }
978
4
    }
979
4
    return ret;
980
4
}
981
982
int InstanceRecycler::check_rowset_exists(int64_t tablet_id, const std::string& rowset_id,
983
9
                                          bool* exists, PackedFileRecycleStats* stats) {
984
9
    if (exists == nullptr) {
985
0
        return -1;
986
0
    }
987
9
    *exists = false;
988
989
9
    std::string begin = meta_rowset_key({instance_id_, tablet_id, 0});
990
9
    std::string end = meta_rowset_key({instance_id_, tablet_id + 1, 0});
991
9
    std::string scan_begin = begin;
992
993
9
    while (true) {
994
9
        std::unique_ptr<RangeGetIterator> it_range;
995
9
        int get_ret = txn_get(txn_kv_.get(), scan_begin, end, it_range);
996
9
        if (get_ret < 0) {
997
0
            LOG_WARNING("failed to scan rowset metas when recycling packed file")
998
0
                    .tag("instance_id", instance_id_)
999
0
                    .tag("tablet_id", tablet_id)
1000
0
                    .tag("ret", get_ret);
1001
0
            return -1;
1002
0
        }
1003
9
        if (get_ret == 1 || it_range == nullptr || !it_range->has_next()) {
1004
6
            return 0;
1005
6
        }
1006
1007
3
        std::string last_key;
1008
3
        while (it_range->has_next()) {
1009
3
            auto [k, v] = it_range->next();
1010
3
            last_key.assign(k.data(), k.size());
1011
3
            doris::RowsetMetaCloudPB rowset_meta;
1012
3
            if (!rowset_meta.ParseFromArray(v.data(), v.size())) {
1013
0
                LOG_WARNING("malformed rowset meta when checking packed file rowset existence")
1014
0
                        .tag("instance_id", instance_id_)
1015
0
                        .tag("tablet_id", tablet_id)
1016
0
                        .tag("key", hex(k));
1017
0
                continue;
1018
0
            }
1019
3
            if (stats) {
1020
3
                ++stats->rowset_scan_count;
1021
3
            }
1022
3
            if (rowset_meta.rowset_id_v2() == rowset_id) {
1023
3
                *exists = true;
1024
3
                return 0;
1025
3
            }
1026
3
        }
1027
1028
0
        if (!it_range->more()) {
1029
0
            return 0;
1030
0
        }
1031
1032
        // Continue scanning from the next key to keep each transaction short.
1033
0
        scan_begin = std::move(last_key);
1034
0
        scan_begin.push_back('\x00');
1035
0
    }
1036
9
}
1037
1038
int InstanceRecycler::check_recycle_and_tmp_rowset_exists(int64_t tablet_id,
1039
                                                          const std::string& rowset_id,
1040
                                                          int64_t txn_id, bool* recycle_exists,
1041
11
                                                          bool* tmp_exists) {
1042
11
    if (recycle_exists == nullptr || tmp_exists == nullptr) {
1043
0
        return -1;
1044
0
    }
1045
11
    *recycle_exists = false;
1046
11
    *tmp_exists = false;
1047
1048
11
    if (txn_id <= 0) {
1049
0
        LOG_WARNING("invalid txn id when checking recycle/tmp rowset existence")
1050
0
                .tag("instance_id", instance_id_)
1051
0
                .tag("tablet_id", tablet_id)
1052
0
                .tag("rowset_id", rowset_id)
1053
0
                .tag("txn_id", txn_id);
1054
0
        return -1;
1055
0
    }
1056
1057
11
    std::unique_ptr<Transaction> txn;
1058
11
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1059
11
    if (err != TxnErrorCode::TXN_OK) {
1060
0
        LOG_WARNING("failed to create txn when checking recycle/tmp rowset existence")
1061
0
                .tag("instance_id", instance_id_)
1062
0
                .tag("tablet_id", tablet_id)
1063
0
                .tag("rowset_id", rowset_id)
1064
0
                .tag("txn_id", txn_id)
1065
0
                .tag("err", err);
1066
0
        return -1;
1067
0
    }
1068
1069
11
    std::string recycle_key = recycle_rowset_key({instance_id_, tablet_id, rowset_id});
1070
11
    auto ret = key_exists(txn.get(), recycle_key, true);
1071
11
    if (ret == TxnErrorCode::TXN_OK) {
1072
1
        *recycle_exists = true;
1073
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1074
0
        LOG_WARNING("failed to check recycle rowset existence")
1075
0
                .tag("instance_id", instance_id_)
1076
0
                .tag("tablet_id", tablet_id)
1077
0
                .tag("rowset_id", rowset_id)
1078
0
                .tag("key", hex(recycle_key))
1079
0
                .tag("err", ret);
1080
0
        return -1;
1081
0
    }
1082
1083
11
    std::string tmp_key = meta_rowset_tmp_key({instance_id_, txn_id, tablet_id});
1084
11
    ret = key_exists(txn.get(), tmp_key, true);
1085
11
    if (ret == TxnErrorCode::TXN_OK) {
1086
1
        *tmp_exists = true;
1087
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1088
0
        LOG_WARNING("failed to check tmp rowset existence")
1089
0
                .tag("instance_id", instance_id_)
1090
0
                .tag("tablet_id", tablet_id)
1091
0
                .tag("txn_id", txn_id)
1092
0
                .tag("key", hex(tmp_key))
1093
0
                .tag("err", ret);
1094
0
        return -1;
1095
0
    }
1096
1097
11
    return 0;
1098
11
}
1099
1100
std::pair<std::string, std::shared_ptr<StorageVaultAccessor>>
1101
8
InstanceRecycler::resolve_packed_file_accessor(const std::string& hint) {
1102
8
    if (!hint.empty()) {
1103
8
        if (auto it = accessor_map_.find(hint); it != accessor_map_.end()) {
1104
8
            return {hint, it->second};
1105
8
        }
1106
8
    }
1107
1108
0
    return {"", nullptr};
1109
8
}
1110
1111
int InstanceRecycler::correct_packed_file_info(cloud::PackedFileInfoPB* packed_info, bool* changed,
1112
                                               const std::string& packed_file_path,
1113
3
                                               PackedFileRecycleStats* stats) {
1114
3
    bool local_changed = false;
1115
3
    int64_t left_num = 0;
1116
3
    int64_t left_bytes = 0;
1117
3
    bool all_small_files_confirmed = true;
1118
3
    LOG(INFO) << "begin to correct file: " << packed_file_path;
1119
1120
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1121
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1122
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1123
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1124
14
        LOG_INFO("packed slice correction status")
1125
14
                .tag("instance_id", instance_id_)
1126
14
                .tag("packed_file_path", packed_file_path)
1127
14
                .tag("small_file_path", file.path())
1128
14
                .tag("tablet_id", tablet_id)
1129
14
                .tag("rowset_id", rowset_id)
1130
14
                .tag("txn_id", txn_id)
1131
14
                .tag("size", file.size())
1132
14
                .tag("deleted", file.deleted())
1133
14
                .tag("corrected", file.corrected())
1134
14
                .tag("confirmed_this_round", confirmed_this_round);
1135
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
Line
Count
Source
1120
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1121
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1122
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1123
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1124
14
        LOG_INFO("packed slice correction status")
1125
14
                .tag("instance_id", instance_id_)
1126
14
                .tag("packed_file_path", packed_file_path)
1127
14
                .tag("small_file_path", file.path())
1128
14
                .tag("tablet_id", tablet_id)
1129
14
                .tag("rowset_id", rowset_id)
1130
14
                .tag("txn_id", txn_id)
1131
14
                .tag("size", file.size())
1132
14
                .tag("deleted", file.deleted())
1133
14
                .tag("corrected", file.corrected())
1134
14
                .tag("confirmed_this_round", confirmed_this_round);
1135
14
    };
1136
1137
17
    for (int i = 0; i < packed_info->slices_size(); ++i) {
1138
14
        auto* small_file = packed_info->mutable_slices(i);
1139
14
        if (small_file->deleted()) {
1140
3
            log_small_file_status(*small_file, small_file->corrected());
1141
3
            continue;
1142
3
        }
1143
1144
11
        if (small_file->corrected()) {
1145
0
            left_num++;
1146
0
            left_bytes += small_file->size();
1147
0
            log_small_file_status(*small_file, true);
1148
0
            continue;
1149
0
        }
1150
1151
11
        if (!small_file->has_tablet_id() || !small_file->has_rowset_id()) {
1152
0
            LOG_WARNING("packed file small file missing identifiers during correction")
1153
0
                    .tag("instance_id", instance_id_)
1154
0
                    .tag("small_file_path", small_file->path())
1155
0
                    .tag("index", i);
1156
0
            return -1;
1157
0
        }
1158
1159
11
        int64_t tablet_id = small_file->tablet_id();
1160
11
        const std::string& rowset_id = small_file->rowset_id();
1161
11
        if (!small_file->has_txn_id() || small_file->txn_id() <= 0) {
1162
0
            LOG_WARNING("packed file small file missing valid txn id during correction")
1163
0
                    .tag("instance_id", instance_id_)
1164
0
                    .tag("small_file_path", small_file->path())
1165
0
                    .tag("index", i)
1166
0
                    .tag("tablet_id", tablet_id)
1167
0
                    .tag("rowset_id", rowset_id)
1168
0
                    .tag("has_txn_id", small_file->has_txn_id())
1169
0
                    .tag("txn_id", small_file->has_txn_id() ? small_file->txn_id() : 0);
1170
0
            return -1;
1171
0
        }
1172
11
        int64_t txn_id = small_file->txn_id();
1173
11
        bool recycle_exists = false;
1174
11
        bool tmp_exists = false;
1175
11
        if (check_recycle_and_tmp_rowset_exists(tablet_id, rowset_id, txn_id, &recycle_exists,
1176
11
                                                &tmp_exists) != 0) {
1177
0
            return -1;
1178
0
        }
1179
1180
11
        bool small_file_confirmed = false;
1181
11
        if (tmp_exists) {
1182
1
            left_num++;
1183
1
            left_bytes += small_file->size();
1184
1
            small_file_confirmed = true;
1185
10
        } else if (recycle_exists) {
1186
1
            left_num++;
1187
1
            left_bytes += small_file->size();
1188
            // keep small_file_confirmed=false so the packed file remains uncorrected
1189
9
        } else {
1190
9
            bool rowset_exists = false;
1191
9
            if (check_rowset_exists(tablet_id, rowset_id, &rowset_exists, stats) != 0) {
1192
0
                return -1;
1193
0
            }
1194
1195
9
            if (!rowset_exists) {
1196
6
                if (!small_file->deleted()) {
1197
6
                    small_file->set_deleted(true);
1198
6
                    local_changed = true;
1199
6
                }
1200
6
                if (!small_file->corrected()) {
1201
6
                    small_file->set_corrected(true);
1202
6
                    local_changed = true;
1203
6
                }
1204
6
                small_file_confirmed = true;
1205
6
            } else {
1206
3
                left_num++;
1207
3
                left_bytes += small_file->size();
1208
3
                small_file_confirmed = true;
1209
3
            }
1210
9
        }
1211
1212
11
        if (!small_file_confirmed) {
1213
1
            all_small_files_confirmed = false;
1214
1
        }
1215
1216
11
        if (small_file->corrected() != small_file_confirmed) {
1217
4
            small_file->set_corrected(small_file_confirmed);
1218
4
            local_changed = true;
1219
4
        }
1220
1221
11
        log_small_file_status(*small_file, small_file_confirmed);
1222
11
    }
1223
1224
3
    if (packed_info->remaining_slice_bytes() != left_bytes) {
1225
3
        packed_info->set_remaining_slice_bytes(left_bytes);
1226
3
        local_changed = true;
1227
3
    }
1228
3
    if (packed_info->ref_cnt() != left_num) {
1229
3
        auto old_ref_cnt = packed_info->ref_cnt();
1230
3
        packed_info->set_ref_cnt(left_num);
1231
3
        LOG_INFO("corrected packed file ref count")
1232
3
                .tag("instance_id", instance_id_)
1233
3
                .tag("resource_id", packed_info->resource_id())
1234
3
                .tag("packed_file_path", packed_file_path)
1235
3
                .tag("old_ref_cnt", old_ref_cnt)
1236
3
                .tag("new_ref_cnt", left_num);
1237
3
        local_changed = true;
1238
3
    }
1239
3
    if (packed_info->corrected() != all_small_files_confirmed) {
1240
2
        packed_info->set_corrected(all_small_files_confirmed);
1241
2
        local_changed = true;
1242
2
    }
1243
3
    if (left_num == 0 && packed_info->state() != cloud::PackedFileInfoPB::RECYCLING) {
1244
1
        packed_info->set_state(cloud::PackedFileInfoPB::RECYCLING);
1245
1
        local_changed = true;
1246
1
    }
1247
1248
3
    if (changed != nullptr) {
1249
3
        *changed = local_changed;
1250
3
    }
1251
3
    return 0;
1252
3
}
1253
1254
int InstanceRecycler::process_single_packed_file(const std::string& packed_key,
1255
                                                 const std::string& packed_file_path,
1256
4
                                                 PackedFileRecycleStats* stats) {
1257
4
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
1258
4
    bool correction_ok = false;
1259
4
    cloud::PackedFileInfoPB packed_info;
1260
1261
4
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
1262
4
        if (stopped()) {
1263
0
            LOG_WARNING("recycler stopped before processing packed file")
1264
0
                    .tag("instance_id", instance_id_)
1265
0
                    .tag("packed_file_path", packed_file_path)
1266
0
                    .tag("attempt", attempt);
1267
0
            return -1;
1268
0
        }
1269
1270
4
        std::unique_ptr<Transaction> txn;
1271
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1272
4
        if (err != TxnErrorCode::TXN_OK) {
1273
0
            LOG_WARNING("failed to create txn when processing packed file")
1274
0
                    .tag("instance_id", instance_id_)
1275
0
                    .tag("packed_file_path", packed_file_path)
1276
0
                    .tag("attempt", attempt)
1277
0
                    .tag("err", err);
1278
0
            return -1;
1279
0
        }
1280
1281
4
        std::string packed_val;
1282
4
        err = txn->get(packed_key, &packed_val);
1283
4
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1284
0
            return 0;
1285
0
        }
1286
4
        if (err != TxnErrorCode::TXN_OK) {
1287
0
            LOG_WARNING("failed to get packed file kv")
1288
0
                    .tag("instance_id", instance_id_)
1289
0
                    .tag("packed_file_path", packed_file_path)
1290
0
                    .tag("attempt", attempt)
1291
0
                    .tag("err", err);
1292
0
            return -1;
1293
0
        }
1294
1295
4
        if (!packed_info.ParseFromString(packed_val)) {
1296
0
            LOG_WARNING("failed to parse packed file info")
1297
0
                    .tag("instance_id", instance_id_)
1298
0
                    .tag("packed_file_path", packed_file_path)
1299
0
                    .tag("attempt", attempt);
1300
0
            return -1;
1301
0
        }
1302
1303
4
        int64_t now_sec = ::time(nullptr);
1304
4
        bool corrected = packed_info.corrected();
1305
4
        bool due = config::force_immediate_recycle ||
1306
4
                   now_sec - packed_info.created_at_sec() >=
1307
4
                           config::packed_file_correction_delay_seconds;
1308
1309
4
        if (!corrected && due) {
1310
3
            bool changed = false;
1311
3
            if (correct_packed_file_info(&packed_info, &changed, packed_file_path, stats) != 0) {
1312
0
                LOG_WARNING("correct_packed_file_info failed")
1313
0
                        .tag("instance_id", instance_id_)
1314
0
                        .tag("packed_file_path", packed_file_path)
1315
0
                        .tag("attempt", attempt);
1316
0
                return -1;
1317
0
            }
1318
3
            if (changed) {
1319
3
                std::string updated;
1320
3
                if (!packed_info.SerializeToString(&updated)) {
1321
0
                    LOG_WARNING("failed to serialize packed file info after correction")
1322
0
                            .tag("instance_id", instance_id_)
1323
0
                            .tag("packed_file_path", packed_file_path)
1324
0
                            .tag("attempt", attempt);
1325
0
                    return -1;
1326
0
                }
1327
3
                txn->put(packed_key, updated);
1328
3
                err = txn->commit();
1329
3
                if (err == TxnErrorCode::TXN_OK) {
1330
3
                    if (stats) {
1331
3
                        ++stats->num_corrected;
1332
3
                    }
1333
3
                } else {
1334
0
                    if (err == TxnErrorCode::TXN_CONFLICT && attempt < max_retry_times) {
1335
0
                        LOG_WARNING(
1336
0
                                "failed to commit correction for packed file due to conflict, "
1337
0
                                "retrying")
1338
0
                                .tag("instance_id", instance_id_)
1339
0
                                .tag("packed_file_path", packed_file_path)
1340
0
                                .tag("attempt", attempt);
1341
0
                        sleep_for_packed_file_retry();
1342
0
                        packed_info.Clear();
1343
0
                        continue;
1344
0
                    }
1345
0
                    LOG_WARNING("failed to commit correction for packed file")
1346
0
                            .tag("instance_id", instance_id_)
1347
0
                            .tag("packed_file_path", packed_file_path)
1348
0
                            .tag("attempt", attempt)
1349
0
                            .tag("err", err);
1350
0
                    return -1;
1351
0
                }
1352
3
            }
1353
3
        }
1354
1355
4
        correction_ok = true;
1356
4
        break;
1357
4
    }
1358
1359
4
    if (!correction_ok) {
1360
0
        return -1;
1361
0
    }
1362
1363
4
    if (!(packed_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1364
4
          packed_info.ref_cnt() == 0)) {
1365
3
        return 0;
1366
3
    }
1367
1368
1
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
1369
0
        LOG_WARNING("packed file missing resource id when recycling")
1370
0
                .tag("instance_id", instance_id_)
1371
0
                .tag("packed_file_path", packed_file_path);
1372
0
        return -1;
1373
0
    }
1374
1
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
1375
1
    if (!accessor) {
1376
0
        LOG_WARNING("no accessor available to delete packed file")
1377
0
                .tag("instance_id", instance_id_)
1378
0
                .tag("packed_file_path", packed_file_path)
1379
0
                .tag("resource_id", packed_info.resource_id());
1380
0
        return -1;
1381
0
    }
1382
1
    int del_ret = accessor->delete_file(packed_file_path);
1383
1
    if (del_ret != 0 && del_ret != 1) {
1384
0
        LOG_WARNING("failed to delete packed file")
1385
0
                .tag("instance_id", instance_id_)
1386
0
                .tag("packed_file_path", packed_file_path)
1387
0
                .tag("resource_id", resource_id)
1388
0
                .tag("ret", del_ret);
1389
0
        return -1;
1390
0
    }
1391
1
    if (del_ret == 1) {
1392
0
        LOG_INFO("packed file already removed")
1393
0
                .tag("instance_id", instance_id_)
1394
0
                .tag("packed_file_path", packed_file_path)
1395
0
                .tag("resource_id", resource_id);
1396
1
    } else {
1397
1
        LOG_INFO("deleted packed file")
1398
1
                .tag("instance_id", instance_id_)
1399
1
                .tag("packed_file_path", packed_file_path)
1400
1
                .tag("resource_id", resource_id);
1401
1
    }
1402
1403
1
    for (int del_attempt = 1; del_attempt <= max_retry_times; ++del_attempt) {
1404
1
        std::unique_ptr<Transaction> del_txn;
1405
1
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
1406
1
        if (err != TxnErrorCode::TXN_OK) {
1407
0
            LOG_WARNING("failed to create txn when removing packed file kv")
1408
0
                    .tag("instance_id", instance_id_)
1409
0
                    .tag("packed_file_path", packed_file_path)
1410
0
                    .tag("del_attempt", del_attempt)
1411
0
                    .tag("err", err);
1412
0
            return -1;
1413
0
        }
1414
1415
1
        std::string latest_val;
1416
1
        err = del_txn->get(packed_key, &latest_val);
1417
1
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1418
0
            return 0;
1419
0
        }
1420
1
        if (err != TxnErrorCode::TXN_OK) {
1421
0
            LOG_WARNING("failed to re-read packed file kv before removal")
1422
0
                    .tag("instance_id", instance_id_)
1423
0
                    .tag("packed_file_path", packed_file_path)
1424
0
                    .tag("del_attempt", del_attempt)
1425
0
                    .tag("err", err);
1426
0
            return -1;
1427
0
        }
1428
1429
1
        cloud::PackedFileInfoPB latest_info;
1430
1
        if (!latest_info.ParseFromString(latest_val)) {
1431
0
            LOG_WARNING("failed to parse packed file info before removal")
1432
0
                    .tag("instance_id", instance_id_)
1433
0
                    .tag("packed_file_path", packed_file_path)
1434
0
                    .tag("del_attempt", del_attempt);
1435
0
            return -1;
1436
0
        }
1437
1438
1
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1439
1
              latest_info.ref_cnt() == 0)) {
1440
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
1441
0
                    .tag("instance_id", instance_id_)
1442
0
                    .tag("packed_file_path", packed_file_path)
1443
0
                    .tag("del_attempt", del_attempt);
1444
0
            return 0;
1445
0
        }
1446
1447
1
        del_txn->remove(packed_key);
1448
1
        err = del_txn->commit();
1449
1
        if (err == TxnErrorCode::TXN_OK) {
1450
1
            if (stats) {
1451
1
                ++stats->num_deleted;
1452
1
                stats->bytes_deleted += static_cast<int64_t>(packed_key.size()) +
1453
1
                                        static_cast<int64_t>(latest_val.size());
1454
1
                if (del_ret == 0 || del_ret == 1) {
1455
1
                    ++stats->num_object_deleted;
1456
1
                    int64_t object_size = latest_info.total_slice_bytes();
1457
1
                    if (object_size <= 0) {
1458
0
                        object_size = packed_info.total_slice_bytes();
1459
0
                    }
1460
1
                    stats->bytes_object_deleted += object_size;
1461
1
                }
1462
1
            }
1463
1
            LOG_INFO("removed packed file metadata")
1464
1
                    .tag("instance_id", instance_id_)
1465
1
                    .tag("packed_file_path", packed_file_path);
1466
1
            return 0;
1467
1
        }
1468
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
1469
0
            if (del_attempt >= max_retry_times) {
1470
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
1471
0
                        .tag("instance_id", instance_id_)
1472
0
                        .tag("packed_file_path", packed_file_path)
1473
0
                        .tag("del_attempt", del_attempt);
1474
0
                return -1;
1475
0
            }
1476
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
1477
0
                    .tag("instance_id", instance_id_)
1478
0
                    .tag("packed_file_path", packed_file_path)
1479
0
                    .tag("del_attempt", del_attempt);
1480
0
            sleep_for_packed_file_retry();
1481
0
            continue;
1482
0
        }
1483
0
        LOG_WARNING("failed to remove packed file kv")
1484
0
                .tag("instance_id", instance_id_)
1485
0
                .tag("packed_file_path", packed_file_path)
1486
0
                .tag("del_attempt", del_attempt)
1487
0
                .tag("err", err);
1488
0
        return -1;
1489
0
    }
1490
1491
0
    return -1;
1492
1
}
1493
1494
int InstanceRecycler::handle_packed_file_kv(std::string_view key, std::string_view /*value*/,
1495
4
                                            PackedFileRecycleStats* stats, int* ret) {
1496
4
    if (stats) {
1497
4
        ++stats->num_scanned;
1498
4
    }
1499
4
    std::string packed_file_path;
1500
4
    if (!decode_packed_file_key(key, &packed_file_path)) {
1501
0
        LOG_WARNING("failed to decode packed file key")
1502
0
                .tag("instance_id", instance_id_)
1503
0
                .tag("key", hex(key));
1504
0
        if (stats) {
1505
0
            ++stats->num_failed;
1506
0
        }
1507
0
        if (ret) {
1508
0
            *ret = -1;
1509
0
        }
1510
0
        return 0;
1511
0
    }
1512
1513
4
    std::string packed_key(key);
1514
4
    int process_ret = process_single_packed_file(packed_key, packed_file_path, stats);
1515
4
    if (process_ret != 0) {
1516
0
        if (stats) {
1517
0
            ++stats->num_failed;
1518
0
        }
1519
0
        if (ret) {
1520
0
            *ret = -1;
1521
0
        }
1522
0
    }
1523
4
    return 0;
1524
4
}
1525
1526
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
1527
9.77k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
1528
9.77k
    if (config::force_immediate_recycle) {
1529
15
        return 0L;
1530
15
    }
1531
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1532
9.75k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1533
9.75k
    int64_t retention_seconds = config::retention_seconds;
1534
9.75k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1535
7.80k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1536
7.80k
    }
1537
9.75k
    int64_t final_expiration = expiration + retention_seconds;
1538
9.75k
    if (*earlest_ts > final_expiration) {
1539
7
        *earlest_ts = final_expiration;
1540
7
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1541
7
    }
1542
9.75k
    return final_expiration;
1543
9.77k
}
1544
1545
int64_t calculate_partition_expired_time(
1546
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
1547
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
1548
9
    if (config::force_immediate_recycle) {
1549
3
        return 0L;
1550
3
    }
1551
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
1552
6
                                                            : partition_meta_pb.creation_time();
1553
6
    int64_t retention_seconds = config::retention_seconds;
1554
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
1555
6
        retention_seconds =
1556
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1557
6
    }
1558
6
    int64_t final_expiration = expiration + retention_seconds;
1559
6
    if (*earlest_ts > final_expiration) {
1560
2
        *earlest_ts = final_expiration;
1561
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1562
2
    }
1563
6
    return final_expiration;
1564
9
}
1565
1566
int64_t calculate_index_expired_time(const std::string& instance_id_,
1567
                                     const RecycleIndexPB& index_meta_pb,
1568
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1569
10
    if (config::force_immediate_recycle) {
1570
4
        return 0L;
1571
4
    }
1572
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1573
6
                                                        : index_meta_pb.creation_time();
1574
6
    int64_t retention_seconds = config::retention_seconds;
1575
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1576
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1577
6
    }
1578
6
    int64_t final_expiration = expiration + retention_seconds;
1579
6
    if (*earlest_ts > final_expiration) {
1580
2
        *earlest_ts = final_expiration;
1581
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1582
2
    }
1583
6
    return final_expiration;
1584
10
}
1585
1586
int64_t calculate_tmp_rowset_expired_time(
1587
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1588
106k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1589
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1590
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1591
    //  duration or timeout always < `retention_time` in practice.
1592
106k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1593
106k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1594
106k
                                 : tmp_rowset_meta_pb.creation_time();
1595
106k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1596
106k
    int64_t final_expiration = expiration + config::retention_seconds;
1597
106k
    if (*earlest_ts > final_expiration) {
1598
24
        *earlest_ts = final_expiration;
1599
24
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1600
24
    }
1601
106k
    return final_expiration;
1602
106k
}
1603
1604
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1605
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1606
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1607
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1608
8
        *earlest_ts = final_expiration / 1000;
1609
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1610
8
    }
1611
30.0k
    return final_expiration;
1612
30.0k
}
1613
1614
int64_t calculate_restore_job_expired_time(
1615
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1616
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1617
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1618
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1619
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1620
        // final state, recycle immediately
1621
41
        return 0L;
1622
41
    }
1623
    // not final state, wait much longer than the FE's timeout(1 day)
1624
0
    int64_t last_modified_s =
1625
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1626
0
    int64_t expiration = restore_job.expired_at_s() > 0
1627
0
                                 ? last_modified_s + restore_job.expired_at_s()
1628
0
                                 : last_modified_s;
1629
0
    int64_t final_expiration = expiration + config::retention_seconds;
1630
0
    if (*earlest_ts > final_expiration) {
1631
0
        *earlest_ts = final_expiration;
1632
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1633
0
    }
1634
0
    return final_expiration;
1635
41
}
1636
1637
2
int InstanceRecycler::abort_txn_for_related_rowset(int64_t txn_id) {
1638
2
    AbortTxnRequest req;
1639
2
    TxnInfoPB txn_info;
1640
2
    MetaServiceCode code = MetaServiceCode::OK;
1641
2
    std::string msg;
1642
2
    std::stringstream ss;
1643
2
    std::unique_ptr<Transaction> txn;
1644
2
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1645
2
    if (err != TxnErrorCode::TXN_OK) {
1646
0
        LOG_WARNING("failed to create txn").tag("err", err);
1647
0
        return -1;
1648
0
    }
1649
1650
    // get txn index
1651
2
    TxnIndexPB txn_idx_pb;
1652
2
    auto index_key = txn_index_key({instance_id_, txn_id});
1653
2
    std::string index_val;
1654
2
    err = txn->get(index_key, &index_val);
1655
2
    if (err != TxnErrorCode::TXN_OK) {
1656
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1657
            // maybe recycled
1658
0
            LOG_INFO("txn index not found, txn_id={} instance_id={}", txn_id, instance_id_)
1659
0
                    .tag("key", hex(index_key))
1660
0
                    .tag("txn_id", txn_id);
1661
0
            return 0;
1662
0
        }
1663
0
        LOG_WARNING("failed to get txn index")
1664
0
                .tag("err", err)
1665
0
                .tag("key", hex(index_key))
1666
0
                .tag("txn_id", txn_id);
1667
0
        return -1;
1668
0
    }
1669
2
    if (!txn_idx_pb.ParseFromString(index_val)) {
1670
0
        LOG_WARNING("failed to parse txn index")
1671
0
                .tag("err", err)
1672
0
                .tag("key", hex(index_key))
1673
0
                .tag("txn_id", txn_id);
1674
0
        return -1;
1675
0
    }
1676
1677
2
    auto info_key = txn_info_key({instance_id_, txn_idx_pb.tablet_index().db_id(), txn_id});
1678
2
    std::string info_val;
1679
2
    err = txn->get(info_key, &info_val);
1680
2
    if (err != TxnErrorCode::TXN_OK) {
1681
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1682
            // maybe recycled
1683
0
            LOG_INFO("txn info not found, txn_id={} instance_id={}", txn_id, instance_id_)
1684
0
                    .tag("key", hex(info_key))
1685
0
                    .tag("txn_id", txn_id);
1686
0
            return 0;
1687
0
        }
1688
0
        LOG_WARNING("failed to get txn info")
1689
0
                .tag("err", err)
1690
0
                .tag("key", hex(info_key))
1691
0
                .tag("txn_id", txn_id);
1692
0
        return -1;
1693
0
    }
1694
2
    if (!txn_info.ParseFromString(info_val)) {
1695
0
        LOG_WARNING("failed to parse txn info")
1696
0
                .tag("err", err)
1697
0
                .tag("key", hex(info_key))
1698
0
                .tag("txn_id", txn_id);
1699
0
        return -1;
1700
0
    }
1701
1702
2
    if (txn_info.status() != TxnStatusPB::TXN_STATUS_PREPARED) {
1703
0
        LOG_INFO("txn is not prepared status, txn_id={} status={}", txn_id, txn_info.status())
1704
0
                .tag("key", hex(info_key))
1705
0
                .tag("txn_id", txn_id);
1706
0
        return 0;
1707
0
    }
1708
1709
2
    req.set_txn_id(txn_id);
1710
1711
2
    LOG(INFO) << "begin abort txn for related rowset, txn_id=" << txn_id
1712
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString();
1713
1714
2
    _abort_txn(instance_id_, &req, txn.get(), txn_info, ss, code, msg);
1715
2
    err = txn->commit();
1716
2
    if (err != TxnErrorCode::TXN_OK) {
1717
0
        code = cast_as<ErrCategory::COMMIT>(err);
1718
0
        ss << "failed to commit kv txn, txn_id=" << txn_info.txn_id() << " err=" << err;
1719
0
        msg = ss.str();
1720
0
        return -1;
1721
0
    }
1722
1723
2
    LOG(INFO) << "finish abort txn for related rowset, txn_id=" << txn_id
1724
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString()
1725
2
              << " code=" << code << " msg=" << msg;
1726
1727
2
    return 0;
1728
2
}
1729
1730
4
int InstanceRecycler::abort_job_for_related_rowset(const RowsetMetaCloudPB& rowset_meta) {
1731
4
    FinishTabletJobRequest req;
1732
4
    FinishTabletJobResponse res;
1733
4
    req.set_action(FinishTabletJobRequest::ABORT);
1734
4
    MetaServiceCode code = MetaServiceCode::OK;
1735
4
    std::string msg;
1736
4
    std::stringstream ss;
1737
1738
4
    TabletIndexPB tablet_idx;
1739
4
    int ret = get_tablet_idx(txn_kv_.get(), instance_id_, rowset_meta.tablet_id(), tablet_idx);
1740
4
    if (ret == 1) {
1741
        // tablet maybe recycled, directly return 0
1742
1
        return 0;
1743
3
    } else if (ret != 0) {
1744
0
        LOG(WARNING) << "failed to get tablet index, tablet_id=" << rowset_meta.tablet_id()
1745
0
                     << " instance_id=" << instance_id_ << " ret=" << ret;
1746
0
        return ret;
1747
0
    }
1748
1749
3
    std::unique_ptr<Transaction> txn;
1750
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1751
3
    if (err != TxnErrorCode::TXN_OK) {
1752
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_ << " err=" << err;
1753
0
        return -1;
1754
0
    }
1755
1756
3
    std::string job_key =
1757
3
            job_tablet_key({instance_id_, tablet_idx.table_id(), tablet_idx.index_id(),
1758
3
                            tablet_idx.partition_id(), tablet_idx.tablet_id()});
1759
3
    std::string job_val;
1760
3
    err = txn->get(job_key, &job_val);
1761
3
    if (err != TxnErrorCode::TXN_OK) {
1762
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1763
0
            LOG(INFO) << "job not exists, instance_id=" << instance_id_
1764
0
                      << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1765
0
            return 0;
1766
0
        }
1767
0
        LOG(WARNING) << "failed to get job, instance_id=" << instance_id_
1768
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " err=" << err
1769
0
                     << " key=" << hex(job_key);
1770
0
        return -1;
1771
0
    }
1772
1773
3
    TabletJobInfoPB job_pb;
1774
3
    if (!job_pb.ParseFromString(job_val)) {
1775
0
        LOG(WARNING) << "failed to parse job, instance_id=" << instance_id_
1776
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1777
0
        return -1;
1778
0
    }
1779
1780
3
    std::string job_id {};
1781
3
    if (!job_pb.compaction().empty()) {
1782
2
        for (const auto& c : job_pb.compaction()) {
1783
2
            if (c.id() == rowset_meta.job_id()) {
1784
2
                job_id = c.id();
1785
2
                break;
1786
2
            }
1787
2
        }
1788
2
    } else if (job_pb.has_schema_change()) {
1789
1
        job_id = job_pb.schema_change().id();
1790
1
    }
1791
1792
3
    if (!job_id.empty() && rowset_meta.job_id() == job_id) {
1793
3
        LOG(INFO) << "begin to abort job for related rowset, job_id=" << rowset_meta.job_id()
1794
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id();
1795
3
        req.mutable_job()->CopyFrom(job_pb);
1796
3
        req.set_action(FinishTabletJobRequest::ABORT);
1797
3
        _finish_tablet_job(&req, &res, instance_id_, txn, txn_kv_.get(),
1798
3
                           delete_bitmap_lock_white_list_.get(), resource_mgr_.get(), code, msg,
1799
3
                           ss);
1800
3
        if (code != MetaServiceCode::OK) {
1801
0
            LOG(WARNING) << "failed to abort job, instance_id=" << instance_id_
1802
0
                         << " tablet_id=" << tablet_idx.tablet_id() << " code=" << code
1803
0
                         << " msg=" << msg;
1804
0
            return -1;
1805
0
        }
1806
3
        LOG(INFO) << "finish abort job for related rowset, job_id=" << rowset_meta.job_id()
1807
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id()
1808
3
                  << " code=" << code << " msg=" << msg;
1809
3
    } else {
1810
        // clang-format off
1811
0
        LOG(INFO) << "there is no job for related rowset, directly recycle rowset data"
1812
0
                  << ", instance_id=" << instance_id_ 
1813
0
                  << ", tablet_id=" << tablet_idx.tablet_id() 
1814
0
                  << ", job_id=" << job_id
1815
0
                  << ", rowset_id=" << rowset_meta.rowset_id_v2();
1816
        // clang-format on
1817
0
    }
1818
1819
3
    return 0;
1820
3
}
1821
1822
template <typename T>
1823
57.7k
int InstanceRecycler::abort_txn_or_job_for_recycle(T& rowset_meta_pb) {
1824
57.7k
    RowsetMetaCloudPB* rs_meta;
1825
57.7k
    RecycleRowsetPB::Type rowset_type = RecycleRowsetPB::PREPARE;
1826
1827
57.7k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1828
        // For keys that are not in the RecycleRowsetPB::PREPARE state
1829
        // we do not need to check the job or txn state
1830
        // because tmp_rowset_key already exists when this key is generated.
1831
3.75k
        rowset_type = rowset_meta_pb.type();
1832
3.75k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1833
54.0k
    } else {
1834
54.0k
        rs_meta = &rowset_meta_pb;
1835
54.0k
    }
1836
1837
57.7k
    DCHECK(rs_meta != nullptr);
1838
1839
    // compaction/sc will generate recycle_rowset_key for each input rowset with load_id
1840
    // we need skip them because the related txn has been finished
1841
    // load_rowset1 load_rowset2 => pick for compaction => compact_rowset
1842
    // compact_rowset1 compact_rowset2 => pick for compaction/sc job => new_rowset
1843
57.7k
    if (rowset_type == RecycleRowsetPB::PREPARE) {
1844
54.6k
        if (rs_meta->has_load_id()) {
1845
            // load
1846
2
            return abort_txn_for_related_rowset(rs_meta->txn_id());
1847
54.6k
        } else if (rs_meta->has_job_id()) {
1848
            // compaction / schema change
1849
3
            return abort_job_for_related_rowset(*rs_meta);
1850
3
        }
1851
54.6k
    }
1852
1853
57.7k
    return 0;
1854
57.7k
}
_ZN5doris5cloud16InstanceRecycler28abort_txn_or_job_for_recycleINS0_15RecycleRowsetPBEEEiRT_
Line
Count
Source
1823
3.75k
int InstanceRecycler::abort_txn_or_job_for_recycle(T& rowset_meta_pb) {
1824
3.75k
    RowsetMetaCloudPB* rs_meta;
1825
3.75k
    RecycleRowsetPB::Type rowset_type = RecycleRowsetPB::PREPARE;
1826
1827
3.75k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1828
        // For keys that are not in the RecycleRowsetPB::PREPARE state
1829
        // we do not need to check the job or txn state
1830
        // because tmp_rowset_key already exists when this key is generated.
1831
3.75k
        rowset_type = rowset_meta_pb.type();
1832
3.75k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1833
3.75k
    } else {
1834
3.75k
        rs_meta = &rowset_meta_pb;
1835
3.75k
    }
1836
1837
3.75k
    DCHECK(rs_meta != nullptr);
1838
1839
    // compaction/sc will generate recycle_rowset_key for each input rowset with load_id
1840
    // we need skip them because the related txn has been finished
1841
    // load_rowset1 load_rowset2 => pick for compaction => compact_rowset
1842
    // compact_rowset1 compact_rowset2 => pick for compaction/sc job => new_rowset
1843
3.75k
    if (rowset_type == RecycleRowsetPB::PREPARE) {
1844
652
        if (rs_meta->has_load_id()) {
1845
            // load
1846
1
            return abort_txn_for_related_rowset(rs_meta->txn_id());
1847
651
        } else if (rs_meta->has_job_id()) {
1848
            // compaction / schema change
1849
1
            return abort_job_for_related_rowset(*rs_meta);
1850
1
        }
1851
652
    }
1852
1853
3.75k
    return 0;
1854
3.75k
}
_ZN5doris5cloud16InstanceRecycler28abort_txn_or_job_for_recycleINS_17RowsetMetaCloudPBEEEiRT_
Line
Count
Source
1823
54.0k
int InstanceRecycler::abort_txn_or_job_for_recycle(T& rowset_meta_pb) {
1824
54.0k
    RowsetMetaCloudPB* rs_meta;
1825
54.0k
    RecycleRowsetPB::Type rowset_type = RecycleRowsetPB::PREPARE;
1826
1827
54.0k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1828
        // For keys that are not in the RecycleRowsetPB::PREPARE state
1829
        // we do not need to check the job or txn state
1830
        // because tmp_rowset_key already exists when this key is generated.
1831
54.0k
        rowset_type = rowset_meta_pb.type();
1832
54.0k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1833
54.0k
    } else {
1834
54.0k
        rs_meta = &rowset_meta_pb;
1835
54.0k
    }
1836
1837
54.0k
    DCHECK(rs_meta != nullptr);
1838
1839
    // compaction/sc will generate recycle_rowset_key for each input rowset with load_id
1840
    // we need skip them because the related txn has been finished
1841
    // load_rowset1 load_rowset2 => pick for compaction => compact_rowset
1842
    // compact_rowset1 compact_rowset2 => pick for compaction/sc job => new_rowset
1843
54.0k
    if (rowset_type == RecycleRowsetPB::PREPARE) {
1844
54.0k
        if (rs_meta->has_load_id()) {
1845
            // load
1846
1
            return abort_txn_for_related_rowset(rs_meta->txn_id());
1847
54.0k
        } else if (rs_meta->has_job_id()) {
1848
            // compaction / schema change
1849
2
            return abort_job_for_related_rowset(*rs_meta);
1850
2
        }
1851
54.0k
    }
1852
1853
54.0k
    return 0;
1854
54.0k
}
1855
1856
template <typename T>
1857
int mark_rowset_as_recycled(TxnKv* txn_kv, const std::string& instance_id, std::string_view key,
1858
113k
                            T& rowset_meta_pb) {
1859
113k
    RowsetMetaCloudPB* rs_meta;
1860
1861
113k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1862
106k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1863
106k
    } else {
1864
106k
        rs_meta = &rowset_meta_pb;
1865
106k
    }
1866
1867
113k
    bool need_write_back = false;
1868
113k
    if ((!rs_meta->has_is_recycled() || !rs_meta->is_recycled())) {
1869
55.7k
        need_write_back = true;
1870
55.7k
        rs_meta->set_is_recycled(true);
1871
55.7k
    }
1872
1873
113k
    if (need_write_back) {
1874
55.7k
        std::unique_ptr<Transaction> txn;
1875
55.7k
        TxnErrorCode err = txn_kv->create_txn(&txn);
1876
55.7k
        if (err != TxnErrorCode::TXN_OK) {
1877
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1878
0
            return -1;
1879
0
        }
1880
        // double check becase of new transaction
1881
55.7k
        T rowset_meta;
1882
55.7k
        std::string val;
1883
55.7k
        err = txn->get(key, &val);
1884
55.7k
        if (!rowset_meta.ParseFromString(val)) {
1885
0
            LOG(WARNING) << "failed to parse rs_meta, instance_id=" << instance_id;
1886
0
            return -1;
1887
0
        }
1888
55.7k
        if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1889
52.0k
            rs_meta = rowset_meta.mutable_rowset_meta();
1890
52.0k
        } else {
1891
52.0k
            rs_meta = &rowset_meta;
1892
52.0k
        }
1893
55.7k
        if ((rs_meta->has_is_recycled() && rs_meta->is_recycled())) {
1894
0
            return 0;
1895
0
        }
1896
55.7k
        rs_meta->set_is_recycled(true);
1897
55.7k
        val.clear();
1898
55.7k
        rowset_meta.SerializeToString(&val);
1899
55.7k
        txn->put(key, val);
1900
55.7k
        err = txn->commit();
1901
55.7k
        if (err != TxnErrorCode::TXN_OK) {
1902
0
            LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1903
0
            return -1;
1904
0
        }
1905
55.7k
    }
1906
113k
    return need_write_back ? 1 : 0;
1907
113k
}
_ZN5doris5cloud23mark_rowset_as_recycledINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS8_ERT_
Line
Count
Source
1858
7.50k
                            T& rowset_meta_pb) {
1859
7.50k
    RowsetMetaCloudPB* rs_meta;
1860
1861
7.50k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1862
7.50k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1863
7.50k
    } else {
1864
7.50k
        rs_meta = &rowset_meta_pb;
1865
7.50k
    }
1866
1867
7.50k
    bool need_write_back = false;
1868
7.50k
    if ((!rs_meta->has_is_recycled() || !rs_meta->is_recycled())) {
1869
3.75k
        need_write_back = true;
1870
3.75k
        rs_meta->set_is_recycled(true);
1871
3.75k
    }
1872
1873
7.50k
    if (need_write_back) {
1874
3.75k
        std::unique_ptr<Transaction> txn;
1875
3.75k
        TxnErrorCode err = txn_kv->create_txn(&txn);
1876
3.75k
        if (err != TxnErrorCode::TXN_OK) {
1877
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1878
0
            return -1;
1879
0
        }
1880
        // double check becase of new transaction
1881
3.75k
        T rowset_meta;
1882
3.75k
        std::string val;
1883
3.75k
        err = txn->get(key, &val);
1884
3.75k
        if (!rowset_meta.ParseFromString(val)) {
1885
0
            LOG(WARNING) << "failed to parse rs_meta, instance_id=" << instance_id;
1886
0
            return -1;
1887
0
        }
1888
3.75k
        if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1889
3.75k
            rs_meta = rowset_meta.mutable_rowset_meta();
1890
3.75k
        } else {
1891
3.75k
            rs_meta = &rowset_meta;
1892
3.75k
        }
1893
3.75k
        if ((rs_meta->has_is_recycled() && rs_meta->is_recycled())) {
1894
0
            return 0;
1895
0
        }
1896
3.75k
        rs_meta->set_is_recycled(true);
1897
3.75k
        val.clear();
1898
3.75k
        rowset_meta.SerializeToString(&val);
1899
3.75k
        txn->put(key, val);
1900
3.75k
        err = txn->commit();
1901
3.75k
        if (err != TxnErrorCode::TXN_OK) {
1902
0
            LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1903
0
            return -1;
1904
0
        }
1905
3.75k
    }
1906
7.50k
    return need_write_back ? 1 : 0;
1907
7.50k
}
_ZN5doris5cloud23mark_rowset_as_recycledINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS8_ERT_
Line
Count
Source
1858
106k
                            T& rowset_meta_pb) {
1859
106k
    RowsetMetaCloudPB* rs_meta;
1860
1861
106k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1862
106k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1863
106k
    } else {
1864
106k
        rs_meta = &rowset_meta_pb;
1865
106k
    }
1866
1867
106k
    bool need_write_back = false;
1868
106k
    if ((!rs_meta->has_is_recycled() || !rs_meta->is_recycled())) {
1869
52.0k
        need_write_back = true;
1870
52.0k
        rs_meta->set_is_recycled(true);
1871
52.0k
    }
1872
1873
106k
    if (need_write_back) {
1874
52.0k
        std::unique_ptr<Transaction> txn;
1875
52.0k
        TxnErrorCode err = txn_kv->create_txn(&txn);
1876
52.0k
        if (err != TxnErrorCode::TXN_OK) {
1877
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1878
0
            return -1;
1879
0
        }
1880
        // double check becase of new transaction
1881
52.0k
        T rowset_meta;
1882
52.0k
        std::string val;
1883
52.0k
        err = txn->get(key, &val);
1884
52.0k
        if (!rowset_meta.ParseFromString(val)) {
1885
0
            LOG(WARNING) << "failed to parse rs_meta, instance_id=" << instance_id;
1886
0
            return -1;
1887
0
        }
1888
52.0k
        if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1889
52.0k
            rs_meta = rowset_meta.mutable_rowset_meta();
1890
52.0k
        } else {
1891
52.0k
            rs_meta = &rowset_meta;
1892
52.0k
        }
1893
52.0k
        if ((rs_meta->has_is_recycled() && rs_meta->is_recycled())) {
1894
0
            return 0;
1895
0
        }
1896
52.0k
        rs_meta->set_is_recycled(true);
1897
52.0k
        val.clear();
1898
52.0k
        rowset_meta.SerializeToString(&val);
1899
52.0k
        txn->put(key, val);
1900
52.0k
        err = txn->commit();
1901
52.0k
        if (err != TxnErrorCode::TXN_OK) {
1902
0
            LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1903
0
            return -1;
1904
0
        }
1905
52.0k
    }
1906
106k
    return need_write_back ? 1 : 0;
1907
106k
}
1908
1909
1
int InstanceRecycler::recycle_ref_rowsets(bool* has_unrecycled_rowsets) {
1910
1
    const std::string task_name = "recycle_ref_rowsets";
1911
1
    *has_unrecycled_rowsets = false;
1912
1913
1
    std::string data_rowset_ref_count_key_start =
1914
1
            versioned::data_rowset_ref_count_key({instance_id_, 0, ""});
1915
1
    std::string data_rowset_ref_count_key_end =
1916
1
            versioned::data_rowset_ref_count_key({instance_id_, INT64_MAX, ""});
1917
1918
1
    LOG_WARNING("begin to recycle ref rowsets").tag("instance_id", instance_id_);
1919
1920
1
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1921
1
    register_recycle_task(task_name, start_time);
1922
1923
1
    DORIS_CLOUD_DEFER {
1924
1
        unregister_recycle_task(task_name);
1925
1
        int64_t cost =
1926
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1927
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
1928
1
                .tag("instance_id", instance_id_);
1929
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
Line
Count
Source
1923
1
    DORIS_CLOUD_DEFER {
1924
1
        unregister_recycle_task(task_name);
1925
1
        int64_t cost =
1926
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1927
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
1928
1
                .tag("instance_id", instance_id_);
1929
1
    };
1930
1931
    // Phase 1: Scan to collect all tablet_ids that have rowset ref counts
1932
1
    std::set<int64_t> tablets_with_refs;
1933
1
    int64_t num_scanned = 0;
1934
1935
1
    auto scan_func = [&](std::string_view k, std::string_view v) -> int {
1936
0
        ++num_scanned;
1937
0
        int64_t tablet_id;
1938
0
        std::string rowset_id;
1939
0
        std::string_view key(k);
1940
0
        if (!versioned::decode_data_rowset_ref_count_key(&key, &tablet_id, &rowset_id)) {
1941
0
            LOG_WARNING("failed to decode data rowset ref count key").tag("key", hex(k));
1942
0
            return 0; // Continue scanning
1943
0
        }
1944
1945
0
        tablets_with_refs.insert(tablet_id);
1946
0
        return 0;
1947
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
1948
1949
1
    if (scan_and_recycle(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end,
1950
1
                         std::move(scan_func)) != 0) {
1951
0
        LOG_WARNING("failed to scan data rowset ref count keys");
1952
0
        return -1;
1953
0
    }
1954
1955
1
    LOG_INFO("collected {} tablets with rowset refs, scanned {} ref count keys",
1956
1
             tablets_with_refs.size(), num_scanned)
1957
1
            .tag("instance_id", instance_id_);
1958
1959
    // Phase 2: Recycle each tablet
1960
1
    int64_t num_recycled_tablets = 0;
1961
1
    for (int64_t tablet_id : tablets_with_refs) {
1962
0
        if (stopped()) {
1963
0
            LOG_INFO("recycler stopped, skip remaining tablets")
1964
0
                    .tag("instance_id", instance_id_)
1965
0
                    .tag("tablets_processed", num_recycled_tablets)
1966
0
                    .tag("tablets_remaining", tablets_with_refs.size() - num_recycled_tablets);
1967
0
            break;
1968
0
        }
1969
1970
0
        RecyclerMetricsContext metrics_context(instance_id_, task_name);
1971
0
        if (recycle_versioned_tablet(tablet_id, metrics_context) != 0) {
1972
0
            LOG_WARNING("failed to recycle tablet")
1973
0
                    .tag("instance_id", instance_id_)
1974
0
                    .tag("tablet_id", tablet_id);
1975
0
            return -1;
1976
0
        }
1977
0
        ++num_recycled_tablets;
1978
0
    }
1979
1980
1
    LOG_INFO("recycled {} tablets", num_recycled_tablets)
1981
1
            .tag("instance_id", instance_id_)
1982
1
            .tag("total_tablets", tablets_with_refs.size());
1983
1984
    // Phase 3: Scan again to check if any ref count keys still exist
1985
1
    std::unique_ptr<Transaction> txn;
1986
1
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1987
1
    if (err != TxnErrorCode::TXN_OK) {
1988
0
        LOG_WARNING("failed to create txn for final check")
1989
0
                .tag("instance_id", instance_id_)
1990
0
                .tag("err", err);
1991
0
        return -1;
1992
0
    }
1993
1994
1
    std::unique_ptr<RangeGetIterator> iter;
1995
1
    err = txn->get(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end, &iter, true);
1996
1
    if (err != TxnErrorCode::TXN_OK) {
1997
0
        LOG_WARNING("failed to create range iterator for final check")
1998
0
                .tag("instance_id", instance_id_)
1999
0
                .tag("err", err);
2000
0
        return -1;
2001
0
    }
2002
2003
1
    *has_unrecycled_rowsets = iter->has_next();
2004
1
    if (*has_unrecycled_rowsets) {
2005
0
        LOG_INFO("still has unrecycled rowsets after recycle_ref_rowsets")
2006
0
                .tag("instance_id", instance_id_);
2007
0
    }
2008
2009
1
    return 0;
2010
1
}
2011
2012
17
int InstanceRecycler::recycle_indexes() {
2013
17
    const std::string task_name = "recycle_indexes";
2014
17
    int64_t num_scanned = 0;
2015
17
    int64_t num_expired = 0;
2016
17
    int64_t num_recycled = 0;
2017
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2018
2019
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
2020
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
2021
17
    std::string index_key0;
2022
17
    std::string index_key1;
2023
17
    recycle_index_key(index_key_info0, &index_key0);
2024
17
    recycle_index_key(index_key_info1, &index_key1);
2025
2026
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
2027
2028
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2029
17
    register_recycle_task(task_name, start_time);
2030
2031
17
    DORIS_CLOUD_DEFER {
2032
17
        unregister_recycle_task(task_name);
2033
17
        int64_t cost =
2034
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2035
17
        metrics_context.finish_report();
2036
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2037
17
                .tag("instance_id", instance_id_)
2038
17
                .tag("num_scanned", num_scanned)
2039
17
                .tag("num_expired", num_expired)
2040
17
                .tag("num_recycled", num_recycled);
2041
17
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2031
2
    DORIS_CLOUD_DEFER {
2032
2
        unregister_recycle_task(task_name);
2033
2
        int64_t cost =
2034
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2035
2
        metrics_context.finish_report();
2036
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2037
2
                .tag("instance_id", instance_id_)
2038
2
                .tag("num_scanned", num_scanned)
2039
2
                .tag("num_expired", num_expired)
2040
2
                .tag("num_recycled", num_recycled);
2041
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2031
15
    DORIS_CLOUD_DEFER {
2032
15
        unregister_recycle_task(task_name);
2033
15
        int64_t cost =
2034
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2035
15
        metrics_context.finish_report();
2036
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2037
15
                .tag("instance_id", instance_id_)
2038
15
                .tag("num_scanned", num_scanned)
2039
15
                .tag("num_expired", num_expired)
2040
15
                .tag("num_recycled", num_recycled);
2041
15
    };
2042
2043
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2044
2045
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
2046
17
    std::vector<std::string_view> index_keys;
2047
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2048
10
        ++num_scanned;
2049
10
        RecycleIndexPB index_pb;
2050
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2051
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2052
0
            return -1;
2053
0
        }
2054
10
        int64_t current_time = ::time(nullptr);
2055
10
        if (current_time <
2056
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2057
0
            return 0;
2058
0
        }
2059
10
        ++num_expired;
2060
        // decode index_id
2061
10
        auto k1 = k;
2062
10
        k1.remove_prefix(1);
2063
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2064
10
        decode_key(&k1, &out);
2065
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2066
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2067
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2068
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2069
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2070
        // Change state to RECYCLING
2071
10
        std::unique_ptr<Transaction> txn;
2072
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2073
10
        if (err != TxnErrorCode::TXN_OK) {
2074
0
            LOG_WARNING("failed to create txn").tag("err", err);
2075
0
            return -1;
2076
0
        }
2077
10
        std::string val;
2078
10
        err = txn->get(k, &val);
2079
10
        if (err ==
2080
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2081
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2082
0
            return 0;
2083
0
        }
2084
10
        if (err != TxnErrorCode::TXN_OK) {
2085
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2086
0
            return -1;
2087
0
        }
2088
10
        index_pb.Clear();
2089
10
        if (!index_pb.ParseFromString(val)) {
2090
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2091
0
            return -1;
2092
0
        }
2093
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2094
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2095
9
            txn->put(k, index_pb.SerializeAsString());
2096
9
            err = txn->commit();
2097
9
            if (err != TxnErrorCode::TXN_OK) {
2098
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2099
0
                return -1;
2100
0
            }
2101
9
        }
2102
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2103
1
            LOG_WARNING("failed to recycle tablets under index")
2104
1
                    .tag("table_id", index_pb.table_id())
2105
1
                    .tag("instance_id", instance_id_)
2106
1
                    .tag("index_id", index_id);
2107
1
            return -1;
2108
1
        }
2109
2110
9
        if (index_pb.has_db_id()) {
2111
            // Recycle the versioned keys
2112
3
            std::unique_ptr<Transaction> txn;
2113
3
            err = txn_kv_->create_txn(&txn);
2114
3
            if (err != TxnErrorCode::TXN_OK) {
2115
0
                LOG_WARNING("failed to create txn").tag("err", err);
2116
0
                return -1;
2117
0
            }
2118
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2119
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2120
3
            std::string index_inverted_key = versioned::index_inverted_key(
2121
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2122
3
            versioned_remove_all(txn.get(), meta_key);
2123
3
            txn->remove(index_key);
2124
3
            txn->remove(index_inverted_key);
2125
3
            err = txn->commit();
2126
3
            if (err != TxnErrorCode::TXN_OK) {
2127
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2128
0
                return -1;
2129
0
            }
2130
3
        }
2131
2132
9
        metrics_context.total_recycled_num = ++num_recycled;
2133
9
        metrics_context.report();
2134
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2135
9
        index_keys.push_back(k);
2136
9
        return 0;
2137
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2047
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2048
2
        ++num_scanned;
2049
2
        RecycleIndexPB index_pb;
2050
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2051
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2052
0
            return -1;
2053
0
        }
2054
2
        int64_t current_time = ::time(nullptr);
2055
2
        if (current_time <
2056
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2057
0
            return 0;
2058
0
        }
2059
2
        ++num_expired;
2060
        // decode index_id
2061
2
        auto k1 = k;
2062
2
        k1.remove_prefix(1);
2063
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2064
2
        decode_key(&k1, &out);
2065
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2066
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2067
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2068
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2069
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2070
        // Change state to RECYCLING
2071
2
        std::unique_ptr<Transaction> txn;
2072
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2073
2
        if (err != TxnErrorCode::TXN_OK) {
2074
0
            LOG_WARNING("failed to create txn").tag("err", err);
2075
0
            return -1;
2076
0
        }
2077
2
        std::string val;
2078
2
        err = txn->get(k, &val);
2079
2
        if (err ==
2080
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2081
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2082
0
            return 0;
2083
0
        }
2084
2
        if (err != TxnErrorCode::TXN_OK) {
2085
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2086
0
            return -1;
2087
0
        }
2088
2
        index_pb.Clear();
2089
2
        if (!index_pb.ParseFromString(val)) {
2090
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2091
0
            return -1;
2092
0
        }
2093
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2094
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2095
1
            txn->put(k, index_pb.SerializeAsString());
2096
1
            err = txn->commit();
2097
1
            if (err != TxnErrorCode::TXN_OK) {
2098
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2099
0
                return -1;
2100
0
            }
2101
1
        }
2102
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2103
1
            LOG_WARNING("failed to recycle tablets under index")
2104
1
                    .tag("table_id", index_pb.table_id())
2105
1
                    .tag("instance_id", instance_id_)
2106
1
                    .tag("index_id", index_id);
2107
1
            return -1;
2108
1
        }
2109
2110
1
        if (index_pb.has_db_id()) {
2111
            // Recycle the versioned keys
2112
1
            std::unique_ptr<Transaction> txn;
2113
1
            err = txn_kv_->create_txn(&txn);
2114
1
            if (err != TxnErrorCode::TXN_OK) {
2115
0
                LOG_WARNING("failed to create txn").tag("err", err);
2116
0
                return -1;
2117
0
            }
2118
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2119
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2120
1
            std::string index_inverted_key = versioned::index_inverted_key(
2121
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2122
1
            versioned_remove_all(txn.get(), meta_key);
2123
1
            txn->remove(index_key);
2124
1
            txn->remove(index_inverted_key);
2125
1
            err = txn->commit();
2126
1
            if (err != TxnErrorCode::TXN_OK) {
2127
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2128
0
                return -1;
2129
0
            }
2130
1
        }
2131
2132
1
        metrics_context.total_recycled_num = ++num_recycled;
2133
1
        metrics_context.report();
2134
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2135
1
        index_keys.push_back(k);
2136
1
        return 0;
2137
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2047
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2048
8
        ++num_scanned;
2049
8
        RecycleIndexPB index_pb;
2050
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2051
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2052
0
            return -1;
2053
0
        }
2054
8
        int64_t current_time = ::time(nullptr);
2055
8
        if (current_time <
2056
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2057
0
            return 0;
2058
0
        }
2059
8
        ++num_expired;
2060
        // decode index_id
2061
8
        auto k1 = k;
2062
8
        k1.remove_prefix(1);
2063
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2064
8
        decode_key(&k1, &out);
2065
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2066
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2067
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2068
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2069
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2070
        // Change state to RECYCLING
2071
8
        std::unique_ptr<Transaction> txn;
2072
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2073
8
        if (err != TxnErrorCode::TXN_OK) {
2074
0
            LOG_WARNING("failed to create txn").tag("err", err);
2075
0
            return -1;
2076
0
        }
2077
8
        std::string val;
2078
8
        err = txn->get(k, &val);
2079
8
        if (err ==
2080
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2081
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2082
0
            return 0;
2083
0
        }
2084
8
        if (err != TxnErrorCode::TXN_OK) {
2085
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2086
0
            return -1;
2087
0
        }
2088
8
        index_pb.Clear();
2089
8
        if (!index_pb.ParseFromString(val)) {
2090
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2091
0
            return -1;
2092
0
        }
2093
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2094
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2095
8
            txn->put(k, index_pb.SerializeAsString());
2096
8
            err = txn->commit();
2097
8
            if (err != TxnErrorCode::TXN_OK) {
2098
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2099
0
                return -1;
2100
0
            }
2101
8
        }
2102
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2103
0
            LOG_WARNING("failed to recycle tablets under index")
2104
0
                    .tag("table_id", index_pb.table_id())
2105
0
                    .tag("instance_id", instance_id_)
2106
0
                    .tag("index_id", index_id);
2107
0
            return -1;
2108
0
        }
2109
2110
8
        if (index_pb.has_db_id()) {
2111
            // Recycle the versioned keys
2112
2
            std::unique_ptr<Transaction> txn;
2113
2
            err = txn_kv_->create_txn(&txn);
2114
2
            if (err != TxnErrorCode::TXN_OK) {
2115
0
                LOG_WARNING("failed to create txn").tag("err", err);
2116
0
                return -1;
2117
0
            }
2118
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2119
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2120
2
            std::string index_inverted_key = versioned::index_inverted_key(
2121
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2122
2
            versioned_remove_all(txn.get(), meta_key);
2123
2
            txn->remove(index_key);
2124
2
            txn->remove(index_inverted_key);
2125
2
            err = txn->commit();
2126
2
            if (err != TxnErrorCode::TXN_OK) {
2127
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2128
0
                return -1;
2129
0
            }
2130
2
        }
2131
2132
8
        metrics_context.total_recycled_num = ++num_recycled;
2133
8
        metrics_context.report();
2134
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2135
8
        index_keys.push_back(k);
2136
8
        return 0;
2137
8
    };
2138
2139
17
    auto loop_done = [&index_keys, this]() -> int {
2140
6
        if (index_keys.empty()) return 0;
2141
5
        DORIS_CLOUD_DEFER {
2142
5
            index_keys.clear();
2143
5
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2141
1
        DORIS_CLOUD_DEFER {
2142
1
            index_keys.clear();
2143
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2141
4
        DORIS_CLOUD_DEFER {
2142
4
            index_keys.clear();
2143
4
        };
2144
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2145
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2146
0
            return -1;
2147
0
        }
2148
5
        return 0;
2149
5
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2139
2
    auto loop_done = [&index_keys, this]() -> int {
2140
2
        if (index_keys.empty()) return 0;
2141
1
        DORIS_CLOUD_DEFER {
2142
1
            index_keys.clear();
2143
1
        };
2144
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2145
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2146
0
            return -1;
2147
0
        }
2148
1
        return 0;
2149
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2139
4
    auto loop_done = [&index_keys, this]() -> int {
2140
4
        if (index_keys.empty()) return 0;
2141
4
        DORIS_CLOUD_DEFER {
2142
4
            index_keys.clear();
2143
4
        };
2144
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2145
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2146
0
            return -1;
2147
0
        }
2148
4
        return 0;
2149
4
    };
2150
2151
17
    if (config::enable_recycler_stats_metrics) {
2152
0
        scan_and_statistics_indexes();
2153
0
    }
2154
    // recycle_func and loop_done for scan and recycle
2155
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
2156
17
}
2157
2158
// Reports whether the partition that owns `tablet_id` has no pending (lazily committed)
// transaction recorded in its partition version.
//
// The function reads two keys in a single transaction:
//   1. the tablet index (`meta_tablet_idx_key`) to find db_id / table_id / partition_id;
//   2. the partition version (`partition_version_key`) to inspect `pending_txn_ids`.
//
// @param txn_kv       KV store used to create the read transaction.
// @param instance_id  Instance the tablet belongs to.
// @param tablet_id    Tablet whose partition version is checked.
// @return true when the tablet index has no db_id, when the partition version key does not
//         exist, or when the partition version has no pending txn ids.
//         false when a pending txn id is present, or on any failure (txn creation, reading
//         the tablet index -- including a missing tablet index key -- reading the partition
//         version, or parsing either protobuf).
//
// NOTE(review): `txn_kv` and `instance_id` are taken by value, so the arguments are
// copied (or moved) on each call -- confirm whether that is intentional.
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
2159
8.24k
                             int64_t tablet_id) {
2160
8.24k
    // Test hook: presumably lets a test short-circuit this check with `true` -- confirm
    // against the sync point macro definition.
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
2161
2162
8.24k
    std::unique_ptr<Transaction> txn;
2163
8.24k
    TxnErrorCode err = txn_kv->create_txn(&txn);
2164
8.24k
    if (err != TxnErrorCode::TXN_OK) {
2165
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
2166
0
                     << " tablet_id=" << tablet_id << " err=" << err;
2167
0
        return false;
2168
0
    }
2169
2170
8.24k
    // Step 1: load the tablet index to learn which partition the tablet belongs to.
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
2171
8.24k
    std::string tablet_idx_val;
2172
8.24k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
2173
8.24k
    // Any non-OK result is a failure here; a missing key is not special-cased.
    if (TxnErrorCode::TXN_OK != err) {
2174
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
2175
0
                     << " tablet_id=" << tablet_id << " err=" << err
2176
0
                     << " key=" << hex(tablet_idx_key);
2177
0
        return false;
2178
0
    }
2179
2180
8.24k
    TabletIndexPB tablet_idx_pb;
2181
8.24k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
2182
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
2183
0
                     << " tablet_id=" << tablet_id;
2184
0
        return false;
2185
0
    }
2186
2187
8.24k
    if (!tablet_idx_pb.has_db_id()) {
2188
        // In previous versions, db_id was not set in the tablet index.
2189
        // It is set once the cluster is upgraded to a version that enables txn lazy commit.
2190
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
2191
0
                  << " instance_id=" << instance_id
2192
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
2193
0
        return true;
2194
0
    }
2195
2196
8.24k
    // Step 2: load the partition version, reusing the same transaction.
    std::string ver_val;
2197
8.24k
    std::string ver_key =
2198
8.24k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
2199
8.24k
                                   tablet_idx_pb.partition_id()});
2200
8.24k
    err = txn->get(ver_key, &ver_val);
2201
2202
8.24k
    // Unlike the tablet index lookup, a missing partition version yields `true`.
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2203
204
        LOG(INFO) << ""
2204
204
                     "partition version not found, instance_id="
2205
204
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
2206
204
                  << " table_id=" << tablet_idx_pb.table_id()
2207
204
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
2208
204
                  << " key=" << hex(ver_key);
2209
204
        return true;
2210
204
    }
2211
2212
8.03k
    if (TxnErrorCode::TXN_OK != err) {
2213
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
2214
0
                     << " db_id=" << tablet_idx_pb.db_id()
2215
0
                     << " table_id=" << tablet_idx_pb.table_id()
2216
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2217
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
2218
0
        return false;
2219
0
    }
2220
2221
8.03k
    VersionPB version_pb;
2222
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
2223
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
2224
0
                     << " db_id=" << tablet_idx_pb.db_id()
2225
0
                     << " table_id=" << tablet_idx_pb.table_id()
2226
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2227
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
2228
0
        return false;
2229
0
    }
2230
2231
8.03k
    // Any pending txn id means the answer is `false`.
    if (version_pb.pending_txn_ids_size() > 0) {
2232
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
2233
4.00k
        // The DCHECK asserts exactly one pending txn; only the first id is logged below.
        DCHECK(version_pb.pending_txn_ids_size() == 1);
2234
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
2235
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
2236
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
2237
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
2238
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
2239
4.00k
                     << " key=" << hex(ver_key);
2240
4.00k
        return false;
2241
4.00k
    }
2242
4.03k
    return true;
2243
8.03k
}
2244
2245
15
int InstanceRecycler::recycle_partitions() {
2246
15
    const std::string task_name = "recycle_partitions";
2247
15
    int64_t num_scanned = 0;
2248
15
    int64_t num_expired = 0;
2249
15
    int64_t num_recycled = 0;
2250
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2251
2252
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
2253
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
2254
15
    std::string part_key0;
2255
15
    std::string part_key1;
2256
15
    recycle_partition_key(part_key_info0, &part_key0);
2257
15
    recycle_partition_key(part_key_info1, &part_key1);
2258
2259
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
2260
2261
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2262
15
    register_recycle_task(task_name, start_time);
2263
2264
15
    DORIS_CLOUD_DEFER {
2265
15
        unregister_recycle_task(task_name);
2266
15
        int64_t cost =
2267
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2268
15
        metrics_context.finish_report();
2269
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2270
15
                .tag("instance_id", instance_id_)
2271
15
                .tag("num_scanned", num_scanned)
2272
15
                .tag("num_expired", num_expired)
2273
15
                .tag("num_recycled", num_recycled);
2274
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2264
2
    DORIS_CLOUD_DEFER {
2265
2
        unregister_recycle_task(task_name);
2266
2
        int64_t cost =
2267
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2268
2
        metrics_context.finish_report();
2269
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2270
2
                .tag("instance_id", instance_id_)
2271
2
                .tag("num_scanned", num_scanned)
2272
2
                .tag("num_expired", num_expired)
2273
2
                .tag("num_recycled", num_recycled);
2274
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2264
13
    DORIS_CLOUD_DEFER {
2265
13
        unregister_recycle_task(task_name);
2266
13
        int64_t cost =
2267
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2268
13
        metrics_context.finish_report();
2269
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2270
13
                .tag("instance_id", instance_id_)
2271
13
                .tag("num_scanned", num_scanned)
2272
13
                .tag("num_expired", num_expired)
2273
13
                .tag("num_recycled", num_recycled);
2274
13
    };
2275
2276
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2277
2278
    // Elements in `partition_keys` have the same lifetime as `it` in `scan_and_recycle`
2279
15
    std::vector<std::string_view> partition_keys;
2280
15
    std::vector<std::string> partition_version_keys;
2281
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2282
9
        ++num_scanned;
2283
9
        RecyclePartitionPB part_pb;
2284
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2285
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2286
0
            return -1;
2287
0
        }
2288
9
        int64_t current_time = ::time(nullptr);
2289
9
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2290
9
                                                            &earlest_ts)) { // not expired
2291
0
            return 0;
2292
0
        }
2293
9
        ++num_expired;
2294
        // decode partition_id
2295
9
        auto k1 = k;
2296
9
        k1.remove_prefix(1);
2297
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2298
9
        decode_key(&k1, &out);
2299
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2300
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2301
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2302
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2303
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2304
        // Change state to RECYCLING
2305
9
        std::unique_ptr<Transaction> txn;
2306
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2307
9
        if (err != TxnErrorCode::TXN_OK) {
2308
0
            LOG_WARNING("failed to create txn").tag("err", err);
2309
0
            return -1;
2310
0
        }
2311
9
        std::string val;
2312
9
        err = txn->get(k, &val);
2313
9
        if (err ==
2314
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2315
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2316
0
            return 0;
2317
0
        }
2318
9
        if (err != TxnErrorCode::TXN_OK) {
2319
0
            LOG_WARNING("failed to get kv");
2320
0
            return -1;
2321
0
        }
2322
9
        part_pb.Clear();
2323
9
        if (!part_pb.ParseFromString(val)) {
2324
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2325
0
            return -1;
2326
0
        }
2327
        // Partitions with PREPARED state MUST have no data
2328
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2329
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2330
8
            txn->put(k, part_pb.SerializeAsString());
2331
8
            err = txn->commit();
2332
8
            if (err != TxnErrorCode::TXN_OK) {
2333
0
                LOG_WARNING("failed to commit txn: {}", err);
2334
0
                return -1;
2335
0
            }
2336
8
        }
2337
2338
9
        int ret = 0;
2339
33
        for (int64_t index_id : part_pb.index_id()) {
2340
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2341
1
                LOG_WARNING("failed to recycle tablets under partition")
2342
1
                        .tag("table_id", part_pb.table_id())
2343
1
                        .tag("instance_id", instance_id_)
2344
1
                        .tag("index_id", index_id)
2345
1
                        .tag("partition_id", partition_id);
2346
1
                ret = -1;
2347
1
            }
2348
33
        }
2349
9
        if (ret == 0 && part_pb.has_db_id()) {
2350
            // Recycle the versioned keys
2351
8
            std::unique_ptr<Transaction> txn;
2352
8
            err = txn_kv_->create_txn(&txn);
2353
8
            if (err != TxnErrorCode::TXN_OK) {
2354
0
                LOG_WARNING("failed to create txn").tag("err", err);
2355
0
                return -1;
2356
0
            }
2357
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2358
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2359
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2360
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2361
8
            std::string partition_version_key =
2362
8
                    versioned::partition_version_key({instance_id_, partition_id});
2363
8
            versioned_remove_all(txn.get(), meta_key);
2364
8
            txn->remove(index_key);
2365
8
            txn->remove(inverted_index_key);
2366
8
            versioned_remove_all(txn.get(), partition_version_key);
2367
8
            err = txn->commit();
2368
8
            if (err != TxnErrorCode::TXN_OK) {
2369
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2370
0
                return -1;
2371
0
            }
2372
8
        }
2373
2374
9
        if (ret == 0) {
2375
8
            ++num_recycled;
2376
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2377
8
            partition_keys.push_back(k);
2378
8
            if (part_pb.db_id() > 0) {
2379
8
                partition_version_keys.push_back(partition_version_key(
2380
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2381
8
            }
2382
8
            metrics_context.total_recycled_num = num_recycled;
2383
8
            metrics_context.report();
2384
8
        }
2385
9
        return ret;
2386
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2281
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2282
2
        ++num_scanned;
2283
2
        RecyclePartitionPB part_pb;
2284
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2285
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2286
0
            return -1;
2287
0
        }
2288
2
        int64_t current_time = ::time(nullptr);
2289
2
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2290
2
                                                            &earlest_ts)) { // not expired
2291
0
            return 0;
2292
0
        }
2293
2
        ++num_expired;
2294
        // decode partition_id
2295
2
        auto k1 = k;
2296
2
        k1.remove_prefix(1);
2297
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2298
2
        decode_key(&k1, &out);
2299
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2300
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2301
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2302
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2303
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2304
        // Change state to RECYCLING
2305
2
        std::unique_ptr<Transaction> txn;
2306
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2307
2
        if (err != TxnErrorCode::TXN_OK) {
2308
0
            LOG_WARNING("failed to create txn").tag("err", err);
2309
0
            return -1;
2310
0
        }
2311
2
        std::string val;
2312
2
        err = txn->get(k, &val);
2313
2
        if (err ==
2314
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2315
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2316
0
            return 0;
2317
0
        }
2318
2
        if (err != TxnErrorCode::TXN_OK) {
2319
0
            LOG_WARNING("failed to get kv");
2320
0
            return -1;
2321
0
        }
2322
2
        part_pb.Clear();
2323
2
        if (!part_pb.ParseFromString(val)) {
2324
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2325
0
            return -1;
2326
0
        }
2327
        // Partitions with PREPARED state MUST have no data
2328
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2329
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2330
1
            txn->put(k, part_pb.SerializeAsString());
2331
1
            err = txn->commit();
2332
1
            if (err != TxnErrorCode::TXN_OK) {
2333
0
                LOG_WARNING("failed to commit txn: {}", err);
2334
0
                return -1;
2335
0
            }
2336
1
        }
2337
2338
2
        int ret = 0;
2339
2
        for (int64_t index_id : part_pb.index_id()) {
2340
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2341
1
                LOG_WARNING("failed to recycle tablets under partition")
2342
1
                        .tag("table_id", part_pb.table_id())
2343
1
                        .tag("instance_id", instance_id_)
2344
1
                        .tag("index_id", index_id)
2345
1
                        .tag("partition_id", partition_id);
2346
1
                ret = -1;
2347
1
            }
2348
2
        }
2349
2
        if (ret == 0 && part_pb.has_db_id()) {
2350
            // Recycle the versioned keys
2351
1
            std::unique_ptr<Transaction> txn;
2352
1
            err = txn_kv_->create_txn(&txn);
2353
1
            if (err != TxnErrorCode::TXN_OK) {
2354
0
                LOG_WARNING("failed to create txn").tag("err", err);
2355
0
                return -1;
2356
0
            }
2357
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2358
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2359
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2360
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2361
1
            std::string partition_version_key =
2362
1
                    versioned::partition_version_key({instance_id_, partition_id});
2363
1
            versioned_remove_all(txn.get(), meta_key);
2364
1
            txn->remove(index_key);
2365
1
            txn->remove(inverted_index_key);
2366
1
            versioned_remove_all(txn.get(), partition_version_key);
2367
1
            err = txn->commit();
2368
1
            if (err != TxnErrorCode::TXN_OK) {
2369
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2370
0
                return -1;
2371
0
            }
2372
1
        }
2373
2374
2
        if (ret == 0) {
2375
1
            ++num_recycled;
2376
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2377
1
            partition_keys.push_back(k);
2378
1
            if (part_pb.db_id() > 0) {
2379
1
                partition_version_keys.push_back(partition_version_key(
2380
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2381
1
            }
2382
1
            metrics_context.total_recycled_num = num_recycled;
2383
1
            metrics_context.report();
2384
1
        }
2385
2
        return ret;
2386
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2281
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2282
7
        ++num_scanned;
2283
7
        RecyclePartitionPB part_pb;
2284
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2285
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2286
0
            return -1;
2287
0
        }
2288
7
        int64_t current_time = ::time(nullptr);
2289
7
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2290
7
                                                            &earlest_ts)) { // not expired
2291
0
            return 0;
2292
0
        }
2293
7
        ++num_expired;
2294
        // decode partition_id
2295
7
        auto k1 = k;
2296
7
        k1.remove_prefix(1);
2297
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2298
7
        decode_key(&k1, &out);
2299
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2300
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2301
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2302
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2303
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2304
        // Change state to RECYCLING
2305
7
        std::unique_ptr<Transaction> txn;
2306
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2307
7
        if (err != TxnErrorCode::TXN_OK) {
2308
0
            LOG_WARNING("failed to create txn").tag("err", err);
2309
0
            return -1;
2310
0
        }
2311
7
        std::string val;
2312
7
        err = txn->get(k, &val);
2313
7
        if (err ==
2314
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2315
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2316
0
            return 0;
2317
0
        }
2318
7
        if (err != TxnErrorCode::TXN_OK) {
2319
0
            LOG_WARNING("failed to get kv");
2320
0
            return -1;
2321
0
        }
2322
7
        part_pb.Clear();
2323
7
        if (!part_pb.ParseFromString(val)) {
2324
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2325
0
            return -1;
2326
0
        }
2327
        // Partitions with PREPARED state MUST have no data
2328
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2329
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2330
7
            txn->put(k, part_pb.SerializeAsString());
2331
7
            err = txn->commit();
2332
7
            if (err != TxnErrorCode::TXN_OK) {
2333
0
                LOG_WARNING("failed to commit txn: {}", err);
2334
0
                return -1;
2335
0
            }
2336
7
        }
2337
2338
7
        int ret = 0;
2339
31
        for (int64_t index_id : part_pb.index_id()) {
2340
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2341
0
                LOG_WARNING("failed to recycle tablets under partition")
2342
0
                        .tag("table_id", part_pb.table_id())
2343
0
                        .tag("instance_id", instance_id_)
2344
0
                        .tag("index_id", index_id)
2345
0
                        .tag("partition_id", partition_id);
2346
0
                ret = -1;
2347
0
            }
2348
31
        }
2349
7
        if (ret == 0 && part_pb.has_db_id()) {
2350
            // Recycle the versioned keys
2351
7
            std::unique_ptr<Transaction> txn;
2352
7
            err = txn_kv_->create_txn(&txn);
2353
7
            if (err != TxnErrorCode::TXN_OK) {
2354
0
                LOG_WARNING("failed to create txn").tag("err", err);
2355
0
                return -1;
2356
0
            }
2357
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2358
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2359
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2360
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2361
7
            std::string partition_version_key =
2362
7
                    versioned::partition_version_key({instance_id_, partition_id});
2363
7
            versioned_remove_all(txn.get(), meta_key);
2364
7
            txn->remove(index_key);
2365
7
            txn->remove(inverted_index_key);
2366
7
            versioned_remove_all(txn.get(), partition_version_key);
2367
7
            err = txn->commit();
2368
7
            if (err != TxnErrorCode::TXN_OK) {
2369
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2370
0
                return -1;
2371
0
            }
2372
7
        }
2373
2374
7
        if (ret == 0) {
2375
7
            ++num_recycled;
2376
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2377
7
            partition_keys.push_back(k);
2378
7
            if (part_pb.db_id() > 0) {
2379
7
                partition_version_keys.push_back(partition_version_key(
2380
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2381
7
            }
2382
7
            metrics_context.total_recycled_num = num_recycled;
2383
7
            metrics_context.report();
2384
7
        }
2385
7
        return ret;
2386
7
    };
2387
2388
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2389
5
        if (partition_keys.empty()) return 0;
2390
4
        DORIS_CLOUD_DEFER {
2391
4
            partition_keys.clear();
2392
4
            partition_version_keys.clear();
2393
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2390
1
        DORIS_CLOUD_DEFER {
2391
1
            partition_keys.clear();
2392
1
            partition_version_keys.clear();
2393
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2390
3
        DORIS_CLOUD_DEFER {
2391
3
            partition_keys.clear();
2392
3
            partition_version_keys.clear();
2393
3
        };
2394
4
        std::unique_ptr<Transaction> txn;
2395
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2396
4
        if (err != TxnErrorCode::TXN_OK) {
2397
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2398
0
            return -1;
2399
0
        }
2400
8
        for (auto& k : partition_keys) {
2401
8
            txn->remove(k);
2402
8
        }
2403
8
        for (auto& k : partition_version_keys) {
2404
8
            txn->remove(k);
2405
8
        }
2406
4
        err = txn->commit();
2407
4
        if (err != TxnErrorCode::TXN_OK) {
2408
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2409
0
                         << " err=" << err;
2410
0
            return -1;
2411
0
        }
2412
4
        return 0;
2413
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2388
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2389
2
        if (partition_keys.empty()) return 0;
2390
1
        DORIS_CLOUD_DEFER {
2391
1
            partition_keys.clear();
2392
1
            partition_version_keys.clear();
2393
1
        };
2394
1
        std::unique_ptr<Transaction> txn;
2395
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2396
1
        if (err != TxnErrorCode::TXN_OK) {
2397
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2398
0
            return -1;
2399
0
        }
2400
1
        for (auto& k : partition_keys) {
2401
1
            txn->remove(k);
2402
1
        }
2403
1
        for (auto& k : partition_version_keys) {
2404
1
            txn->remove(k);
2405
1
        }
2406
1
        err = txn->commit();
2407
1
        if (err != TxnErrorCode::TXN_OK) {
2408
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2409
0
                         << " err=" << err;
2410
0
            return -1;
2411
0
        }
2412
1
        return 0;
2413
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2388
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2389
3
        if (partition_keys.empty()) return 0;
2390
3
        DORIS_CLOUD_DEFER {
2391
3
            partition_keys.clear();
2392
3
            partition_version_keys.clear();
2393
3
        };
2394
3
        std::unique_ptr<Transaction> txn;
2395
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2396
3
        if (err != TxnErrorCode::TXN_OK) {
2397
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2398
0
            return -1;
2399
0
        }
2400
7
        for (auto& k : partition_keys) {
2401
7
            txn->remove(k);
2402
7
        }
2403
7
        for (auto& k : partition_version_keys) {
2404
7
            txn->remove(k);
2405
7
        }
2406
3
        err = txn->commit();
2407
3
        if (err != TxnErrorCode::TXN_OK) {
2408
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2409
0
                         << " err=" << err;
2410
0
            return -1;
2411
0
        }
2412
3
        return 0;
2413
3
    };
2414
2415
15
    if (config::enable_recycler_stats_metrics) {
2416
0
        scan_and_statistics_partitions();
2417
0
    }
2418
    // recycle_func and loop_done for scan and recycle
2419
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
2420
15
}
2421
2422
14
int InstanceRecycler::recycle_versions() {
2423
14
    if (should_recycle_versioned_keys()) {
2424
2
        return recycle_orphan_partitions();
2425
2
    }
2426
2427
12
    int64_t num_scanned = 0;
2428
12
    int64_t num_recycled = 0;
2429
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
2430
2431
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
2432
2433
12
    auto start_time = steady_clock::now();
2434
2435
12
    DORIS_CLOUD_DEFER {
2436
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2437
12
        metrics_context.finish_report();
2438
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2439
12
                .tag("instance_id", instance_id_)
2440
12
                .tag("num_scanned", num_scanned)
2441
12
                .tag("num_recycled", num_recycled);
2442
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
2435
12
    DORIS_CLOUD_DEFER {
2436
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2437
12
        metrics_context.finish_report();
2438
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2439
12
                .tag("instance_id", instance_id_)
2440
12
                .tag("num_scanned", num_scanned)
2441
12
                .tag("num_recycled", num_recycled);
2442
12
    };
2443
2444
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
2445
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
2446
12
    int64_t last_scanned_table_id = 0;
2447
12
    bool is_recycled = false; // Is last scanned kv recycled
2448
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
2449
12
                         &metrics_context, this](std::string_view k, std::string_view) {
2450
2
        ++num_scanned;
2451
2
        auto k1 = k;
2452
2
        k1.remove_prefix(1);
2453
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2454
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2455
2
        decode_key(&k1, &out);
2456
2
        DCHECK_EQ(out.size(), 6) << k;
2457
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2458
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2459
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2460
0
            return 0;
2461
0
        }
2462
2
        last_scanned_table_id = table_id;
2463
2
        is_recycled = false;
2464
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2465
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2466
2
        std::unique_ptr<Transaction> txn;
2467
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2468
2
        if (err != TxnErrorCode::TXN_OK) {
2469
0
            return -1;
2470
0
        }
2471
2
        std::unique_ptr<RangeGetIterator> iter;
2472
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2473
2
        if (err != TxnErrorCode::TXN_OK) {
2474
0
            return -1;
2475
0
        }
2476
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2477
1
            return 0;
2478
1
        }
2479
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2480
        // 1. Remove all partition version kvs of this table
2481
1
        auto partition_version_key_begin =
2482
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2483
1
        auto partition_version_key_end =
2484
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2485
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2486
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2487
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2488
1
                     << " table_id=" << table_id;
2489
        // 2. Remove the table version kv of this table
2490
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2491
1
        txn->remove(tbl_version_key);
2492
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2493
        // 3. Remove mow delete bitmap update lock and tablet job lock
2494
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2495
1
        txn->remove(lock_key);
2496
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2497
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2498
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2499
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2500
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2501
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2502
1
                     << " table_id=" << table_id;
2503
1
        err = txn->commit();
2504
1
        if (err != TxnErrorCode::TXN_OK) {
2505
0
            return -1;
2506
0
        }
2507
1
        metrics_context.total_recycled_num = ++num_recycled;
2508
1
        metrics_context.report();
2509
1
        is_recycled = true;
2510
1
        return 0;
2511
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2449
2
                         &metrics_context, this](std::string_view k, std::string_view) {
2450
2
        ++num_scanned;
2451
2
        auto k1 = k;
2452
2
        k1.remove_prefix(1);
2453
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2454
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2455
2
        decode_key(&k1, &out);
2456
2
        DCHECK_EQ(out.size(), 6) << k;
2457
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2458
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2459
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2460
0
            return 0;
2461
0
        }
2462
2
        last_scanned_table_id = table_id;
2463
2
        is_recycled = false;
2464
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2465
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2466
2
        std::unique_ptr<Transaction> txn;
2467
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2468
2
        if (err != TxnErrorCode::TXN_OK) {
2469
0
            return -1;
2470
0
        }
2471
2
        std::unique_ptr<RangeGetIterator> iter;
2472
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2473
2
        if (err != TxnErrorCode::TXN_OK) {
2474
0
            return -1;
2475
0
        }
2476
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2477
1
            return 0;
2478
1
        }
2479
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2480
        // 1. Remove all partition version kvs of this table
2481
1
        auto partition_version_key_begin =
2482
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2483
1
        auto partition_version_key_end =
2484
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2485
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2486
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2487
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2488
1
                     << " table_id=" << table_id;
2489
        // 2. Remove the table version kv of this table
2490
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2491
1
        txn->remove(tbl_version_key);
2492
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2493
        // 3. Remove mow delete bitmap update lock and tablet job lock
2494
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2495
1
        txn->remove(lock_key);
2496
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2497
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2498
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2499
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2500
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2501
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2502
1
                     << " table_id=" << table_id;
2503
1
        err = txn->commit();
2504
1
        if (err != TxnErrorCode::TXN_OK) {
2505
0
            return -1;
2506
0
        }
2507
1
        metrics_context.total_recycled_num = ++num_recycled;
2508
1
        metrics_context.report();
2509
1
        is_recycled = true;
2510
1
        return 0;
2511
1
    };
2512
2513
12
    if (config::enable_recycler_stats_metrics) {
2514
0
        scan_and_statistics_versions();
2515
0
    }
2516
    // recycle_func and loop_done for scan and recycle
2517
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
2518
14
}
2519
2520
3
int InstanceRecycler::recycle_orphan_partitions() {
2521
3
    int64_t num_scanned = 0;
2522
3
    int64_t num_recycled = 0;
2523
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
2524
2525
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
2526
3
            .tag("instance_id", instance_id_);
2527
2528
3
    auto start_time = steady_clock::now();
2529
2530
3
    DORIS_CLOUD_DEFER {
2531
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2532
3
        metrics_context.finish_report();
2533
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2534
3
                .tag("instance_id", instance_id_)
2535
3
                .tag("num_scanned", num_scanned)
2536
3
                .tag("num_recycled", num_recycled);
2537
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
2530
3
    DORIS_CLOUD_DEFER {
2531
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2532
3
        metrics_context.finish_report();
2533
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2534
3
                .tag("instance_id", instance_id_)
2535
3
                .tag("num_scanned", num_scanned)
2536
3
                .tag("num_recycled", num_recycled);
2537
3
    };
2538
2539
3
    bool is_empty_table = false;        // whether the table has no indexes
2540
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
2541
3
    int64_t current_table_id = 0;       // current scanning table id
2542
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
2543
3
                         &current_table_id, &is_table_kvs_recycled,
2544
3
                         this](std::string_view k, std::string_view) {
2545
2
        ++num_scanned;
2546
2547
2
        std::string_view k1(k);
2548
2
        int64_t db_id, table_id, partition_id;
2549
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2550
2
                                                            &partition_id)) {
2551
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2552
0
            return -1;
2553
2
        } else if (table_id != current_table_id) {
2554
2
            current_table_id = table_id;
2555
2
            is_table_kvs_recycled = false;
2556
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2557
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2558
2
            if (err != TxnErrorCode::TXN_OK) {
2559
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2560
0
                             << " table_id=" << table_id << " err=" << err;
2561
0
                return -1;
2562
0
            }
2563
2
        }
2564
2565
2
        if (!is_empty_table) {
2566
            // table is not empty, skip recycle
2567
1
            return 0;
2568
1
        }
2569
2570
1
        std::unique_ptr<Transaction> txn;
2571
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2572
1
        if (err != TxnErrorCode::TXN_OK) {
2573
0
            return -1;
2574
0
        }
2575
2576
        // 1. Remove all partition related kvs
2577
1
        std::string partition_meta_key =
2578
1
                versioned::meta_partition_key({instance_id_, partition_id});
2579
1
        std::string partition_index_key =
2580
1
                versioned::partition_index_key({instance_id_, partition_id});
2581
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2582
1
                {instance_id_, db_id, table_id, partition_id});
2583
1
        std::string partition_version_key =
2584
1
                versioned::partition_version_key({instance_id_, partition_id});
2585
1
        txn->remove(partition_index_key);
2586
1
        txn->remove(partition_inverted_key);
2587
1
        versioned_remove_all(txn.get(), partition_meta_key);
2588
1
        versioned_remove_all(txn.get(), partition_version_key);
2589
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2590
1
                     << " table_id=" << table_id << " db_id=" << db_id
2591
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2592
1
                     << " partition_version_key=" << hex(partition_version_key);
2593
2594
1
        if (!is_table_kvs_recycled) {
2595
1
            is_table_kvs_recycled = true;
2596
2597
            // 2. Remove the table version kv of this table
2598
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2599
1
            versioned_remove_all(txn.get(), table_version_key);
2600
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2601
            // 3. Remove mow delete bitmap update lock and tablet job lock
2602
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2603
1
            txn->remove(lock_key);
2604
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2605
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2606
1
            std::string tablet_job_key_end =
2607
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2608
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2609
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2610
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2611
1
                         << " table_id=" << table_id;
2612
1
        }
2613
2614
1
        err = txn->commit();
2615
1
        if (err != TxnErrorCode::TXN_OK) {
2616
0
            return -1;
2617
0
        }
2618
1
        metrics_context.total_recycled_num = ++num_recycled;
2619
1
        metrics_context.report();
2620
1
        return 0;
2621
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2544
2
                         this](std::string_view k, std::string_view) {
2545
2
        ++num_scanned;
2546
2547
2
        std::string_view k1(k);
2548
2
        int64_t db_id, table_id, partition_id;
2549
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2550
2
                                                            &partition_id)) {
2551
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2552
0
            return -1;
2553
2
        } else if (table_id != current_table_id) {
2554
2
            current_table_id = table_id;
2555
2
            is_table_kvs_recycled = false;
2556
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2557
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2558
2
            if (err != TxnErrorCode::TXN_OK) {
2559
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2560
0
                             << " table_id=" << table_id << " err=" << err;
2561
0
                return -1;
2562
0
            }
2563
2
        }
2564
2565
2
        if (!is_empty_table) {
2566
            // table is not empty, skip recycle
2567
1
            return 0;
2568
1
        }
2569
2570
1
        std::unique_ptr<Transaction> txn;
2571
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2572
1
        if (err != TxnErrorCode::TXN_OK) {
2573
0
            return -1;
2574
0
        }
2575
2576
        // 1. Remove all partition related kvs
2577
1
        std::string partition_meta_key =
2578
1
                versioned::meta_partition_key({instance_id_, partition_id});
2579
1
        std::string partition_index_key =
2580
1
                versioned::partition_index_key({instance_id_, partition_id});
2581
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2582
1
                {instance_id_, db_id, table_id, partition_id});
2583
1
        std::string partition_version_key =
2584
1
                versioned::partition_version_key({instance_id_, partition_id});
2585
1
        txn->remove(partition_index_key);
2586
1
        txn->remove(partition_inverted_key);
2587
1
        versioned_remove_all(txn.get(), partition_meta_key);
2588
1
        versioned_remove_all(txn.get(), partition_version_key);
2589
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2590
1
                     << " table_id=" << table_id << " db_id=" << db_id
2591
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2592
1
                     << " partition_version_key=" << hex(partition_version_key);
2593
2594
1
        if (!is_table_kvs_recycled) {
2595
1
            is_table_kvs_recycled = true;
2596
2597
            // 2. Remove the table version kv of this table
2598
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2599
1
            versioned_remove_all(txn.get(), table_version_key);
2600
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2601
            // 3. Remove mow delete bitmap update lock and tablet job lock
2602
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2603
1
            txn->remove(lock_key);
2604
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2605
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2606
1
            std::string tablet_job_key_end =
2607
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2608
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2609
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2610
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2611
1
                         << " table_id=" << table_id;
2612
1
        }
2613
2614
1
        err = txn->commit();
2615
1
        if (err != TxnErrorCode::TXN_OK) {
2616
0
            return -1;
2617
0
        }
2618
1
        metrics_context.total_recycled_num = ++num_recycled;
2619
1
        metrics_context.report();
2620
1
        return 0;
2621
1
    };
2622
2623
    // recycle_func and loop_done for scan and recycle
2624
3
    return scan_and_recycle(
2625
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
2626
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
2627
3
            std::move(recycle_func));
2628
3
}
2629
2630
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
2631
                                      RecyclerMetricsContext& metrics_context,
2632
49
                                      int64_t partition_id) {
2633
49
    bool is_multi_version =
2634
49
            instance_info_.has_multi_version_status() &&
2635
49
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
2636
49
    int64_t num_scanned = 0;
2637
49
    std::atomic_long num_recycled = 0;
2638
2639
49
    std::string tablet_key_begin, tablet_key_end;
2640
49
    std::string stats_key_begin, stats_key_end;
2641
49
    std::string job_key_begin, job_key_end;
2642
2643
49
    std::string tablet_belongs;
2644
49
    if (partition_id > 0) {
2645
        // recycle tablets in a partition belonging to the index
2646
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2647
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2648
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
2649
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
2650
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
2651
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
2652
33
        tablet_belongs = "partition";
2653
33
    } else {
2654
        // recycle tablets in the index
2655
16
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2656
16
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2657
16
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
2658
16
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
2659
16
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
2660
16
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
2661
16
        tablet_belongs = "index";
2662
16
    }
2663
2664
49
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
2665
49
            .tag("table_id", table_id)
2666
49
            .tag("index_id", index_id)
2667
49
            .tag("partition_id", partition_id);
2668
2669
49
    auto start_time = steady_clock::now();
2670
2671
49
    DORIS_CLOUD_DEFER {
2672
49
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2673
49
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2674
49
                .tag("instance_id", instance_id_)
2675
49
                .tag("table_id", table_id)
2676
49
                .tag("index_id", index_id)
2677
49
                .tag("partition_id", partition_id)
2678
49
                .tag("num_scanned", num_scanned)
2679
49
                .tag("num_recycled", num_recycled);
2680
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2671
4
    DORIS_CLOUD_DEFER {
2672
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2673
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2674
4
                .tag("instance_id", instance_id_)
2675
4
                .tag("table_id", table_id)
2676
4
                .tag("index_id", index_id)
2677
4
                .tag("partition_id", partition_id)
2678
4
                .tag("num_scanned", num_scanned)
2679
4
                .tag("num_recycled", num_recycled);
2680
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2671
45
    DORIS_CLOUD_DEFER {
2672
45
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2673
45
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2674
45
                .tag("instance_id", instance_id_)
2675
45
                .tag("table_id", table_id)
2676
45
                .tag("index_id", index_id)
2677
45
                .tag("partition_id", partition_id)
2678
45
                .tag("num_scanned", num_scanned)
2679
45
                .tag("num_recycled", num_recycled);
2680
45
    };
2681
2682
    // The first string_view represents the tablet key which has been recycled
2683
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
2684
49
    using TabletKeyPair = std::pair<std::string_view, bool>;
2685
49
    SyncExecutor<TabletKeyPair> sync_executor(
2686
49
            _thread_pool_group.recycle_tablet_pool,
2687
49
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
2688
49
                        index_id, partition_id),
2689
4.23k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2689
4.00k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2689
237
            [](const TabletKeyPair& k) { return k.first.empty(); });
2690
2691
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
2692
49
    std::vector<std::string> tablet_idx_keys;
2693
49
    std::vector<std::string> restore_job_keys;
2694
49
    std::vector<std::string> init_rs_keys;
2695
49
    std::vector<std::string> tablet_compact_stats_keys;
2696
49
    std::vector<std::string> tablet_load_stats_keys;
2697
49
    std::vector<std::string> versioned_meta_tablet_keys;
2698
8.24k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2699
8.24k
        bool use_range_remove = true;
2700
8.24k
        ++num_scanned;
2701
8.24k
        doris::TabletMetaCloudPB tablet_meta_pb;
2702
8.24k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2703
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2704
0
            use_range_remove = false;
2705
0
            return -1;
2706
0
        }
2707
8.24k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2708
2709
8.24k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2710
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2711
4.00k
            return -1;
2712
4.00k
        }
2713
2714
4.24k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2715
4.24k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2716
4.24k
        if (is_multi_version) {
2717
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2718
6
            tablet_compact_stats_keys.push_back(
2719
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2720
6
            tablet_load_stats_keys.push_back(
2721
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2722
6
            versioned_meta_tablet_keys.push_back(
2723
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2724
6
        }
2725
4.24k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2726
4.23k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2727
4.23k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2728
4.23k
            if (recycle_tablet(tid, metrics_context) != 0) {
2729
1
                LOG_WARNING("failed to recycle tablet")
2730
1
                        .tag("instance_id", instance_id_)
2731
1
                        .tag("tablet_id", tid);
2732
1
                range_move = false;
2733
1
                return {std::string_view(), range_move};
2734
1
            }
2735
4.23k
            ++num_recycled;
2736
4.23k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2737
4.23k
            return {k, range_move};
2738
4.23k
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2727
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2728
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2729
0
                LOG_WARNING("failed to recycle tablet")
2730
0
                        .tag("instance_id", instance_id_)
2731
0
                        .tag("tablet_id", tid);
2732
0
                range_move = false;
2733
0
                return {std::string_view(), range_move};
2734
0
            }
2735
4.00k
            ++num_recycled;
2736
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2737
4.00k
            return {k, range_move};
2738
4.00k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2727
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2728
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2729
1
                LOG_WARNING("failed to recycle tablet")
2730
1
                        .tag("instance_id", instance_id_)
2731
1
                        .tag("tablet_id", tid);
2732
1
                range_move = false;
2733
1
                return {std::string_view(), range_move};
2734
1
            }
2735
236
            ++num_recycled;
2736
236
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2737
236
            return {k, range_move};
2738
237
        });
2739
4.23k
        return 0;
2740
4.24k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2698
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2699
8.00k
        bool use_range_remove = true;
2700
8.00k
        ++num_scanned;
2701
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
2702
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2703
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2704
0
            use_range_remove = false;
2705
0
            return -1;
2706
0
        }
2707
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2708
2709
8.00k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2710
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2711
4.00k
            return -1;
2712
4.00k
        }
2713
2714
4.00k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2715
4.00k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2716
4.00k
        if (is_multi_version) {
2717
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2718
0
            tablet_compact_stats_keys.push_back(
2719
0
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2720
0
            tablet_load_stats_keys.push_back(
2721
0
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2722
0
            versioned_meta_tablet_keys.push_back(
2723
0
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2724
0
        }
2725
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2726
4.00k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2727
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2728
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2729
4.00k
                LOG_WARNING("failed to recycle tablet")
2730
4.00k
                        .tag("instance_id", instance_id_)
2731
4.00k
                        .tag("tablet_id", tid);
2732
4.00k
                range_move = false;
2733
4.00k
                return {std::string_view(), range_move};
2734
4.00k
            }
2735
4.00k
            ++num_recycled;
2736
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2737
4.00k
            return {k, range_move};
2738
4.00k
        });
2739
4.00k
        return 0;
2740
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2698
240
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2699
240
        bool use_range_remove = true;
2700
240
        ++num_scanned;
2701
240
        doris::TabletMetaCloudPB tablet_meta_pb;
2702
240
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2703
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2704
0
            use_range_remove = false;
2705
0
            return -1;
2706
0
        }
2707
240
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2708
2709
240
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2710
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2711
0
            return -1;
2712
0
        }
2713
2714
240
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2715
240
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2716
240
        if (is_multi_version) {
2717
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2718
6
            tablet_compact_stats_keys.push_back(
2719
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2720
6
            tablet_load_stats_keys.push_back(
2721
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2722
6
            versioned_meta_tablet_keys.push_back(
2723
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2724
6
        }
2725
240
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2726
237
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2727
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2728
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2729
237
                LOG_WARNING("failed to recycle tablet")
2730
237
                        .tag("instance_id", instance_id_)
2731
237
                        .tag("tablet_id", tid);
2732
237
                range_move = false;
2733
237
                return {std::string_view(), range_move};
2734
237
            }
2735
237
            ++num_recycled;
2736
237
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2737
237
            return {k, range_move};
2738
237
        });
2739
237
        return 0;
2740
240
    };
2741
2742
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
2743
49
    auto loop_done = [&, this]() -> int {
2744
49
        bool finished = true;
2745
49
        auto tablet_keys = sync_executor.when_all(&finished);
2746
49
        if (!finished) {
2747
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2748
1
            return -1;
2749
1
        }
2750
48
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2751
46
        if (!tablet_keys.empty() &&
2752
46
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2752
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2752
42
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2753
0
            return -1;
2754
0
        }
2755
        // sort the vector using key's order
2756
46
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2757
49.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2757
48.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2757
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2758
46
        bool use_range_remove = true;
2759
4.23k
        for (auto& [_, remove] : tablet_keys) {
2760
4.23k
            if (!remove) {
2761
0
                use_range_remove = remove;
2762
0
                break;
2763
0
            }
2764
4.23k
        }
2765
46
        DORIS_CLOUD_DEFER {
2766
46
            tablet_idx_keys.clear();
2767
46
            restore_job_keys.clear();
2768
46
            init_rs_keys.clear();
2769
46
            tablet_compact_stats_keys.clear();
2770
46
            tablet_load_stats_keys.clear();
2771
46
            versioned_meta_tablet_keys.clear();
2772
46
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2765
2
        DORIS_CLOUD_DEFER {
2766
2
            tablet_idx_keys.clear();
2767
2
            restore_job_keys.clear();
2768
2
            init_rs_keys.clear();
2769
2
            tablet_compact_stats_keys.clear();
2770
2
            tablet_load_stats_keys.clear();
2771
2
            versioned_meta_tablet_keys.clear();
2772
2
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2765
44
        DORIS_CLOUD_DEFER {
2766
44
            tablet_idx_keys.clear();
2767
44
            restore_job_keys.clear();
2768
44
            init_rs_keys.clear();
2769
44
            tablet_compact_stats_keys.clear();
2770
44
            tablet_load_stats_keys.clear();
2771
44
            versioned_meta_tablet_keys.clear();
2772
44
        };
2773
46
        std::unique_ptr<Transaction> txn;
2774
46
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2775
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2776
0
            return -1;
2777
0
        }
2778
46
        std::string tablet_key_end;
2779
46
        if (!tablet_keys.empty()) {
2780
44
            if (use_range_remove) {
2781
44
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2782
44
                txn->remove(tablet_keys.front().first, tablet_key_end);
2783
44
            } else {
2784
0
                for (auto& [k, _] : tablet_keys) {
2785
0
                    txn->remove(k);
2786
0
                }
2787
0
            }
2788
44
        }
2789
46
        if (is_multi_version) {
2790
6
            for (auto& k : tablet_compact_stats_keys) {
2791
                // Remove all versions of tablet compact stats for recycled tablet
2792
6
                LOG_INFO("remove versioned tablet compact stats key")
2793
6
                        .tag("compact_stats_key", hex(k));
2794
6
                versioned_remove_all(txn.get(), k);
2795
6
            }
2796
6
            for (auto& k : tablet_load_stats_keys) {
2797
                // Remove all versions of tablet load stats for recycled tablet
2798
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2799
6
                versioned_remove_all(txn.get(), k);
2800
6
            }
2801
6
            for (auto& k : versioned_meta_tablet_keys) {
2802
                // Remove all versions of meta tablet for recycled tablet
2803
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2804
6
                versioned_remove_all(txn.get(), k);
2805
6
            }
2806
5
        }
2807
4.24k
        for (auto& k : tablet_idx_keys) {
2808
4.24k
            txn->remove(k);
2809
4.24k
        }
2810
4.24k
        for (auto& k : restore_job_keys) {
2811
4.24k
            txn->remove(k);
2812
4.24k
        }
2813
46
        for (auto& k : init_rs_keys) {
2814
0
            txn->remove(k);
2815
0
        }
2816
46
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2817
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2818
0
                         << ", err=" << err;
2819
0
            return -1;
2820
0
        }
2821
46
        return 0;
2822
46
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2743
4
    auto loop_done = [&, this]() -> int {
2744
4
        bool finished = true;
2745
4
        auto tablet_keys = sync_executor.when_all(&finished);
2746
4
        if (!finished) {
2747
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2748
0
            return -1;
2749
0
        }
2750
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2751
2
        if (!tablet_keys.empty() &&
2752
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2753
0
            return -1;
2754
0
        }
2755
        // sort the vector using key's order
2756
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2757
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2758
2
        bool use_range_remove = true;
2759
4.00k
        for (auto& [_, remove] : tablet_keys) {
2760
4.00k
            if (!remove) {
2761
0
                use_range_remove = remove;
2762
0
                break;
2763
0
            }
2764
4.00k
        }
2765
2
        DORIS_CLOUD_DEFER {
2766
2
            tablet_idx_keys.clear();
2767
2
            restore_job_keys.clear();
2768
2
            init_rs_keys.clear();
2769
2
            tablet_compact_stats_keys.clear();
2770
2
            tablet_load_stats_keys.clear();
2771
2
            versioned_meta_tablet_keys.clear();
2772
2
        };
2773
2
        std::unique_ptr<Transaction> txn;
2774
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2775
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2776
0
            return -1;
2777
0
        }
2778
2
        std::string tablet_key_end;
2779
2
        if (!tablet_keys.empty()) {
2780
2
            if (use_range_remove) {
2781
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2782
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
2783
2
            } else {
2784
0
                for (auto& [k, _] : tablet_keys) {
2785
0
                    txn->remove(k);
2786
0
                }
2787
0
            }
2788
2
        }
2789
2
        if (is_multi_version) {
2790
0
            for (auto& k : tablet_compact_stats_keys) {
2791
                // Remove all versions of tablet compact stats for recycled tablet
2792
0
                LOG_INFO("remove versioned tablet compact stats key")
2793
0
                        .tag("compact_stats_key", hex(k));
2794
0
                versioned_remove_all(txn.get(), k);
2795
0
            }
2796
0
            for (auto& k : tablet_load_stats_keys) {
2797
                // Remove all versions of tablet load stats for recycled tablet
2798
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2799
0
                versioned_remove_all(txn.get(), k);
2800
0
            }
2801
0
            for (auto& k : versioned_meta_tablet_keys) {
2802
                // Remove all versions of meta tablet for recycled tablet
2803
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2804
0
                versioned_remove_all(txn.get(), k);
2805
0
            }
2806
0
        }
2807
4.00k
        for (auto& k : tablet_idx_keys) {
2808
4.00k
            txn->remove(k);
2809
4.00k
        }
2810
4.00k
        for (auto& k : restore_job_keys) {
2811
4.00k
            txn->remove(k);
2812
4.00k
        }
2813
2
        for (auto& k : init_rs_keys) {
2814
0
            txn->remove(k);
2815
0
        }
2816
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2817
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2818
0
                         << ", err=" << err;
2819
0
            return -1;
2820
0
        }
2821
2
        return 0;
2822
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2743
45
    auto loop_done = [&, this]() -> int {
2744
45
        bool finished = true;
2745
45
        auto tablet_keys = sync_executor.when_all(&finished);
2746
45
        if (!finished) {
2747
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2748
1
            return -1;
2749
1
        }
2750
44
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2751
44
        if (!tablet_keys.empty() &&
2752
44
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2753
0
            return -1;
2754
0
        }
2755
        // sort the vector using key's order
2756
44
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2757
44
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2758
44
        bool use_range_remove = true;
2759
236
        for (auto& [_, remove] : tablet_keys) {
2760
236
            if (!remove) {
2761
0
                use_range_remove = remove;
2762
0
                break;
2763
0
            }
2764
236
        }
2765
44
        DORIS_CLOUD_DEFER {
2766
44
            tablet_idx_keys.clear();
2767
44
            restore_job_keys.clear();
2768
44
            init_rs_keys.clear();
2769
44
            tablet_compact_stats_keys.clear();
2770
44
            tablet_load_stats_keys.clear();
2771
44
            versioned_meta_tablet_keys.clear();
2772
44
        };
2773
44
        std::unique_ptr<Transaction> txn;
2774
44
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2775
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2776
0
            return -1;
2777
0
        }
2778
44
        std::string tablet_key_end;
2779
44
        if (!tablet_keys.empty()) {
2780
42
            if (use_range_remove) {
2781
42
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2782
42
                txn->remove(tablet_keys.front().first, tablet_key_end);
2783
42
            } else {
2784
0
                for (auto& [k, _] : tablet_keys) {
2785
0
                    txn->remove(k);
2786
0
                }
2787
0
            }
2788
42
        }
2789
44
        if (is_multi_version) {
2790
6
            for (auto& k : tablet_compact_stats_keys) {
2791
                // Remove all versions of tablet compact stats for recycled tablet
2792
6
                LOG_INFO("remove versioned tablet compact stats key")
2793
6
                        .tag("compact_stats_key", hex(k));
2794
6
                versioned_remove_all(txn.get(), k);
2795
6
            }
2796
6
            for (auto& k : tablet_load_stats_keys) {
2797
                // Remove all versions of tablet load stats for recycled tablet
2798
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2799
6
                versioned_remove_all(txn.get(), k);
2800
6
            }
2801
6
            for (auto& k : versioned_meta_tablet_keys) {
2802
                // Remove all versions of meta tablet for recycled tablet
2803
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2804
6
                versioned_remove_all(txn.get(), k);
2805
6
            }
2806
5
        }
2807
239
        for (auto& k : tablet_idx_keys) {
2808
239
            txn->remove(k);
2809
239
        }
2810
239
        for (auto& k : restore_job_keys) {
2811
239
            txn->remove(k);
2812
239
        }
2813
44
        for (auto& k : init_rs_keys) {
2814
0
            txn->remove(k);
2815
0
        }
2816
44
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2817
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2818
0
                         << ", err=" << err;
2819
0
            return -1;
2820
0
        }
2821
44
        return 0;
2822
44
    };
2823
2824
49
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
2825
49
                               std::move(loop_done));
2826
49
    if (ret != 0) {
2827
3
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
2828
3
        return ret;
2829
3
    }
2830
2831
    // directly remove tablet stats and tablet jobs of these dropped index or partition
2832
46
    std::unique_ptr<Transaction> txn;
2833
46
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2834
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
2835
0
        return -1;
2836
0
    }
2837
46
    txn->remove(stats_key_begin, stats_key_end);
2838
46
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
2839
46
                 << " end=" << hex(stats_key_end);
2840
46
    txn->remove(job_key_begin, job_key_end);
2841
46
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
2842
46
    std::string schema_key_begin, schema_key_end;
2843
46
    std::string schema_dict_key;
2844
46
    std::string versioned_schema_key_begin, versioned_schema_key_end;
2845
46
    if (partition_id <= 0) {
2846
        // Delete schema kv of this index
2847
14
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
2848
14
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
2849
14
        txn->remove(schema_key_begin, schema_key_end);
2850
14
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
2851
14
                     << " end=" << hex(schema_key_end);
2852
14
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
2853
14
        txn->remove(schema_dict_key);
2854
14
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
2855
14
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
2856
14
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
2857
14
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
2858
14
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
2859
14
                     << " end=" << hex(versioned_schema_key_end);
2860
14
    }
2861
2862
46
    TxnErrorCode err = txn->commit();
2863
46
    if (err != TxnErrorCode::TXN_OK) {
2864
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
2865
0
                     << " err=" << err;
2866
0
        return -1;
2867
0
    }
2868
2869
46
    return ret;
2870
46
}
2871
2872
5.61k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
2873
5.61k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
2874
5.61k
    int64_t num_segments = rs_meta_pb.num_segments();
2875
5.61k
    if (num_segments <= 0) return 0;
2876
2877
5.61k
    std::vector<std::string> file_paths;
2878
5.61k
    if (decrement_packed_file_ref_counts(rs_meta_pb) != 0) {
2879
0
        return -1;
2880
0
    }
2881
2882
    // Process inverted indexes
2883
5.61k
    std::vector<std::pair<int64_t, std::string>> index_ids;
2884
    // default format as v1.
2885
5.61k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
2886
5.61k
    bool delete_rowset_data_by_prefix = false;
2887
5.61k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
2888
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
2889
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
2890
0
        delete_rowset_data_by_prefix = true;
2891
5.61k
    } else if (rs_meta_pb.has_tablet_schema()) {
2892
10.0k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
2893
10.0k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
2894
10.0k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
2895
10.0k
            }
2896
10.0k
        }
2897
4.80k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
2898
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
2899
2.00k
        }
2900
4.80k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
2901
        // schema version and index id are not found, delete rowset data by prefix directly.
2902
0
        delete_rowset_data_by_prefix = true;
2903
809
    } else {
2904
        // otherwise, try to get schema kv
2905
809
        InvertedIndexInfo index_info;
2906
809
        int inverted_index_get_ret = inverted_index_id_cache_->get(
2907
809
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
2908
809
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
2909
809
                                 &inverted_index_get_ret);
2910
809
        if (inverted_index_get_ret == 0) {
2911
809
            index_format = index_info.first;
2912
809
            index_ids = index_info.second;
2913
809
        } else if (inverted_index_get_ret == 1) {
2914
            // 1. Schema kv not found means tablet has been recycled
2915
            // Maybe some tablet recycle failed by some bugs
2916
            // We need to delete again to double check
2917
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
2918
            // because we are uncertain about the inverted index information.
2919
            // If there are inverted indexes, some data might not be deleted,
2920
            // but this is acceptable as we have made our best effort to delete the data.
2921
0
            LOG_INFO(
2922
0
                    "delete rowset data schema kv not found, need to delete again to double "
2923
0
                    "check")
2924
0
                    .tag("instance_id", instance_id_)
2925
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
2926
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
2927
            // Currently index_ids is guaranteed to be empty,
2928
            // but we clear it again here as a safeguard against future code changes
2929
            // that might cause index_ids to no longer be empty
2930
0
            index_format = InvertedIndexStorageFormatPB::V2;
2931
0
            index_ids.clear();
2932
0
        } else {
2933
            // failed to get schema kv, delete rowset data by prefix directly.
2934
0
            delete_rowset_data_by_prefix = true;
2935
0
        }
2936
809
    }
2937
2938
5.61k
    if (delete_rowset_data_by_prefix) {
2939
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
2940
0
                                  rs_meta_pb.rowset_id_v2());
2941
0
    }
2942
2943
5.61k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
2944
5.61k
    if (it == accessor_map_.end()) {
2945
1.59k
        LOG_WARNING("instance has no such resource id")
2946
1.59k
                .tag("instance_id", instance_id_)
2947
1.59k
                .tag("resource_id", rs_meta_pb.resource_id());
2948
1.59k
        return -1;
2949
1.59k
    }
2950
4.01k
    auto& accessor = it->second;
2951
2952
4.01k
    int64_t tablet_id = rs_meta_pb.tablet_id();
2953
4.01k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
2954
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
2955
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
2956
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
2957
40.0k
            for (const auto& index_id : index_ids) {
2958
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
2959
40.0k
                                                            index_id.second));
2960
40.0k
            }
2961
20.0k
        } else if (!index_ids.empty()) {
2962
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
2963
0
        }
2964
20.0k
    }
2965
2966
    // Process delete bitmap - check if it's stored in packed file
2967
4.01k
    bool delete_bitmap_is_packed = false;
2968
4.01k
    if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
2969
4.01k
                                                       &delete_bitmap_is_packed) != 0) {
2970
0
        LOG_WARNING("failed to decrement delete bitmap packed file ref count")
2971
0
                .tag("instance_id", instance_id_)
2972
0
                .tag("tablet_id", tablet_id)
2973
0
                .tag("rowset_id", rowset_id);
2974
0
        return -1;
2975
0
    }
2976
    // Only delete standalone delete bitmap file if not stored in packed file
2977
4.01k
    if (!delete_bitmap_is_packed) {
2978
4.01k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
2979
4.01k
    }
2980
    // TODO(AlexYue): seems could do do batch
2981
4.01k
    return accessor->delete_files(file_paths);
2982
4.01k
}
2983
2984
62.3k
// Marks the packed-file slices referenced by `rs_meta_pb` as deleted and refreshes the
// reference count / remaining byte count of every packed file that holds them.
//
// The rowset's `packed_slice_locations` map (small file path -> slice location) is grouped
// by packed file path. For each packed file, within one transaction per attempt:
//   - the PackedFileInfoPB is read and the matching slices are flagged deleted/corrected;
//   - `ref_cnt` and `remaining_slice_bytes` are recomputed from the slices still alive;
//   - when no slice is left the state becomes RECYCLING and, once the commit succeeds, the
//     packed file and its kv are removed via delete_packed_file_and_kv().
// A commit conflict is retried up to `config::decrement_packed_file_ref_counts_retry_times`
// times (at least one attempt is always made).
//
// A packed file whose info key is missing, or for which no slice changed, is treated as
// done. A failure on one packed file does not stop the processing of the others.
//
// Returns 0 when every packed file was updated or skipped, -1 if any of them failed.
int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb) {
    LOG_INFO("begin process_packed_file_location_index")
            .tag("instance_id", instance_id_)
            .tag("tablet_id", rs_meta_pb.tablet_id())
            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
            .tag("index_map_size", rs_meta_pb.packed_slice_locations_size());
    const auto& index_map = rs_meta_pb.packed_slice_locations();
    if (index_map.empty()) {
        LOG_INFO("skip merge file update: empty merge_file_segment_index")
                .tag("instance_id", instance_id_)
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("rowset_id", rs_meta_pb.rowset_id_v2());
        return 0;
    }

    struct PackedSmallFileInfo {
        std::string small_file_path;
    };
    // packed file path -> small files of this rowset stored inside that packed file.
    std::unordered_map<std::string, std::vector<PackedSmallFileInfo>> packed_file_updates;
    packed_file_updates.reserve(index_map.size());
    for (const auto& [small_path, index_pb] : index_map) {
        // Entries without a packed file path cannot be attributed to any packed file.
        if (!index_pb.has_packed_file_path() || index_pb.packed_file_path().empty()) {
            continue;
        }
        packed_file_updates[index_pb.packed_file_path()].push_back(
                PackedSmallFileInfo {small_path});
    }
    if (packed_file_updates.empty()) {
        LOG_INFO("skip packed file update: no valid merge_file_path in merge_file_segment_index")
                .tag("instance_id", instance_id_)
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                .tag("index_map_size", index_map.size());
        return 0;
    }

    // Always attempt at least once, even if the config value is zero or negative.
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
    int ret = 0;
    for (const auto& [packed_file_path, small_files] : packed_file_updates) {
        if (small_files.empty()) {
            continue;
        }

        bool success = false;
        for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
            // A fresh transaction per attempt so a retry re-reads the latest packed info.
            std::unique_ptr<Transaction> txn;
            TxnErrorCode err = txn_kv_->create_txn(&txn);
            if (err != TxnErrorCode::TXN_OK) {
                LOG_WARNING("failed to create txn when updating packed file ref count")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("err", err);
                ret = -1;
                break;
            }

            std::string packed_key = packed_file_key({instance_id_, packed_file_path});
            std::string packed_val;
            err = txn->get(packed_key, &packed_val);
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
                LOG_WARNING("packed file info not found when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("key", hex(packed_key));
                // Skip this packed file entry and continue with others
                success = true;
                break;
            }
            if (err != TxnErrorCode::TXN_OK) {
                LOG_WARNING("failed to get packed file info when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("err", err);
                ret = -1;
                break;
            }

            cloud::PackedFileInfoPB packed_info;
            if (!packed_info.ParseFromString(packed_val)) {
                LOG_WARNING("failed to parse packed file info when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id());
                ret = -1;
                break;
            }

            LOG_INFO("packed file update check")
                    .tag("instance_id", instance_id_)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("merged_file_path", packed_file_path)
                    .tag("requested_small_files", small_files.size())
                    .tag("merge_entries", packed_info.slices_size());

            // Flag every slice of this rowset as deleted; only the first slice with a
            // matching path is considered for each small file.
            auto* small_file_entries = packed_info.mutable_slices();
            int64_t changed_files = 0;
            int64_t missing_entries = 0;
            int64_t already_deleted = 0;
            for (const auto& small_file_info : small_files) {
                bool found = false;
                for (auto& small_file_entry : *small_file_entries) {
                    if (small_file_entry.path() == small_file_info.small_file_path) {
                        if (!small_file_entry.deleted()) {
                            small_file_entry.set_deleted(true);
                            if (!small_file_entry.corrected()) {
                                small_file_entry.set_corrected(true);
                            }
                            ++changed_files;
                        } else {
                            ++already_deleted;
                        }
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    ++missing_entries;
                    LOG_WARNING("packed file info missing small file entry")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path)
                            .tag("small_file_path", small_file_info.small_file_path)
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                            .tag("tablet_id", rs_meta_pb.tablet_id());
                }
            }

            // Nothing was flipped, so there is nothing to write back.
            if (changed_files == 0) {
                LOG_INFO("skip merge file update: no merge entries changed")
                        .tag("instance_id", instance_id_)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("merged_file_path", packed_file_path)
                        .tag("missing_entries", missing_entries)
                        .tag("already_deleted", already_deleted)
                        .tag("requested_small_files", small_files.size())
                        .tag("merge_entries", packed_info.slices_size());
                success = true;
                break;
            }

            // Calculate remaining files
            int64_t left_file_count = 0;
            int64_t left_file_bytes = 0;
            for (const auto& small_file_entry : packed_info.slices()) {
                if (!small_file_entry.deleted()) {
                    ++left_file_count;
                    left_file_bytes += small_file_entry.size();
                }
            }
            packed_info.set_remaining_slice_bytes(left_file_bytes);
            packed_info.set_ref_cnt(left_file_count);
            LOG_INFO("updated packed file reference info")
                    .tag("instance_id", instance_id_)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("packed_file_path", packed_file_path)
                    .tag("ref_cnt", left_file_count)
                    .tag("left_file_bytes", left_file_bytes);

            // No live slice left: the packed file itself can be recycled.
            if (left_file_count == 0) {
                packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
            }

            std::string updated_val;
            if (!packed_info.SerializeToString(&updated_val)) {
                LOG_WARNING("failed to serialize packed file info when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id());
                ret = -1;
                break;
            }

            txn->put(packed_key, updated_val);
            err = txn->commit();
            if (err == TxnErrorCode::TXN_OK) {
                success = true;
                if (left_file_count == 0) {
                    LOG_INFO("packed file ready to delete, deleting immediately")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path);
                    // The kv update is already committed; a failed file deletion is
                    // reported through the return value only.
                    if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
                        ret = -1;
                    }
                }
                break;
            }
            if (err == TxnErrorCode::TXN_CONFLICT) {
                if (attempt >= max_retry_times) {
                    LOG_WARNING("packed file info update conflict after max retry")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path)
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                            .tag("tablet_id", rs_meta_pb.tablet_id())
                            .tag("changed_files", changed_files)
                            .tag("attempt", attempt);
                    ret = -1;
                    break;
                }
                LOG_WARNING("packed file info update conflict, retrying")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("changed_files", changed_files)
                        .tag("attempt", attempt);
                sleep_for_packed_file_retry();
                continue;
            }

            // Any other commit error is not retried.
            LOG_WARNING("failed to commit packed file info update")
                    .tag("instance_id", instance_id_)
                    .tag("packed_file_path", packed_file_path)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("err", err)
                    .tag("changed_files", changed_files);
            ret = -1;
            break;
        }

        if (!success) {
            ret = -1;
        }
    }

    return ret;
}
3222
3223
int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts(int64_t tablet_id,
3224
                                                                     const std::string& rowset_id,
3225
58.2k
                                                                     bool* out_is_packed) {
3226
58.2k
    if (out_is_packed) {
3227
58.2k
        *out_is_packed = false;
3228
58.2k
    }
3229
3230
    // Get delete bitmap storage info from FDB
3231
58.2k
    std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3232
58.2k
    std::unique_ptr<Transaction> txn;
3233
58.2k
    TxnErrorCode err = txn_kv_->create_txn(&txn);
3234
58.2k
    if (err != TxnErrorCode::TXN_OK) {
3235
0
        LOG_WARNING("failed to create txn when getting delete bitmap storage")
3236
0
                .tag("instance_id", instance_id_)
3237
0
                .tag("tablet_id", tablet_id)
3238
0
                .tag("rowset_id", rowset_id)
3239
0
                .tag("err", err);
3240
0
        return -1;
3241
0
    }
3242
3243
58.2k
    std::string dbm_val;
3244
58.2k
    err = txn->get(dbm_key, &dbm_val);
3245
58.2k
    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3246
        // No delete bitmap for this rowset, nothing to do
3247
4.63k
        LOG_INFO("delete bitmap not found, skip packed file ref count decrement")
3248
4.63k
                .tag("instance_id", instance_id_)
3249
4.63k
                .tag("tablet_id", tablet_id)
3250
4.63k
                .tag("rowset_id", rowset_id);
3251
4.63k
        return 0;
3252
4.63k
    }
3253
53.5k
    if (err != TxnErrorCode::TXN_OK) {
3254
0
        LOG_WARNING("failed to get delete bitmap storage")
3255
0
                .tag("instance_id", instance_id_)
3256
0
                .tag("tablet_id", tablet_id)
3257
0
                .tag("rowset_id", rowset_id)
3258
0
                .tag("err", err);
3259
0
        return -1;
3260
0
    }
3261
3262
53.5k
    DeleteBitmapStoragePB storage;
3263
53.5k
    if (!storage.ParseFromString(dbm_val)) {
3264
0
        LOG_WARNING("failed to parse delete bitmap storage")
3265
0
                .tag("instance_id", instance_id_)
3266
0
                .tag("tablet_id", tablet_id)
3267
0
                .tag("rowset_id", rowset_id);
3268
0
        return -1;
3269
0
    }
3270
3271
    // Check if delete bitmap is stored in packed file
3272
53.5k
    if (!storage.has_packed_slice_location() ||
3273
53.5k
        storage.packed_slice_location().packed_file_path().empty()) {
3274
        // Not stored in packed file, nothing to do
3275
53.5k
        return 0;
3276
53.5k
    }
3277
3278
18.4E
    if (out_is_packed) {
3279
0
        *out_is_packed = true;
3280
0
    }
3281
3282
18.4E
    const auto& packed_loc = storage.packed_slice_location();
3283
18.4E
    const std::string& packed_file_path = packed_loc.packed_file_path();
3284
3285
18.4E
    LOG_INFO("decrementing delete bitmap packed file ref count")
3286
18.4E
            .tag("instance_id", instance_id_)
3287
18.4E
            .tag("tablet_id", tablet_id)
3288
18.4E
            .tag("rowset_id", rowset_id)
3289
18.4E
            .tag("packed_file_path", packed_file_path);
3290
3291
18.4E
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
3292
18.4E
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3293
0
        std::unique_ptr<Transaction> update_txn;
3294
0
        err = txn_kv_->create_txn(&update_txn);
3295
0
        if (err != TxnErrorCode::TXN_OK) {
3296
0
            LOG_WARNING("failed to create txn for delete bitmap packed file update")
3297
0
                    .tag("instance_id", instance_id_)
3298
0
                    .tag("tablet_id", tablet_id)
3299
0
                    .tag("rowset_id", rowset_id)
3300
0
                    .tag("err", err);
3301
0
            return -1;
3302
0
        }
3303
3304
0
        std::string packed_key = packed_file_key({instance_id_, packed_file_path});
3305
0
        std::string packed_val;
3306
0
        err = update_txn->get(packed_key, &packed_val);
3307
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3308
0
            LOG_WARNING("packed file info not found for delete bitmap")
3309
0
                    .tag("instance_id", instance_id_)
3310
0
                    .tag("tablet_id", tablet_id)
3311
0
                    .tag("rowset_id", rowset_id)
3312
0
                    .tag("packed_file_path", packed_file_path);
3313
0
            return 0;
3314
0
        }
3315
0
        if (err != TxnErrorCode::TXN_OK) {
3316
0
            LOG_WARNING("failed to get packed file info for delete bitmap")
3317
0
                    .tag("instance_id", instance_id_)
3318
0
                    .tag("tablet_id", tablet_id)
3319
0
                    .tag("rowset_id", rowset_id)
3320
0
                    .tag("packed_file_path", packed_file_path)
3321
0
                    .tag("err", err);
3322
0
            return -1;
3323
0
        }
3324
3325
0
        cloud::PackedFileInfoPB packed_info;
3326
0
        if (!packed_info.ParseFromString(packed_val)) {
3327
0
            LOG_WARNING("failed to parse packed file info for delete bitmap")
3328
0
                    .tag("instance_id", instance_id_)
3329
0
                    .tag("tablet_id", tablet_id)
3330
0
                    .tag("rowset_id", rowset_id)
3331
0
                    .tag("packed_file_path", packed_file_path);
3332
0
            return -1;
3333
0
        }
3334
3335
        // Find and mark the small file entry as deleted
3336
        // Use tablet_id and rowset_id to match entry instead of path,
3337
        // because path format may vary with path_version (with or without shard prefix)
3338
0
        auto* entries = packed_info.mutable_slices();
3339
0
        bool found = false;
3340
0
        bool already_deleted = false;
3341
0
        for (auto& entry : *entries) {
3342
0
            if (entry.tablet_id() == tablet_id && entry.rowset_id() == rowset_id) {
3343
0
                if (!entry.deleted()) {
3344
0
                    entry.set_deleted(true);
3345
0
                    if (!entry.corrected()) {
3346
0
                        entry.set_corrected(true);
3347
0
                    }
3348
0
                } else {
3349
0
                    already_deleted = true;
3350
0
                }
3351
0
                found = true;
3352
0
                break;
3353
0
            }
3354
0
        }
3355
3356
0
        if (!found) {
3357
0
            LOG_WARNING("delete bitmap entry not found in packed file")
3358
0
                    .tag("instance_id", instance_id_)
3359
0
                    .tag("tablet_id", tablet_id)
3360
0
                    .tag("rowset_id", rowset_id)
3361
0
                    .tag("packed_file_path", packed_file_path);
3362
0
            return 0;
3363
0
        }
3364
3365
0
        if (already_deleted) {
3366
0
            LOG_INFO("delete bitmap entry already deleted in packed file")
3367
0
                    .tag("instance_id", instance_id_)
3368
0
                    .tag("tablet_id", tablet_id)
3369
0
                    .tag("rowset_id", rowset_id)
3370
0
                    .tag("packed_file_path", packed_file_path);
3371
0
            return 0;
3372
0
        }
3373
3374
        // Calculate remaining files
3375
0
        int64_t left_file_count = 0;
3376
0
        int64_t left_file_bytes = 0;
3377
0
        for (const auto& entry : packed_info.slices()) {
3378
0
            if (!entry.deleted()) {
3379
0
                ++left_file_count;
3380
0
                left_file_bytes += entry.size();
3381
0
            }
3382
0
        }
3383
0
        packed_info.set_remaining_slice_bytes(left_file_bytes);
3384
0
        packed_info.set_ref_cnt(left_file_count);
3385
3386
0
        if (left_file_count == 0) {
3387
0
            packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
3388
0
        }
3389
3390
0
        std::string updated_val;
3391
0
        if (!packed_info.SerializeToString(&updated_val)) {
3392
0
            LOG_WARNING("failed to serialize packed file info for delete bitmap")
3393
0
                    .tag("instance_id", instance_id_)
3394
0
                    .tag("tablet_id", tablet_id)
3395
0
                    .tag("rowset_id", rowset_id)
3396
0
                    .tag("packed_file_path", packed_file_path);
3397
0
            return -1;
3398
0
        }
3399
3400
0
        update_txn->put(packed_key, updated_val);
3401
0
        err = update_txn->commit();
3402
0
        if (err == TxnErrorCode::TXN_OK) {
3403
0
            LOG_INFO("delete bitmap packed file ref count decremented")
3404
0
                    .tag("instance_id", instance_id_)
3405
0
                    .tag("tablet_id", tablet_id)
3406
0
                    .tag("rowset_id", rowset_id)
3407
0
                    .tag("packed_file_path", packed_file_path)
3408
0
                    .tag("left_file_count", left_file_count);
3409
0
            if (left_file_count == 0) {
3410
0
                if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
3411
0
                    return -1;
3412
0
                }
3413
0
            }
3414
0
            return 0;
3415
0
        }
3416
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3417
0
            if (attempt >= max_retry_times) {
3418
0
                LOG_WARNING("delete bitmap packed file update conflict after max retry")
3419
0
                        .tag("instance_id", instance_id_)
3420
0
                        .tag("tablet_id", tablet_id)
3421
0
                        .tag("rowset_id", rowset_id)
3422
0
                        .tag("packed_file_path", packed_file_path)
3423
0
                        .tag("attempt", attempt);
3424
0
                return -1;
3425
0
            }
3426
0
            sleep_for_packed_file_retry();
3427
0
            continue;
3428
0
        }
3429
3430
0
        LOG_WARNING("failed to commit delete bitmap packed file update")
3431
0
                .tag("instance_id", instance_id_)
3432
0
                .tag("tablet_id", tablet_id)
3433
0
                .tag("rowset_id", rowset_id)
3434
0
                .tag("packed_file_path", packed_file_path)
3435
0
                .tag("err", err);
3436
0
        return -1;
3437
0
    }
3438
3439
18.4E
    return -1;
3440
18.4E
}
3441
3442
int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path,
3443
                                                const std::string& packed_key,
3444
7
                                                const cloud::PackedFileInfoPB& packed_info) {
3445
7
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
3446
0
        LOG_WARNING("packed file missing resource id when recycling")
3447
0
                .tag("instance_id", instance_id_)
3448
0
                .tag("packed_file_path", packed_file_path);
3449
0
        return -1;
3450
0
    }
3451
3452
7
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
3453
7
    if (!accessor) {
3454
0
        LOG_WARNING("no accessor available to delete packed file")
3455
0
                .tag("instance_id", instance_id_)
3456
0
                .tag("packed_file_path", packed_file_path)
3457
0
                .tag("resource_id", packed_info.resource_id());
3458
0
        return -1;
3459
0
    }
3460
3461
7
    int del_ret = accessor->delete_file(packed_file_path);
3462
7
    if (del_ret != 0 && del_ret != 1) {
3463
0
        LOG_WARNING("failed to delete packed file")
3464
0
                .tag("instance_id", instance_id_)
3465
0
                .tag("packed_file_path", packed_file_path)
3466
0
                .tag("resource_id", resource_id)
3467
0
                .tag("ret", del_ret);
3468
0
        return -1;
3469
0
    }
3470
7
    if (del_ret == 1) {
3471
0
        LOG_INFO("packed file already removed")
3472
0
                .tag("instance_id", instance_id_)
3473
0
                .tag("packed_file_path", packed_file_path)
3474
0
                .tag("resource_id", resource_id);
3475
7
    } else {
3476
7
        LOG_INFO("deleted packed file")
3477
7
                .tag("instance_id", instance_id_)
3478
7
                .tag("packed_file_path", packed_file_path)
3479
7
                .tag("resource_id", resource_id);
3480
7
    }
3481
3482
7
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
3483
7
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3484
7
        std::unique_ptr<Transaction> del_txn;
3485
7
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
3486
7
        if (err != TxnErrorCode::TXN_OK) {
3487
0
            LOG_WARNING("failed to create txn when removing packed file kv")
3488
0
                    .tag("instance_id", instance_id_)
3489
0
                    .tag("packed_file_path", packed_file_path)
3490
0
                    .tag("attempt", attempt)
3491
0
                    .tag("err", err);
3492
0
            return -1;
3493
0
        }
3494
3495
7
        std::string latest_val;
3496
7
        err = del_txn->get(packed_key, &latest_val);
3497
7
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3498
0
            return 0;
3499
0
        }
3500
7
        if (err != TxnErrorCode::TXN_OK) {
3501
0
            LOG_WARNING("failed to re-read packed file kv before removal")
3502
0
                    .tag("instance_id", instance_id_)
3503
0
                    .tag("packed_file_path", packed_file_path)
3504
0
                    .tag("attempt", attempt)
3505
0
                    .tag("err", err);
3506
0
            return -1;
3507
0
        }
3508
3509
7
        cloud::PackedFileInfoPB latest_info;
3510
7
        if (!latest_info.ParseFromString(latest_val)) {
3511
0
            LOG_WARNING("failed to parse packed file info before removal")
3512
0
                    .tag("instance_id", instance_id_)
3513
0
                    .tag("packed_file_path", packed_file_path)
3514
0
                    .tag("attempt", attempt);
3515
0
            return -1;
3516
0
        }
3517
3518
7
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
3519
7
              latest_info.ref_cnt() == 0)) {
3520
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
3521
0
                    .tag("instance_id", instance_id_)
3522
0
                    .tag("packed_file_path", packed_file_path)
3523
0
                    .tag("attempt", attempt);
3524
0
            return 0;
3525
0
        }
3526
3527
7
        del_txn->remove(packed_key);
3528
7
        err = del_txn->commit();
3529
7
        if (err == TxnErrorCode::TXN_OK) {
3530
7
            LOG_INFO("removed packed file metadata")
3531
7
                    .tag("instance_id", instance_id_)
3532
7
                    .tag("packed_file_path", packed_file_path);
3533
7
            return 0;
3534
7
        }
3535
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3536
0
            if (attempt >= max_retry_times) {
3537
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
3538
0
                        .tag("instance_id", instance_id_)
3539
0
                        .tag("packed_file_path", packed_file_path)
3540
0
                        .tag("attempt", attempt);
3541
0
                return -1;
3542
0
            }
3543
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
3544
0
                    .tag("instance_id", instance_id_)
3545
0
                    .tag("packed_file_path", packed_file_path)
3546
0
                    .tag("attempt", attempt);
3547
0
            sleep_for_packed_file_retry();
3548
0
            continue;
3549
0
        }
3550
0
        LOG_WARNING("failed to remove packed file kv")
3551
0
                .tag("instance_id", instance_id_)
3552
0
                .tag("packed_file_path", packed_file_path)
3553
0
                .tag("attempt", attempt)
3554
0
                .tag("err", err);
3555
0
        return -1;
3556
0
    }
3557
0
    return -1;
3558
7
}
3559
3560
int InstanceRecycler::delete_rowset_data(
3561
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
3562
98
        RecyclerMetricsContext& metrics_context) {
3563
98
    int ret = 0;
3564
    // resource_id -> file_paths
3565
98
    std::map<std::string, std::vector<std::string>> resource_file_paths;
3566
    // (resource_id, tablet_id, rowset_id)
3567
98
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
3568
98
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
3569
3570
57.1k
    for (const auto& [_, rs] : rowsets) {
3571
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
3572
        // due to aborted schema change.
3573
57.1k
        if (is_formal_rowset) {
3574
3.16k
            std::lock_guard lock(recycled_tablets_mtx_);
3575
3.16k
            if (recycled_tablets_.count(rs.tablet_id()) && rs.packed_slice_locations_size() == 0) {
3576
                // Tablet has been recycled and this rowset has no packed slices, so file data
3577
                // should already be gone; skip to avoid redundant deletes. Rowsets with packed
3578
                // slice info must still run to decrement packed file ref counts.
3579
0
                continue;
3580
0
            }
3581
3.16k
        }
3582
3583
57.1k
        int64_t num_segments = rs.num_segments();
3584
        // Check num_segments before accessor lookup, because empty rowsets
3585
        // (e.g. base compaction output of empty rowsets) may have no resource_id
3586
        // set. Skipping them early avoids a spurious "no such resource id" error
3587
        // that marks the entire batch as failed and prevents txn_remove from
3588
        // cleaning up recycle KV keys.
3589
57.1k
        if (num_segments <= 0) {
3590
0
            metrics_context.total_recycled_num++;
3591
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
3592
0
            continue;
3593
0
        }
3594
3595
57.1k
        auto it = accessor_map_.find(rs.resource_id());
3596
        // possible if the accessor is not initilized correctly
3597
57.1k
        if (it == accessor_map_.end()) [[unlikely]] {
3598
3.00k
            LOG_WARNING("instance has no such resource id")
3599
3.00k
                    .tag("instance_id", instance_id_)
3600
3.00k
                    .tag("resource_id", rs.resource_id());
3601
3.00k
            ret = -1;
3602
3.00k
            continue;
3603
3.00k
        }
3604
3605
54.1k
        auto& file_paths = resource_file_paths[rs.resource_id()];
3606
54.1k
        const auto& rowset_id = rs.rowset_id_v2();
3607
54.1k
        int64_t tablet_id = rs.tablet_id();
3608
54.1k
        LOG_INFO("recycle rowset merge index size")
3609
54.1k
                .tag("instance_id", instance_id_)
3610
54.1k
                .tag("tablet_id", tablet_id)
3611
54.1k
                .tag("rowset_id", rowset_id)
3612
54.1k
                .tag("merge_index_size", rs.packed_slice_locations_size());
3613
54.1k
        if (decrement_packed_file_ref_counts(rs) != 0) {
3614
0
            ret = -1;
3615
0
            continue;
3616
0
        }
3617
3618
        // Process delete bitmap - check if it's stored in packed file
3619
54.1k
        bool delete_bitmap_is_packed = false;
3620
54.1k
        if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
3621
54.1k
                                                           &delete_bitmap_is_packed) != 0) {
3622
0
            LOG_WARNING("failed to decrement delete bitmap packed file ref count")
3623
0
                    .tag("instance_id", instance_id_)
3624
0
                    .tag("tablet_id", tablet_id)
3625
0
                    .tag("rowset_id", rowset_id);
3626
0
            ret = -1;
3627
0
            continue;
3628
0
        }
3629
        // Only delete standalone delete bitmap file if not stored in packed file
3630
54.2k
        if (!delete_bitmap_is_packed) {
3631
54.2k
            file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3632
54.2k
        }
3633
3634
        // Process inverted indexes
3635
54.1k
        std::vector<std::pair<int64_t, std::string>> index_ids;
3636
        // default format as v1.
3637
54.1k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3638
54.1k
        int inverted_index_get_ret = 0;
3639
54.1k
        if (rs.has_tablet_schema()) {
3640
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
3641
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3642
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3643
53.5k
                }
3644
53.5k
            }
3645
26.6k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
3646
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
3647
26.5k
            }
3648
27.5k
        } else {
3649
27.5k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
3650
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
3651
0
                                "instance_id="
3652
0
                             << instance_id_ << " tablet_id=" << tablet_id
3653
0
                             << " rowset_id=" << rowset_id;
3654
0
                ret = -1;
3655
0
                continue;
3656
0
            }
3657
27.5k
            InvertedIndexInfo index_info;
3658
27.5k
            inverted_index_get_ret =
3659
27.5k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
3660
27.5k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3661
27.5k
                                     &inverted_index_get_ret);
3662
27.5k
            if (inverted_index_get_ret == 0) {
3663
27.0k
                index_format = index_info.first;
3664
27.0k
                index_ids = index_info.second;
3665
27.0k
            } else if (inverted_index_get_ret == 1) {
3666
                // 1. Schema kv not found means tablet has been recycled
3667
                // Maybe some tablet recycle failed by some bugs
3668
                // We need to delete again to double check
3669
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3670
                // because we are uncertain about the inverted index information.
3671
                // If there are inverted indexes, some data might not be deleted,
3672
                // but this is acceptable as we have made our best effort to delete the data.
3673
507
                LOG_INFO(
3674
507
                        "delete rowset data schema kv not found, need to delete again to "
3675
507
                        "double "
3676
507
                        "check")
3677
507
                        .tag("instance_id", instance_id_)
3678
507
                        .tag("tablet_id", tablet_id)
3679
507
                        .tag("rowset", rs.ShortDebugString());
3680
                // Currently index_ids is guaranteed to be empty,
3681
                // but we clear it again here as a safeguard against future code changes
3682
                // that might cause index_ids to no longer be empty
3683
507
                index_format = InvertedIndexStorageFormatPB::V2;
3684
507
                index_ids.clear();
3685
18.4E
            } else {
3686
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
3687
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
3688
18.4E
                ret = -1;
3689
18.4E
                continue;
3690
18.4E
            }
3691
27.5k
        }
3692
54.2k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3693
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3694
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3695
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
3696
5
            continue;
3697
5
        }
3698
323k
        for (int64_t i = 0; i < num_segments; ++i) {
3699
269k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3700
269k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
3701
534k
                for (const auto& index_id : index_ids) {
3702
534k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
3703
534k
                                                                index_id.first, index_id.second));
3704
534k
                }
3705
266k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
3706
                // try to recycle inverted index v2 when get_ret == 1
3707
                // we treat schema not found as if it has a v2 format inverted index
3708
                // to reduce chance of data leakage
3709
2.50k
                if (inverted_index_get_ret == 1) {
3710
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
3711
2.50k
                            .tag("instance_id", instance_id_)
3712
2.50k
                            .tag("inverted index v2 path",
3713
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
3714
2.50k
                }
3715
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3716
2.50k
            }
3717
269k
        }
3718
54.1k
    }
3719
3720
98
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
3721
98
                                                 "delete_rowset_data",
3722
98
                                                 [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3722
5
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3722
51
                                                 [](const int& ret) { return ret != 0; });
3723
98
    for (auto& [resource_id, file_paths] : resource_file_paths) {
3724
51
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3725
51
            DCHECK(accessor_map_.count(*rid))
3726
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3727
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3728
51
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3729
51
                                     &accessor_map_);
3730
51
            if (!accessor_map_.contains(*rid)) {
3731
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3732
0
                        .tag("resource_id", resource_id)
3733
0
                        .tag("instance_id", instance_id_);
3734
0
                return -1;
3735
0
            }
3736
51
            auto& accessor = accessor_map_[*rid];
3737
51
            int ret = accessor->delete_files(*paths);
3738
51
            if (!ret) {
3739
                // deduplication of different files with the same rowset id
3740
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3741
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3742
51
                std::set<std::string> deleted_rowset_id;
3743
3744
51
                std::for_each(paths->begin(), paths->end(),
3745
51
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3746
860k
                               this](const std::string& path) {
3747
860k
                                  std::vector<std::string> str;
3748
860k
                                  butil::SplitString(path, '/', &str);
3749
860k
                                  std::string rowset_id;
3750
860k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3751
857k
                                      rowset_id = str.back().substr(0, pos);
3752
857k
                                  } else {
3753
2.55k
                                      if (path.find("packed_file/") != std::string::npos) {
3754
0
                                          return; // packed files do not have rowset_id encoded
3755
0
                                      }
3756
2.55k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3757
2.55k
                                      return;
3758
2.55k
                                  }
3759
857k
                                  auto rs_meta = rowsets.find(rowset_id);
3760
857k
                                  if (rs_meta != rowsets.end() &&
3761
861k
                                      !deleted_rowset_id.contains(rowset_id)) {
3762
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3763
54.1k
                                      metrics_context.total_recycled_data_size +=
3764
54.1k
                                              rs_meta->second.total_disk_size();
3765
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3766
54.1k
                                              rs_meta->second.num_segments();
3767
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3768
54.1k
                                              rs_meta->second.total_disk_size();
3769
54.1k
                                      metrics_context.total_recycled_num++;
3770
54.1k
                                  }
3771
857k
                              });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3746
14
                               this](const std::string& path) {
3747
14
                                  std::vector<std::string> str;
3748
14
                                  butil::SplitString(path, '/', &str);
3749
14
                                  std::string rowset_id;
3750
14
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3751
14
                                      rowset_id = str.back().substr(0, pos);
3752
14
                                  } else {
3753
0
                                      if (path.find("packed_file/") != std::string::npos) {
3754
0
                                          return; // packed files do not have rowset_id encoded
3755
0
                                      }
3756
0
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3757
0
                                      return;
3758
0
                                  }
3759
14
                                  auto rs_meta = rowsets.find(rowset_id);
3760
14
                                  if (rs_meta != rowsets.end() &&
3761
14
                                      !deleted_rowset_id.contains(rowset_id)) {
3762
7
                                      deleted_rowset_id.emplace(rowset_id);
3763
7
                                      metrics_context.total_recycled_data_size +=
3764
7
                                              rs_meta->second.total_disk_size();
3765
7
                                      segment_metrics_context_.total_recycled_num +=
3766
7
                                              rs_meta->second.num_segments();
3767
7
                                      segment_metrics_context_.total_recycled_data_size +=
3768
7
                                              rs_meta->second.total_disk_size();
3769
7
                                      metrics_context.total_recycled_num++;
3770
7
                                  }
3771
14
                              });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3746
860k
                               this](const std::string& path) {
3747
860k
                                  std::vector<std::string> str;
3748
860k
                                  butil::SplitString(path, '/', &str);
3749
860k
                                  std::string rowset_id;
3750
860k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3751
857k
                                      rowset_id = str.back().substr(0, pos);
3752
857k
                                  } else {
3753
2.55k
                                      if (path.find("packed_file/") != std::string::npos) {
3754
0
                                          return; // packed files do not have rowset_id encoded
3755
0
                                      }
3756
2.55k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3757
2.55k
                                      return;
3758
2.55k
                                  }
3759
857k
                                  auto rs_meta = rowsets.find(rowset_id);
3760
857k
                                  if (rs_meta != rowsets.end() &&
3761
861k
                                      !deleted_rowset_id.contains(rowset_id)) {
3762
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3763
54.1k
                                      metrics_context.total_recycled_data_size +=
3764
54.1k
                                              rs_meta->second.total_disk_size();
3765
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3766
54.1k
                                              rs_meta->second.num_segments();
3767
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3768
54.1k
                                              rs_meta->second.total_disk_size();
3769
54.1k
                                      metrics_context.total_recycled_num++;
3770
54.1k
                                  }
3771
857k
                              });
3772
51
            }
3773
51
            return ret;
3774
51
        });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3724
5
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3725
5
            DCHECK(accessor_map_.count(*rid))
3726
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3727
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3728
5
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3729
5
                                     &accessor_map_);
3730
5
            if (!accessor_map_.contains(*rid)) {
3731
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3732
0
                        .tag("resource_id", resource_id)
3733
0
                        .tag("instance_id", instance_id_);
3734
0
                return -1;
3735
0
            }
3736
5
            auto& accessor = accessor_map_[*rid];
3737
5
            int ret = accessor->delete_files(*paths);
3738
5
            if (!ret) {
3739
                // deduplication of different files with the same rowset id
3740
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3741
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3742
5
                std::set<std::string> deleted_rowset_id;
3743
3744
5
                std::for_each(paths->begin(), paths->end(),
3745
5
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3746
5
                               this](const std::string& path) {
3747
5
                                  std::vector<std::string> str;
3748
5
                                  butil::SplitString(path, '/', &str);
3749
5
                                  std::string rowset_id;
3750
5
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3751
5
                                      rowset_id = str.back().substr(0, pos);
3752
5
                                  } else {
3753
5
                                      if (path.find("packed_file/") != std::string::npos) {
3754
5
                                          return; // packed files do not have rowset_id encoded
3755
5
                                      }
3756
5
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3757
5
                                      return;
3758
5
                                  }
3759
5
                                  auto rs_meta = rowsets.find(rowset_id);
3760
5
                                  if (rs_meta != rowsets.end() &&
3761
5
                                      !deleted_rowset_id.contains(rowset_id)) {
3762
5
                                      deleted_rowset_id.emplace(rowset_id);
3763
5
                                      metrics_context.total_recycled_data_size +=
3764
5
                                              rs_meta->second.total_disk_size();
3765
5
                                      segment_metrics_context_.total_recycled_num +=
3766
5
                                              rs_meta->second.num_segments();
3767
5
                                      segment_metrics_context_.total_recycled_data_size +=
3768
5
                                              rs_meta->second.total_disk_size();
3769
5
                                      metrics_context.total_recycled_num++;
3770
5
                                  }
3771
5
                              });
3772
5
            }
3773
5
            return ret;
3774
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3724
46
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3725
46
            DCHECK(accessor_map_.count(*rid))
3726
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3727
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3728
46
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3729
46
                                     &accessor_map_);
3730
46
            if (!accessor_map_.contains(*rid)) {
3731
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3732
0
                        .tag("resource_id", resource_id)
3733
0
                        .tag("instance_id", instance_id_);
3734
0
                return -1;
3735
0
            }
3736
46
            auto& accessor = accessor_map_[*rid];
3737
46
            int ret = accessor->delete_files(*paths);
3738
46
            if (!ret) {
3739
                // deduplication of different files with the same rowset id
3740
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3741
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3742
46
                std::set<std::string> deleted_rowset_id;
3743
3744
46
                std::for_each(paths->begin(), paths->end(),
3745
46
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3746
46
                               this](const std::string& path) {
3747
46
                                  std::vector<std::string> str;
3748
46
                                  butil::SplitString(path, '/', &str);
3749
46
                                  std::string rowset_id;
3750
46
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3751
46
                                      rowset_id = str.back().substr(0, pos);
3752
46
                                  } else {
3753
46
                                      if (path.find("packed_file/") != std::string::npos) {
3754
46
                                          return; // packed files do not have rowset_id encoded
3755
46
                                      }
3756
46
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3757
46
                                      return;
3758
46
                                  }
3759
46
                                  auto rs_meta = rowsets.find(rowset_id);
3760
46
                                  if (rs_meta != rowsets.end() &&
3761
46
                                      !deleted_rowset_id.contains(rowset_id)) {
3762
46
                                      deleted_rowset_id.emplace(rowset_id);
3763
46
                                      metrics_context.total_recycled_data_size +=
3764
46
                                              rs_meta->second.total_disk_size();
3765
46
                                      segment_metrics_context_.total_recycled_num +=
3766
46
                                              rs_meta->second.num_segments();
3767
46
                                      segment_metrics_context_.total_recycled_data_size +=
3768
46
                                              rs_meta->second.total_disk_size();
3769
46
                                      metrics_context.total_recycled_num++;
3770
46
                                  }
3771
46
                              });
3772
46
            }
3773
46
            return ret;
3774
46
        });
3775
51
    }
3776
98
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
3777
5
        LOG_INFO(
3778
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
3779
5
                "resource_id={}, tablet_id={}, instance_id={}, task_type={}",
3780
5
                rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type);
3781
5
        concurrent_delete_executor.add([&]() -> int {
3782
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3783
5
            if (!ret) {
3784
5
                auto rs = rowsets.at(rowset_id);
3785
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3786
5
                metrics_context.total_recycled_num++;
3787
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3788
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3789
5
            }
3790
5
            return ret;
3791
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
3781
5
        concurrent_delete_executor.add([&]() -> int {
3782
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3783
5
            if (!ret) {
3784
5
                auto rs = rowsets.at(rowset_id);
3785
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3786
5
                metrics_context.total_recycled_num++;
3787
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3788
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3789
5
            }
3790
5
            return ret;
3791
5
        });
3792
5
    }
3793
3794
98
    bool finished = true;
3795
98
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3796
98
    for (int r : rets) {
3797
56
        if (r != 0) {
3798
0
            ret = -1;
3799
0
            break;
3800
0
        }
3801
56
    }
3802
98
    ret = finished ? ret : -1;
3803
98
    return ret;
3804
98
}
3805
3806
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
3807
3.30k
                                         const std::string& rowset_id) {
3808
3.30k
    auto it = accessor_map_.find(resource_id);
3809
3.30k
    if (it == accessor_map_.end()) {
3810
400
        LOG_WARNING("instance has no such resource id")
3811
400
                .tag("instance_id", instance_id_)
3812
400
                .tag("resource_id", resource_id)
3813
400
                .tag("tablet_id", tablet_id)
3814
400
                .tag("rowset_id", rowset_id);
3815
400
        return -1;
3816
400
    }
3817
2.90k
    auto& accessor = it->second;
3818
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
3819
3.30k
}
3820
3821
4
bool InstanceRecycler::decode_packed_file_key(std::string_view key, std::string* packed_path) {
3822
4
    if (key.empty()) {
3823
0
        return false;
3824
0
    }
3825
4
    std::string_view key_view = key;
3826
4
    key_view.remove_prefix(1); // remove keyspace prefix
3827
4
    std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> decoded;
3828
4
    if (decode_key(&key_view, &decoded) != 0) {
3829
0
        return false;
3830
0
    }
3831
4
    if (decoded.size() < 4) {
3832
0
        return false;
3833
0
    }
3834
4
    try {
3835
4
        *packed_path = std::get<std::string>(std::get<0>(decoded.back()));
3836
4
    } catch (const std::bad_variant_access&) {
3837
0
        return false;
3838
0
    }
3839
4
    return true;
3840
4
}
3841
3842
14
int InstanceRecycler::recycle_packed_files() {
3843
14
    const std::string task_name = "recycle_packed_files";
3844
14
    auto start_tp = steady_clock::now();
3845
14
    int64_t start_time = duration_cast<seconds>(start_tp.time_since_epoch()).count();
3846
14
    int ret = 0;
3847
14
    PackedFileRecycleStats stats;
3848
3849
14
    register_recycle_task(task_name, start_time);
3850
14
    DORIS_CLOUD_DEFER {
3851
14
        unregister_recycle_task(task_name);
3852
14
        int64_t cost =
3853
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3854
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3855
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3856
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3857
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3858
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3859
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3860
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3861
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3862
14
                                                             stats.bytes_object_deleted);
3863
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3864
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3865
14
                .tag("instance_id", instance_id_)
3866
14
                .tag("num_scanned", stats.num_scanned)
3867
14
                .tag("num_corrected", stats.num_corrected)
3868
14
                .tag("num_deleted", stats.num_deleted)
3869
14
                .tag("num_failed", stats.num_failed)
3870
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3871
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3872
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3873
14
                .tag("bytes_deleted", stats.bytes_deleted)
3874
14
                .tag("ret", ret);
3875
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
Line
Count
Source
3850
14
    DORIS_CLOUD_DEFER {
3851
14
        unregister_recycle_task(task_name);
3852
14
        int64_t cost =
3853
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3854
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3855
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3856
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3857
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3858
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3859
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3860
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3861
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3862
14
                                                             stats.bytes_object_deleted);
3863
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3864
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3865
14
                .tag("instance_id", instance_id_)
3866
14
                .tag("num_scanned", stats.num_scanned)
3867
14
                .tag("num_corrected", stats.num_corrected)
3868
14
                .tag("num_deleted", stats.num_deleted)
3869
14
                .tag("num_failed", stats.num_failed)
3870
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3871
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3872
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3873
14
                .tag("bytes_deleted", stats.bytes_deleted)
3874
14
                .tag("ret", ret);
3875
14
    };
3876
3877
14
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3878
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3879
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3880
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
Line
Count
Source
3877
4
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3878
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3879
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3880
4
    };
3881
3882
14
    LOG_INFO("begin to recycle packed file").tag("instance_id", instance_id_);
3883
3884
14
    std::string begin = packed_file_key({instance_id_, ""});
3885
14
    std::string end = packed_file_key({instance_id_, "\xff"});
3886
14
    if (scan_and_recycle(begin, end, recycle_func) != 0) {
3887
0
        ret = -1;
3888
0
    }
3889
3890
14
    return ret;
3891
14
}
3892
3893
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
3894
                                                  RecyclerMetricsContext& metrics_context,
3895
0
                                                  int64_t partition_id, bool is_empty_tablet) {
3896
0
    std::string tablet_key_begin, tablet_key_end;
3897
3898
0
    if (partition_id > 0) {
3899
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
3900
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
3901
0
    } else {
3902
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
3903
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
3904
0
    }
3905
    // for calculate the total num or bytes of recyled objects
3906
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
3907
0
                                                          std::string_view v) -> int {
3908
0
        doris::TabletMetaCloudPB tablet_meta_pb;
3909
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
3910
0
            return 0;
3911
0
        }
3912
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
3913
3914
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
3915
0
            return 0;
3916
0
        }
3917
3918
0
        if (!is_empty_tablet) {
3919
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
3920
0
                return 0;
3921
0
            }
3922
0
            tablet_metrics_context_.total_need_recycle_num++;
3923
0
        }
3924
0
        return 0;
3925
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
3926
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
3927
0
    metrics_context.report(true);
3928
0
    tablet_metrics_context_.report(true);
3929
0
    segment_metrics_context_.report(true);
3930
0
    return ret;
3931
0
}
3932
3933
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
3934
0
                                                 RecyclerMetricsContext& metrics_context) {
3935
0
    int ret = 0;
3936
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
3937
0
    std::unique_ptr<Transaction> txn;
3938
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3939
0
        LOG_WARNING("failed to recycle tablet ")
3940
0
                .tag("tablet id", tablet_id)
3941
0
                .tag("instance_id", instance_id_)
3942
0
                .tag("reason", "failed to create txn");
3943
0
        ret = -1;
3944
0
    }
3945
0
    GetRowsetResponse resp;
3946
0
    std::string msg;
3947
0
    MetaServiceCode code = MetaServiceCode::OK;
3948
    // get rowsets in tablet
3949
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
3950
0
                        tablet_id, code, msg, &resp);
3951
0
    if (code != MetaServiceCode::OK) {
3952
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3953
0
                .tag("tablet id", tablet_id)
3954
0
                .tag("msg", msg)
3955
0
                .tag("code", code)
3956
0
                .tag("instance id", instance_id_);
3957
0
        ret = -1;
3958
0
    }
3959
0
    for (const auto& rs_meta : resp.rowset_meta()) {
3960
        /*
3961
        * For compatibility, we skip the loop for [0-1] here.
3962
        * The purpose of this loop is to delete object files,
3963
        * and since [0-1] only has meta and doesn't have object files,
3964
        * skipping it doesn't affect system correctness.
3965
        *
3966
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
3967
        * would return error -1 directly, causing the recycle operation to fail.
3968
        *
3969
        * [0-1] doesn't have resource id is a bug.
3970
        * In the future, we will fix this problem, after that,
3971
        * we can remove this if statement.
3972
        *
3973
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
3974
        */
3975
3976
0
        if (rs_meta.end_version() == 1) {
3977
            // Assert that [0-1] has no resource_id to make sure
3978
            // this if statement will not be forgetted to remove
3979
            // when the resource id bug is fixed
3980
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3981
0
            continue;
3982
0
        }
3983
0
        if (!rs_meta.has_resource_id()) {
3984
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3985
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3986
0
                    .tag("instance_id", instance_id_)
3987
0
                    .tag("tablet_id", tablet_id);
3988
0
            continue;
3989
0
        }
3990
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3991
0
        auto it = accessor_map_.find(rs_meta.resource_id());
3992
        // possible if the accessor is not initilized correctly
3993
0
        if (it == accessor_map_.end()) [[unlikely]] {
3994
0
            LOG_WARNING(
3995
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3996
0
                    "recycle process")
3997
0
                    .tag("tablet id", tablet_id)
3998
0
                    .tag("instance_id", instance_id_)
3999
0
                    .tag("resource_id", rs_meta.resource_id())
4000
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4001
0
            continue;
4002
0
        }
4003
4004
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
4005
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4006
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4007
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
4008
0
    }
4009
0
    return ret;
4010
0
}
4011
4012
4.25k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
4013
4.25k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
4014
4.25k
            .tag("instance_id", instance_id_)
4015
4.25k
            .tag("tablet_id", tablet_id);
4016
4017
4.25k
    if (should_recycle_versioned_keys()) {
4018
11
        int ret = recycle_versioned_tablet(tablet_id, metrics_context);
4019
11
        if (ret != 0) {
4020
0
            return ret;
4021
0
        }
4022
        // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED
4023
        // during the recycle_versioned_tablet process.
4024
        //
4025
        // .. And remove restore job rowsets of this tablet too
4026
11
    }
4027
4028
4.25k
    int ret = 0;
4029
4.25k
    auto start_time = steady_clock::now();
4030
4031
4.25k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4032
4033
    // collect resource ids
4034
248
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4035
248
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4036
248
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4037
248
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4038
248
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4039
248
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4040
4041
248
    std::set<std::string> resource_ids;
4042
248
    int64_t recycle_rowsets_number = 0;
4043
248
    int64_t recycle_segments_number = 0;
4044
248
    int64_t recycle_rowsets_data_size = 0;
4045
248
    int64_t recycle_rowsets_index_size = 0;
4046
248
    int64_t recycle_restore_job_rowsets_number = 0;
4047
248
    int64_t recycle_restore_job_segments_number = 0;
4048
248
    int64_t recycle_restore_job_rowsets_data_size = 0;
4049
248
    int64_t recycle_restore_job_rowsets_index_size = 0;
4050
248
    int64_t max_rowset_version = 0;
4051
248
    int64_t min_rowset_creation_time = INT64_MAX;
4052
248
    int64_t max_rowset_creation_time = 0;
4053
248
    int64_t min_rowset_expiration_time = INT64_MAX;
4054
248
    int64_t max_rowset_expiration_time = 0;
4055
4056
248
    DORIS_CLOUD_DEFER {
4057
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4058
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4059
248
                .tag("instance_id", instance_id_)
4060
248
                .tag("tablet_id", tablet_id)
4061
248
                .tag("recycle rowsets number", recycle_rowsets_number)
4062
248
                .tag("recycle segments number", recycle_segments_number)
4063
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4064
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4065
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4066
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4067
248
                .tag("all restore job rowsets recycle data size",
4068
248
                     recycle_restore_job_rowsets_data_size)
4069
248
                .tag("all restore job rowsets recycle index size",
4070
248
                     recycle_restore_job_rowsets_index_size)
4071
248
                .tag("max rowset version", max_rowset_version)
4072
248
                .tag("min rowset creation time", min_rowset_creation_time)
4073
248
                .tag("max rowset creation time", max_rowset_creation_time)
4074
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
4075
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
4076
248
                .tag("task type", metrics_context.operation_type)
4077
248
                .tag("ret", ret);
4078
248
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4056
248
    DORIS_CLOUD_DEFER {
4057
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4058
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4059
248
                .tag("instance_id", instance_id_)
4060
248
                .tag("tablet_id", tablet_id)
4061
248
                .tag("recycle rowsets number", recycle_rowsets_number)
4062
248
                .tag("recycle segments number", recycle_segments_number)
4063
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4064
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4065
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4066
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4067
248
                .tag("all restore job rowsets recycle data size",
4068
248
                     recycle_restore_job_rowsets_data_size)
4069
248
                .tag("all restore job rowsets recycle index size",
4070
248
                     recycle_restore_job_rowsets_index_size)
4071
248
                .tag("max rowset version", max_rowset_version)
4072
248
                .tag("min rowset creation time", min_rowset_creation_time)
4073
248
                .tag("max rowset creation time", max_rowset_creation_time)
4074
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
4075
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
4076
248
                .tag("task type", metrics_context.operation_type)
4077
248
                .tag("ret", ret);
4078
248
    };
4079
4080
248
    std::unique_ptr<Transaction> txn;
4081
248
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4082
0
        LOG_WARNING("failed to recycle tablet ")
4083
0
                .tag("tablet id", tablet_id)
4084
0
                .tag("instance_id", instance_id_)
4085
0
                .tag("reason", "failed to create txn");
4086
0
        ret = -1;
4087
0
    }
4088
248
    GetRowsetResponse resp;
4089
248
    std::string msg;
4090
248
    MetaServiceCode code = MetaServiceCode::OK;
4091
    // get rowsets in tablet
4092
248
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
4093
248
                        tablet_id, code, msg, &resp);
4094
248
    if (code != MetaServiceCode::OK) {
4095
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4096
0
                .tag("tablet id", tablet_id)
4097
0
                .tag("msg", msg)
4098
0
                .tag("code", code)
4099
0
                .tag("instance id", instance_id_);
4100
0
        ret = -1;
4101
0
    }
4102
248
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
4103
4104
2.51k
    for (const auto& rs_meta : resp.rowset_meta()) {
4105
        // The rowset has no resource id and segments when it was generated by compaction
4106
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
4107
2.51k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
4108
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
4109
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4110
0
                    .tag("instance_id", instance_id_)
4111
0
                    .tag("tablet_id", tablet_id);
4112
0
            recycle_rowsets_number += 1;
4113
0
            continue;
4114
0
        }
4115
2.51k
        if (!rs_meta.has_resource_id()) {
4116
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4117
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
4118
1
                    .tag("instance_id", instance_id_)
4119
1
                    .tag("tablet_id", tablet_id);
4120
1
            return -1;
4121
1
        }
4122
2.51k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4123
2.51k
        auto it = accessor_map_.find(rs_meta.resource_id());
4124
        // possible if the accessor is not initilized correctly
4125
2.51k
        if (it == accessor_map_.end()) [[unlikely]] {
4126
1
            LOG_WARNING(
4127
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4128
1
                    "recycle process")
4129
1
                    .tag("tablet id", tablet_id)
4130
1
                    .tag("instance_id", instance_id_)
4131
1
                    .tag("resource_id", rs_meta.resource_id())
4132
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4133
1
            return -1;
4134
1
        }
4135
2.51k
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4136
0
            LOG_WARNING("failed to update packed file info when recycling tablet")
4137
0
                    .tag("instance_id", instance_id_)
4138
0
                    .tag("tablet_id", tablet_id)
4139
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4140
0
            return -1;
4141
0
        }
4142
2.51k
        recycle_rowsets_number += 1;
4143
2.51k
        recycle_segments_number += rs_meta.num_segments();
4144
2.51k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4145
2.51k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4146
2.51k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4147
2.51k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4148
2.51k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4149
2.51k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4150
2.51k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4151
2.51k
        resource_ids.emplace(rs_meta.resource_id());
4152
2.51k
    }
4153
4154
    // get restore job rowset in tablet
4155
246
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
4156
246
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
4157
246
    if (code != MetaServiceCode::OK) {
4158
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
4159
0
                .tag("tablet id", tablet_id)
4160
0
                .tag("msg", msg)
4161
0
                .tag("code", code)
4162
0
                .tag("instance id", instance_id_);
4163
0
        return -1;
4164
0
    }
4165
4166
246
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
4167
0
        if (!rs_meta.has_resource_id()) {
4168
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4169
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4170
0
                    .tag("instance_id", instance_id_)
4171
0
                    .tag("tablet_id", tablet_id);
4172
0
            return -1;
4173
0
        }
4174
4175
0
        auto it = accessor_map_.find(rs_meta.resource_id());
4176
        // possible if the accessor is not initilized correctly
4177
0
        if (it == accessor_map_.end()) [[unlikely]] {
4178
0
            LOG_WARNING(
4179
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4180
0
                    "recycle process")
4181
0
                    .tag("tablet id", tablet_id)
4182
0
                    .tag("instance_id", instance_id_)
4183
0
                    .tag("resource_id", rs_meta.resource_id())
4184
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4185
0
            return -1;
4186
0
        }
4187
0
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4188
0
            LOG_WARNING("failed to update packed file info when recycling restore job rowset")
4189
0
                    .tag("instance_id", instance_id_)
4190
0
                    .tag("tablet_id", tablet_id)
4191
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4192
0
            return -1;
4193
0
        }
4194
0
        recycle_restore_job_rowsets_number += 1;
4195
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
4196
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
4197
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
4198
0
        resource_ids.emplace(rs_meta.resource_id());
4199
0
    }
4200
4201
246
    LOG_INFO("recycle tablet start to delete object")
4202
246
            .tag("instance id", instance_id_)
4203
246
            .tag("tablet id", tablet_id)
4204
246
            .tag("recycle tablet resource ids are",
4205
246
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
4206
246
                                 [](std::string rs_id, const auto& it) {
4207
206
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4208
206
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
4206
206
                                 [](std::string rs_id, const auto& it) {
4207
206
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4208
206
                                 }));
4209
4210
246
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
4211
246
            _thread_pool_group.s3_producer_pool,
4212
246
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4213
246
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
4213
206
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
4214
4215
    // delete all rowset data in this tablet
4216
    // ATTN: there may be data leak if not all accessor initilized successfully
4217
    //       partial data deleted if the tablet is stored cross-storage vault
4218
    //       vault id is not attached to TabletMeta...
4219
246
    for (const auto& resource_id : resource_ids) {
4220
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
4221
206
        concurrent_delete_executor.add(
4222
206
                [&, rs_id = resource_id,
4223
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4224
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4225
206
                    if (res != 0) {
4226
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4227
2
                                     << " path=" << accessor_ptr->uri()
4228
2
                                     << " task type=" << metrics_context.operation_type;
4229
2
                        return std::make_pair(-1, rs_id);
4230
2
                    }
4231
204
                    return std::make_pair(0, rs_id);
4232
206
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
4223
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4224
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4225
206
                    if (res != 0) {
4226
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4227
2
                                     << " path=" << accessor_ptr->uri()
4228
2
                                     << " task type=" << metrics_context.operation_type;
4229
2
                        return std::make_pair(-1, rs_id);
4230
2
                    }
4231
204
                    return std::make_pair(0, rs_id);
4232
206
                });
4233
206
    }
4234
4235
246
    bool finished = true;
4236
246
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
4237
246
    for (auto& r : rets) {
4238
206
        if (r.first != 0) {
4239
2
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
4240
2
            ret = -1;
4241
2
        }
4242
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
4243
206
    }
4244
246
    ret = finished ? ret : -1;
4245
4246
246
    if (ret != 0) { // failed recycle tablet data
4247
2
        LOG_WARNING("ret!=0")
4248
2
                .tag("finished", finished)
4249
2
                .tag("ret", ret)
4250
2
                .tag("instance_id", instance_id_)
4251
2
                .tag("tablet_id", tablet_id);
4252
2
        return ret;
4253
2
    }
4254
4255
244
    tablet_metrics_context_.total_recycled_data_size +=
4256
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4257
244
    tablet_metrics_context_.total_recycled_num += 1;
4258
244
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4259
244
    segment_metrics_context_.total_recycled_data_size +=
4260
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4261
244
    metrics_context.total_recycled_data_size +=
4262
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4263
244
    tablet_metrics_context_.report();
4264
244
    segment_metrics_context_.report();
4265
244
    metrics_context.report();
4266
4267
244
    txn.reset();
4268
244
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4269
0
        LOG_WARNING("failed to recycle tablet ")
4270
0
                .tag("tablet id", tablet_id)
4271
0
                .tag("instance_id", instance_id_)
4272
0
                .tag("reason", "failed to create txn");
4273
0
        ret = -1;
4274
0
    }
4275
    // delete all rowset kv in this tablet
4276
244
    txn->remove(rs_key0, rs_key1);
4277
244
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4278
244
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4279
4280
    // remove delete bitmap for MoW table
4281
244
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4282
244
    txn->remove(pending_key);
4283
244
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4284
244
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4285
244
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4286
4287
244
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4288
244
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4289
244
    txn->remove(dbm_start_key, dbm_end_key);
4290
244
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4291
244
              << " end=" << hex(dbm_end_key);
4292
4293
244
    TxnErrorCode err = txn->commit();
4294
244
    if (err != TxnErrorCode::TXN_OK) {
4295
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4296
0
        ret = -1;
4297
0
    }
4298
4299
244
    if (ret == 0) {
4300
        // All object files under tablet have been deleted
4301
244
        std::lock_guard lock(recycled_tablets_mtx_);
4302
244
        recycled_tablets_.insert(tablet_id);
4303
244
    }
4304
4305
244
    return ret;
4306
246
}
4307
4308
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
4309
11
                                               RecyclerMetricsContext& metrics_context) {
4310
11
    int ret = 0;
4311
11
    auto start_time = steady_clock::now();
4312
4313
11
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4314
4315
    // collect resource ids
4316
11
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4317
11
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4318
11
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4319
11
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4320
4321
11
    int64_t recycle_rowsets_number = 0;
4322
11
    int64_t recycle_segments_number = 0;
4323
11
    int64_t recycle_rowsets_data_size = 0;
4324
11
    int64_t recycle_rowsets_index_size = 0;
4325
11
    int64_t max_rowset_version = 0;
4326
11
    int64_t min_rowset_creation_time = INT64_MAX;
4327
11
    int64_t max_rowset_creation_time = 0;
4328
11
    int64_t min_rowset_expiration_time = INT64_MAX;
4329
11
    int64_t max_rowset_expiration_time = 0;
4330
4331
11
    DORIS_CLOUD_DEFER {
4332
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4333
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4334
11
                .tag("instance_id", instance_id_)
4335
11
                .tag("tablet_id", tablet_id)
4336
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4337
11
                .tag("recycle segments number", recycle_segments_number)
4338
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4339
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4340
11
                .tag("max rowset version", max_rowset_version)
4341
11
                .tag("min rowset creation time", min_rowset_creation_time)
4342
11
                .tag("max rowset creation time", max_rowset_creation_time)
4343
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4344
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4345
11
                .tag("ret", ret);
4346
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4331
11
    DORIS_CLOUD_DEFER {
4332
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4333
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4334
11
                .tag("instance_id", instance_id_)
4335
11
                .tag("tablet_id", tablet_id)
4336
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4337
11
                .tag("recycle segments number", recycle_segments_number)
4338
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4339
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4340
11
                .tag("max rowset version", max_rowset_version)
4341
11
                .tag("min rowset creation time", min_rowset_creation_time)
4342
11
                .tag("max rowset creation time", max_rowset_creation_time)
4343
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4344
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4345
11
                .tag("ret", ret);
4346
11
    };
4347
4348
11
    std::unique_ptr<Transaction> txn;
4349
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4350
0
        LOG_WARNING("failed to recycle tablet ")
4351
0
                .tag("tablet id", tablet_id)
4352
0
                .tag("instance_id", instance_id_)
4353
0
                .tag("reason", "failed to create txn");
4354
0
        ret = -1;
4355
0
    }
4356
4357
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
4358
    // by the related operation logs.
4359
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
4360
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
4361
11
    MetaReader meta_reader(instance_id_);
4362
11
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
4363
11
    if (err == TxnErrorCode::TXN_OK) {
4364
11
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
4365
11
    }
4366
11
    if (err != TxnErrorCode::TXN_OK) {
4367
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4368
0
                .tag("tablet id", tablet_id)
4369
0
                .tag("err", err)
4370
0
                .tag("instance id", instance_id_);
4371
0
        ret = -1;
4372
0
    }
4373
4374
11
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
4375
11
             load_rowset_metas.size(), compact_rowset_metas.size())
4376
11
            .tag("instance_id", instance_id_)
4377
11
            .tag("tablet_id", tablet_id);
4378
4379
11
    SyncExecutor<int> concurrent_delete_executor(
4380
11
            _thread_pool_group.s3_producer_pool,
4381
11
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4382
11
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
4383
4384
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4385
60
        recycle_rowsets_number += 1;
4386
60
        recycle_segments_number += rs_meta.num_segments();
4387
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4388
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4389
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4390
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4391
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4392
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4393
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4394
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
4384
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4385
60
        recycle_rowsets_number += 1;
4386
60
        recycle_segments_number += rs_meta.num_segments();
4387
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4388
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4389
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4390
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4391
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4392
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4393
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4394
60
    };
4395
4396
11
    std::vector<RowsetDeleteTask> all_tasks;
4397
60
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
4398
60
        update_rowset_stats(rs_meta);
4399
        // Version 0-1 rowset has no resource_id and no actual data files,
4400
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4401
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4402
60
        RowsetDeleteTask task;
4403
60
        task.rowset_meta = rs_meta;
4404
60
        task.versioned_rowset_key =
4405
60
                versioned::meta_rowset_load_key({instance_id_, tablet_id, rs_meta.end_version()});
4406
60
        task.non_versioned_rowset_key =
4407
60
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4408
60
        task.versionstamp = versionstamp;
4409
60
        all_tasks.push_back(std::move(task));
4410
60
    }
4411
4412
11
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
4413
0
        update_rowset_stats(rs_meta);
4414
        // Version 0-1 rowset has no resource_id and no actual data files,
4415
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4416
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4417
0
        RowsetDeleteTask task;
4418
0
        task.rowset_meta = rs_meta;
4419
0
        task.versioned_rowset_key = versioned::meta_rowset_compact_key(
4420
0
                {instance_id_, tablet_id, rs_meta.end_version()});
4421
0
        task.non_versioned_rowset_key =
4422
0
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4423
0
        task.versionstamp = versionstamp;
4424
0
        all_tasks.push_back(std::move(task));
4425
0
    }
4426
4427
11
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
4428
0
        RecycleRowsetPB recycle_rowset;
4429
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
4430
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4431
0
            return -1;
4432
0
        }
4433
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
4434
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
4435
                // in old version, keep this key-value pair and it needs to be checked manually
4436
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4437
0
                return -1;
4438
0
            }
4439
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
4440
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4441
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4442
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
4443
0
                return -1;
4444
0
            }
4445
            // decode rowset_id
4446
0
            auto k1 = k;
4447
0
            k1.remove_prefix(1);
4448
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4449
0
            decode_key(&k1, &out);
4450
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4451
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4452
0
            LOG_INFO("delete old-version rowset data")
4453
0
                    .tag("instance_id", instance_id_)
4454
0
                    .tag("tablet_id", tablet_id)
4455
0
                    .tag("rowset_id", rowset_id);
4456
4457
            // Old version RecycleRowsetPB lacks full rowset_meta info (num_segments, schema, etc.),
4458
            // so we must use prefix deletion directly instead of batch delete.
4459
0
            concurrent_delete_executor.add(
4460
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
4461
                        // delete by prefix, the recycle rowset key will be deleted by range later.
4462
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
4463
0
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
4464
0
        } else {
4465
0
            const auto& rowset_meta = recycle_rowset.rowset_meta();
4466
            // Version 0-1 rowset has no resource_id and no actual data files,
4467
            // but still needs ref_count key cleanup, so we add it to all_tasks.
4468
            // It will be filtered out in Phase 2 when building rowsets_to_delete.
4469
0
            RowsetDeleteTask task;
4470
0
            task.rowset_meta = rowset_meta;
4471
0
            task.recycle_rowset_key = k;
4472
0
            all_tasks.push_back(std::move(task));
4473
0
        }
4474
0
        return 0;
4475
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
4476
4477
11
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
4478
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
4479
0
                .tag("tablet id", tablet_id)
4480
0
                .tag("instance_id", instance_id_)
4481
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
4482
0
        ret = -1;
4483
0
    }
4484
4485
    // Phase 1: Classify tasks by ref_count
4486
11
    std::vector<RowsetDeleteTask> batch_delete_tasks;
4487
60
    for (auto& task : all_tasks) {
4488
60
        int classify_ret = classify_rowset_task_by_ref_count(task, batch_delete_tasks);
4489
60
        if (classify_ret < 0) {
4490
0
            LOG_WARNING("failed to classify rowset task, fallback to old logic")
4491
0
                    .tag("instance_id", instance_id_)
4492
0
                    .tag("tablet_id", tablet_id)
4493
0
                    .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4494
0
            concurrent_delete_executor.add([this, t = std::move(task)]() mutable {
4495
0
                return recycle_rowset_meta_and_data(t);
4496
0
            });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
4497
0
        }
4498
60
    }
4499
4500
11
    g_bvar_recycler_batch_delete_rowset_plan_count.put(instance_id_, batch_delete_tasks.size());
4501
4502
11
    LOG_INFO("batch delete plan created")
4503
11
            .tag("instance_id", instance_id_)
4504
11
            .tag("tablet_id", tablet_id)
4505
11
            .tag("plan_count", batch_delete_tasks.size());
4506
4507
    // Phase 2: Execute batch delete using existing delete_rowset_data
4508
11
    if (!batch_delete_tasks.empty()) {
4509
10
        std::map<std::string, RowsetMetaCloudPB> rowsets_to_delete;
4510
49
        for (const auto& task : batch_delete_tasks) {
4511
            // Version 0-1 rowset has no resource_id and no actual data files, skip it
4512
49
            if (task.rowset_meta.resource_id().empty()) {
4513
10
                LOG_INFO("skip rowset with empty resource_id in batch delete")
4514
10
                        .tag("instance_id", instance_id_)
4515
10
                        .tag("tablet_id", tablet_id)
4516
10
                        .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4517
10
                continue;
4518
10
            }
4519
39
            rowsets_to_delete[task.rowset_meta.rowset_id_v2()] = task.rowset_meta;
4520
39
        }
4521
4522
        // Only call delete_rowset_data if there are rowsets with actual data to delete
4523
10
        bool delete_success = true;
4524
10
        if (!rowsets_to_delete.empty()) {
4525
9
            RecyclerMetricsContext batch_metrics_context(instance_id_,
4526
9
                                                         "batch_delete_versioned_tablet");
4527
9
            int delete_ret = delete_rowset_data(
4528
9
                    rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, batch_metrics_context);
4529
9
            if (delete_ret != 0) {
4530
0
                LOG_WARNING("batch delete execution failed")
4531
0
                        .tag("instance_id", instance_id_)
4532
0
                        .tag("tablet_id", tablet_id);
4533
0
                g_bvar_recycler_batch_delete_failures.put(instance_id_, 1);
4534
0
                ret = -1;
4535
0
                delete_success = false;
4536
0
            }
4537
9
        }
4538
4539
        // Phase 3: Only cleanup metadata if data deletion succeeded.
4540
        // If deletion failed, keep recycle_rowset_key so next round will retry.
4541
10
        if (delete_success) {
4542
10
            int cleanup_ret = cleanup_rowset_metadata(batch_delete_tasks);
4543
10
            if (cleanup_ret != 0) {
4544
0
                LOG_WARNING("batch delete cleanup failed")
4545
0
                        .tag("instance_id", instance_id_)
4546
0
                        .tag("tablet_id", tablet_id);
4547
0
                ret = -1;
4548
0
            }
4549
10
        }
4550
10
    }
4551
4552
    // Always wait for fallback tasks to complete before returning
4553
11
    bool finished = true;
4554
11
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4555
11
    for (int r : rets) {
4556
0
        if (r != 0) {
4557
0
            ret = -1;
4558
0
        }
4559
0
    }
4560
4561
11
    ret = finished ? ret : -1;
4562
4563
11
    if (ret != 0) { // failed recycle tablet data
4564
0
        LOG_WARNING("recycle versioned tablet failed")
4565
0
                .tag("finished", finished)
4566
0
                .tag("ret", ret)
4567
0
                .tag("instance_id", instance_id_)
4568
0
                .tag("tablet_id", tablet_id);
4569
0
        return ret;
4570
0
    }
4571
4572
11
    tablet_metrics_context_.total_recycled_data_size +=
4573
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4574
11
    tablet_metrics_context_.total_recycled_num += 1;
4575
11
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4576
11
    segment_metrics_context_.total_recycled_data_size +=
4577
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4578
11
    metrics_context.total_recycled_data_size +=
4579
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4580
11
    tablet_metrics_context_.report();
4581
11
    segment_metrics_context_.report();
4582
11
    metrics_context.report();
4583
4584
11
    txn.reset();
4585
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4586
0
        LOG_WARNING("failed to recycle tablet ")
4587
0
                .tag("tablet id", tablet_id)
4588
0
                .tag("instance_id", instance_id_)
4589
0
                .tag("reason", "failed to create txn");
4590
0
        ret = -1;
4591
0
    }
4592
    // delete all rowset kv in this tablet
4593
11
    txn->remove(rs_key0, rs_key1);
4594
11
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4595
4596
    // remove delete bitmap for MoW table
4597
11
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4598
11
    txn->remove(pending_key);
4599
11
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4600
11
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4601
11
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4602
4603
11
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4604
11
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4605
11
    txn->remove(dbm_start_key, dbm_end_key);
4606
11
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4607
11
              << " end=" << hex(dbm_end_key);
4608
4609
11
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
4610
11
    std::string tablet_index_val;
4611
11
    err = txn->get(versioned_idx_key, &tablet_index_val);
4612
11
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
4613
0
        LOG_WARNING("failed to get tablet index kv")
4614
0
                .tag("instance_id", instance_id_)
4615
0
                .tag("tablet_id", tablet_id)
4616
0
                .tag("err", err);
4617
0
        ret = -1;
4618
11
    } else if (err == TxnErrorCode::TXN_OK) {
4619
        // If the tablet index kv exists, we need to delete it
4620
10
        TabletIndexPB tablet_index_pb;
4621
10
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
4622
0
            LOG_WARNING("failed to parse tablet index pb")
4623
0
                    .tag("instance_id", instance_id_)
4624
0
                    .tag("tablet_id", tablet_id);
4625
0
            ret = -1;
4626
10
        } else {
4627
10
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
4628
10
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
4629
10
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
4630
10
            txn->remove(versioned_inverted_idx_key);
4631
10
            txn->remove(versioned_idx_key);
4632
10
        }
4633
10
    }
4634
4635
11
    err = txn->commit();
4636
11
    if (err != TxnErrorCode::TXN_OK) {
4637
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4638
0
        ret = -1;
4639
0
    }
4640
4641
11
    if (ret == 0) {
4642
        // All object files under tablet have been deleted
4643
11
        std::lock_guard lock(recycled_tablets_mtx_);
4644
11
        recycled_tablets_.insert(tablet_id);
4645
11
    }
4646
4647
11
    return ret;
4648
11
}
4649
4650
27
int InstanceRecycler::recycle_rowsets() {
4651
27
    if (should_recycle_versioned_keys()) {
4652
5
        return recycle_versioned_rowsets();
4653
5
    }
4654
4655
22
    const std::string task_name = "recycle_rowsets";
4656
22
    int64_t num_scanned = 0;
4657
22
    int64_t num_expired = 0;
4658
22
    int64_t num_prepare = 0;
4659
22
    int64_t num_compacted = 0;
4660
22
    int64_t num_empty_rowset = 0;
4661
22
    size_t total_rowset_key_size = 0;
4662
22
    size_t total_rowset_value_size = 0;
4663
22
    size_t expired_rowset_size = 0;
4664
22
    std::atomic_long num_recycled = 0;
4665
22
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4666
4667
22
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4668
22
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4669
22
    std::string recyc_rs_key0;
4670
22
    std::string recyc_rs_key1;
4671
22
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4672
22
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4673
4674
22
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4675
4676
22
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4677
22
    register_recycle_task(task_name, start_time);
4678
4679
22
    DORIS_CLOUD_DEFER {
4680
22
        unregister_recycle_task(task_name);
4681
22
        int64_t cost =
4682
22
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4683
22
        metrics_context.finish_report();
4684
22
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4685
22
                .tag("instance_id", instance_id_)
4686
22
                .tag("num_scanned", num_scanned)
4687
22
                .tag("num_expired", num_expired)
4688
22
                .tag("num_recycled", num_recycled)
4689
22
                .tag("num_recycled.prepare", num_prepare)
4690
22
                .tag("num_recycled.compacted", num_compacted)
4691
22
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4692
22
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4693
22
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4694
22
                .tag("expired_rowset_meta_size", expired_rowset_size);
4695
22
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4679
7
    DORIS_CLOUD_DEFER {
4680
7
        unregister_recycle_task(task_name);
4681
7
        int64_t cost =
4682
7
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4683
7
        metrics_context.finish_report();
4684
7
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4685
7
                .tag("instance_id", instance_id_)
4686
7
                .tag("num_scanned", num_scanned)
4687
7
                .tag("num_expired", num_expired)
4688
7
                .tag("num_recycled", num_recycled)
4689
7
                .tag("num_recycled.prepare", num_prepare)
4690
7
                .tag("num_recycled.compacted", num_compacted)
4691
7
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4692
7
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4693
7
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4694
7
                .tag("expired_rowset_meta_size", expired_rowset_size);
4695
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4679
15
    DORIS_CLOUD_DEFER {
4680
15
        unregister_recycle_task(task_name);
4681
15
        int64_t cost =
4682
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4683
15
        metrics_context.finish_report();
4684
15
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4685
15
                .tag("instance_id", instance_id_)
4686
15
                .tag("num_scanned", num_scanned)
4687
15
                .tag("num_expired", num_expired)
4688
15
                .tag("num_recycled", num_recycled)
4689
15
                .tag("num_recycled.prepare", num_prepare)
4690
15
                .tag("num_recycled.compacted", num_compacted)
4691
15
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4692
15
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4693
15
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4694
15
                .tag("expired_rowset_meta_size", expired_rowset_size);
4695
15
    };
4696
4697
22
    std::vector<std::string> rowset_keys;
4698
    // rowset_id -> rowset_meta
4699
    // store rowset id and meta for statistics rs size when delete
4700
22
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
4701
4702
    // Store keys of rowset recycled by background workers
4703
22
    std::mutex async_recycled_rowset_keys_mutex;
4704
22
    std::vector<std::string> async_recycled_rowset_keys;
4705
22
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4706
22
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
4707
22
    worker_pool->start();
4708
    // TODO bacth delete
4709
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4710
4.00k
        std::string dbm_start_key =
4711
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4712
4.00k
        std::string dbm_end_key = dbm_start_key;
4713
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4714
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4715
4.00k
        if (ret != 0) {
4716
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4717
0
                         << instance_id_;
4718
0
        }
4719
4.00k
        return ret;
4720
4.00k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4709
2
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4710
2
        std::string dbm_start_key =
4711
2
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4712
2
        std::string dbm_end_key = dbm_start_key;
4713
2
        encode_int64(INT64_MAX, &dbm_end_key);
4714
2
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4715
2
        if (ret != 0) {
4716
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4717
0
                         << instance_id_;
4718
0
        }
4719
2
        return ret;
4720
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4709
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4710
4.00k
        std::string dbm_start_key =
4711
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4712
4.00k
        std::string dbm_end_key = dbm_start_key;
4713
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4714
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4715
4.00k
        if (ret != 0) {
4716
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4717
0
                         << instance_id_;
4718
0
        }
4719
4.00k
        return ret;
4720
4.00k
    };
4721
22
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
4722
902
                                            int64_t tablet_id, const std::string& rowset_id) {
4723
        // Try to delete rowset data in background thread
4724
902
        int ret = worker_pool->submit_with_timeout(
4725
902
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4726
811
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4727
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4728
0
                        return;
4729
0
                    }
4730
811
                    std::vector<std::string> keys;
4731
811
                    {
4732
811
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4733
811
                        async_recycled_rowset_keys.push_back(std::move(key));
4734
811
                        if (async_recycled_rowset_keys.size() > 100) {
4735
7
                            keys.swap(async_recycled_rowset_keys);
4736
7
                        }
4737
811
                    }
4738
811
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4739
811
                    if (keys.empty()) return;
4740
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4741
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4742
0
                                     << instance_id_;
4743
7
                    } else {
4744
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4745
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4746
7
                                           num_recycled, start_time);
4747
7
                    }
4748
7
                },
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4725
2
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4726
2
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4727
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4728
0
                        return;
4729
0
                    }
4730
2
                    std::vector<std::string> keys;
4731
2
                    {
4732
2
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4733
2
                        async_recycled_rowset_keys.push_back(std::move(key));
4734
2
                        if (async_recycled_rowset_keys.size() > 100) {
4735
0
                            keys.swap(async_recycled_rowset_keys);
4736
0
                        }
4737
2
                    }
4738
2
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4739
2
                    if (keys.empty()) return;
4740
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4741
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4742
0
                                     << instance_id_;
4743
0
                    } else {
4744
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4745
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4746
0
                                           num_recycled, start_time);
4747
0
                    }
4748
0
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4725
809
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4726
809
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4727
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4728
0
                        return;
4729
0
                    }
4730
809
                    std::vector<std::string> keys;
4731
809
                    {
4732
809
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4733
809
                        async_recycled_rowset_keys.push_back(std::move(key));
4734
809
                        if (async_recycled_rowset_keys.size() > 100) {
4735
7
                            keys.swap(async_recycled_rowset_keys);
4736
7
                        }
4737
809
                    }
4738
809
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4739
809
                    if (keys.empty()) return;
4740
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4741
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4742
0
                                     << instance_id_;
4743
7
                    } else {
4744
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4745
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4746
7
                                           num_recycled, start_time);
4747
7
                    }
4748
7
                },
4749
902
                0);
4750
902
        if (ret == 0) return 0;
4751
        // Submit task failed, delete rowset data in current thread
4752
91
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4753
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4754
0
            return -1;
4755
0
        }
4756
91
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4757
0
            return -1;
4758
0
        }
4759
91
        rowset_keys.push_back(std::move(key));
4760
91
        return 0;
4761
91
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4722
2
                                            int64_t tablet_id, const std::string& rowset_id) {
4723
        // Try to delete rowset data in background thread
4724
2
        int ret = worker_pool->submit_with_timeout(
4725
2
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4726
2
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4727
2
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4728
2
                        return;
4729
2
                    }
4730
2
                    std::vector<std::string> keys;
4731
2
                    {
4732
2
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4733
2
                        async_recycled_rowset_keys.push_back(std::move(key));
4734
2
                        if (async_recycled_rowset_keys.size() > 100) {
4735
2
                            keys.swap(async_recycled_rowset_keys);
4736
2
                        }
4737
2
                    }
4738
2
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4739
2
                    if (keys.empty()) return;
4740
2
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4741
2
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4742
2
                                     << instance_id_;
4743
2
                    } else {
4744
2
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4745
2
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4746
2
                                           num_recycled, start_time);
4747
2
                    }
4748
2
                },
4749
2
                0);
4750
2
        if (ret == 0) return 0;
4751
        // Submit task failed, delete rowset data in current thread
4752
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4753
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4754
0
            return -1;
4755
0
        }
4756
0
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4757
0
            return -1;
4758
0
        }
4759
0
        rowset_keys.push_back(std::move(key));
4760
0
        return 0;
4761
0
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4722
900
                                            int64_t tablet_id, const std::string& rowset_id) {
4723
        // Try to delete rowset data in background thread
4724
900
        int ret = worker_pool->submit_with_timeout(
4725
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4726
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4727
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4728
900
                        return;
4729
900
                    }
4730
900
                    std::vector<std::string> keys;
4731
900
                    {
4732
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4733
900
                        async_recycled_rowset_keys.push_back(std::move(key));
4734
900
                        if (async_recycled_rowset_keys.size() > 100) {
4735
900
                            keys.swap(async_recycled_rowset_keys);
4736
900
                        }
4737
900
                    }
4738
900
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4739
900
                    if (keys.empty()) return;
4740
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4741
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4742
900
                                     << instance_id_;
4743
900
                    } else {
4744
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4745
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4746
900
                                           num_recycled, start_time);
4747
900
                    }
4748
900
                },
4749
900
                0);
4750
900
        if (ret == 0) return 0;
4751
        // Submit task failed, delete rowset data in current thread
4752
91
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4753
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4754
0
            return -1;
4755
0
        }
4756
91
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4757
0
            return -1;
4758
0
        }
4759
91
        rowset_keys.push_back(std::move(key));
4760
91
        return 0;
4761
91
    };
4762
4763
22
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4764
4765
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4766
7.75k
        ++num_scanned;
4767
7.75k
        total_rowset_key_size += k.size();
4768
7.75k
        total_rowset_value_size += v.size();
4769
7.75k
        RecycleRowsetPB rowset;
4770
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4771
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4772
0
            return -1;
4773
0
        }
4774
4775
7.75k
        int64_t current_time = ::time(nullptr);
4776
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4777
4778
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4779
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4780
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4781
7.75k
        if (current_time < expiration) { // not expired
4782
0
            return 0;
4783
0
        }
4784
7.75k
        ++num_expired;
4785
7.75k
        expired_rowset_size += v.size();
4786
4787
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4788
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4789
                // in old version, keep this key-value pair and it needs to be checked manually
4790
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4791
0
                return -1;
4792
0
            }
4793
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4794
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4795
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4796
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4797
0
                rowset_keys.emplace_back(k);
4798
0
                return -1;
4799
0
            }
4800
            // decode rowset_id
4801
250
            auto k1 = k;
4802
250
            k1.remove_prefix(1);
4803
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4804
250
            decode_key(&k1, &out);
4805
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4806
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4807
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4808
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4809
250
                      << " task_type=" << metrics_context.operation_type;
4810
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4811
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4812
0
                return -1;
4813
0
            }
4814
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4815
250
            metrics_context.total_recycled_num++;
4816
250
            segment_metrics_context_.total_recycled_data_size +=
4817
250
                    rowset.rowset_meta().total_disk_size();
4818
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4819
250
            return 0;
4820
250
        }
4821
4822
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
4823
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
4824
7.50k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
4825
7.50k
            if (mark_ret == -1) {
4826
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
4827
0
                             << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4828
0
                             << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4829
0
                             << "]";
4830
0
                return -1;
4831
7.50k
            } else if (mark_ret == 1) {
4832
3.75k
                LOG(INFO)
4833
3.75k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
4834
3.75k
                           "next turn, instance_id="
4835
3.75k
                        << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4836
3.75k
                        << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4837
3.75k
                return 0;
4838
3.75k
            }
4839
7.50k
        }
4840
4841
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
4842
3.75k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
4843
3.75k
                      << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4844
3.75k
                      << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4845
4846
3.75k
            if (rowset_meta->end_version() != 1) {
4847
3.75k
                int ret = abort_txn_or_job_for_recycle(rowset);
4848
4849
3.75k
                if (ret != 0) {
4850
0
                    LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
4851
0
                                 << instance_id_ << " tablet_id=" << rowset.tablet_id()
4852
0
                                 << " version=[" << rowset_meta->start_version() << '-'
4853
0
                                 << rowset_meta->end_version() << "]";
4854
0
                    return ret;
4855
0
                }
4856
3.75k
            }
4857
3.75k
        }
4858
4859
        // TODO(plat1ko): check rowset not referenced
4860
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4861
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4862
0
                LOG_INFO("recycle rowset that has empty resource id");
4863
0
            } else {
4864
                // other situations, keep this key-value pair and it needs to be checked manually
4865
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4866
0
                return -1;
4867
0
            }
4868
0
        }
4869
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4870
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4871
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4872
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4873
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4874
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4875
3.75k
                  << " rowset_meta_size=" << v.size()
4876
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4877
3.75k
                  << " task_type=" << metrics_context.operation_type;
4878
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4879
            // unable to calculate file path, can only be deleted by rowset id prefix
4880
652
            num_prepare += 1;
4881
652
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4882
652
                                             rowset_meta->tablet_id(),
4883
652
                                             rowset_meta->rowset_id_v2()) != 0) {
4884
0
                return -1;
4885
0
            }
4886
3.10k
        } else {
4887
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4888
3.10k
            rowset_keys.emplace_back(k);
4889
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4890
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4891
3.10k
                ++num_empty_rowset;
4892
3.10k
            }
4893
3.10k
        }
4894
3.75k
        return 0;
4895
3.75k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4765
7
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4766
7
        ++num_scanned;
4767
7
        total_rowset_key_size += k.size();
4768
7
        total_rowset_value_size += v.size();
4769
7
        RecycleRowsetPB rowset;
4770
7
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4771
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4772
0
            return -1;
4773
0
        }
4774
4775
7
        int64_t current_time = ::time(nullptr);
4776
7
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4777
4778
7
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4779
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4780
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4781
7
        if (current_time < expiration) { // not expired
4782
0
            return 0;
4783
0
        }
4784
7
        ++num_expired;
4785
7
        expired_rowset_size += v.size();
4786
4787
7
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4788
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4789
                // in old version, keep this key-value pair and it needs to be checked manually
4790
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4791
0
                return -1;
4792
0
            }
4793
0
            if (rowset.resource_id().empty()) [[unlikely]] {
4794
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4795
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4796
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4797
0
                rowset_keys.emplace_back(k);
4798
0
                return -1;
4799
0
            }
4800
            // decode rowset_id
4801
0
            auto k1 = k;
4802
0
            k1.remove_prefix(1);
4803
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4804
0
            decode_key(&k1, &out);
4805
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4806
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4807
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4808
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4809
0
                      << " task_type=" << metrics_context.operation_type;
4810
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4811
0
                                             rowset.tablet_id(), rowset_id) != 0) {
4812
0
                return -1;
4813
0
            }
4814
0
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4815
0
            metrics_context.total_recycled_num++;
4816
0
            segment_metrics_context_.total_recycled_data_size +=
4817
0
                    rowset.rowset_meta().total_disk_size();
4818
0
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4819
0
            return 0;
4820
0
        }
4821
4822
7
        auto* rowset_meta = rowset.mutable_rowset_meta();
4823
7
        if (config::enable_mark_delete_rowset_before_recycle) {
4824
7
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
4825
7
            if (mark_ret == -1) {
4826
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
4827
0
                             << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4828
0
                             << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4829
0
                             << "]";
4830
0
                return -1;
4831
7
            } else if (mark_ret == 1) {
4832
5
                LOG(INFO)
4833
5
                        << "rowset already marked as recycled, recycler will delete data and kv at "
4834
5
                           "next turn, instance_id="
4835
5
                        << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4836
5
                        << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4837
5
                return 0;
4838
5
            }
4839
7
        }
4840
4841
2
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
4842
2
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
4843
2
                      << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4844
2
                      << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4845
4846
2
            if (rowset_meta->end_version() != 1) {
4847
2
                int ret = abort_txn_or_job_for_recycle(rowset);
4848
4849
2
                if (ret != 0) {
4850
0
                    LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
4851
0
                                 << instance_id_ << " tablet_id=" << rowset.tablet_id()
4852
0
                                 << " version=[" << rowset_meta->start_version() << '-'
4853
0
                                 << rowset_meta->end_version() << "]";
4854
0
                    return ret;
4855
0
                }
4856
2
            }
4857
2
        }
4858
4859
        // TODO(plat1ko): check rowset not referenced
4860
2
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4861
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4862
0
                LOG_INFO("recycle rowset that has empty resource id");
4863
0
            } else {
4864
                // other situations, keep this key-value pair and it needs to be checked manually
4865
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4866
0
                return -1;
4867
0
            }
4868
0
        }
4869
2
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4870
2
                  << " tablet_id=" << rowset_meta->tablet_id()
4871
2
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4872
2
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4873
2
                  << "] txn_id=" << rowset_meta->txn_id()
4874
2
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4875
2
                  << " rowset_meta_size=" << v.size()
4876
2
                  << " creation_time=" << rowset_meta->creation_time()
4877
2
                  << " task_type=" << metrics_context.operation_type;
4878
2
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4879
            // unable to calculate file path, can only be deleted by rowset id prefix
4880
2
            num_prepare += 1;
4881
2
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4882
2
                                             rowset_meta->tablet_id(),
4883
2
                                             rowset_meta->rowset_id_v2()) != 0) {
4884
0
                return -1;
4885
0
            }
4886
2
        } else {
4887
0
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4888
0
            rowset_keys.emplace_back(k);
4889
0
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4890
0
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4891
0
                ++num_empty_rowset;
4892
0
            }
4893
0
        }
4894
2
        return 0;
4895
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4765
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4766
7.75k
        ++num_scanned;
4767
7.75k
        total_rowset_key_size += k.size();
4768
7.75k
        total_rowset_value_size += v.size();
4769
7.75k
        RecycleRowsetPB rowset;
4770
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4771
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4772
0
            return -1;
4773
0
        }
4774
4775
7.75k
        int64_t current_time = ::time(nullptr);
4776
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4777
4778
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4779
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4780
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4781
7.75k
        if (current_time < expiration) { // not expired
4782
0
            return 0;
4783
0
        }
4784
7.75k
        ++num_expired;
4785
7.75k
        expired_rowset_size += v.size();
4786
4787
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4788
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4789
                // in old version, keep this key-value pair and it needs to be checked manually
4790
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4791
0
                return -1;
4792
0
            }
4793
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4794
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4795
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4796
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4797
0
                rowset_keys.emplace_back(k);
4798
0
                return -1;
4799
0
            }
4800
            // decode rowset_id
4801
250
            auto k1 = k;
4802
250
            k1.remove_prefix(1);
4803
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4804
250
            decode_key(&k1, &out);
4805
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4806
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4807
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4808
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4809
250
                      << " task_type=" << metrics_context.operation_type;
4810
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4811
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4812
0
                return -1;
4813
0
            }
4814
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4815
250
            metrics_context.total_recycled_num++;
4816
250
            segment_metrics_context_.total_recycled_data_size +=
4817
250
                    rowset.rowset_meta().total_disk_size();
4818
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4819
250
            return 0;
4820
250
        }
4821
4822
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
4823
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
4824
7.50k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
4825
7.50k
            if (mark_ret == -1) {
4826
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
4827
0
                             << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4828
0
                             << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4829
0
                             << "]";
4830
0
                return -1;
4831
7.50k
            } else if (mark_ret == 1) {
4832
3.75k
                LOG(INFO)
4833
3.75k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
4834
3.75k
                           "next turn, instance_id="
4835
3.75k
                        << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4836
3.75k
                        << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4837
3.75k
                return 0;
4838
3.75k
            }
4839
7.50k
        }
4840
4841
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
4842
3.75k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
4843
3.75k
                      << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4844
3.75k
                      << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4845
4846
3.75k
            if (rowset_meta->end_version() != 1) {
4847
3.75k
                int ret = abort_txn_or_job_for_recycle(rowset);
4848
4849
3.75k
                if (ret != 0) {
4850
0
                    LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
4851
0
                                 << instance_id_ << " tablet_id=" << rowset.tablet_id()
4852
0
                                 << " version=[" << rowset_meta->start_version() << '-'
4853
0
                                 << rowset_meta->end_version() << "]";
4854
0
                    return ret;
4855
0
                }
4856
3.75k
            }
4857
3.75k
        }
4858
4859
        // TODO(plat1ko): check rowset not referenced
4860
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4861
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4862
0
                LOG_INFO("recycle rowset that has empty resource id");
4863
0
            } else {
4864
                // other situations, keep this key-value pair and it needs to be checked manually
4865
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4866
0
                return -1;
4867
0
            }
4868
0
        }
4869
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4870
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4871
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4872
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4873
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4874
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4875
3.75k
                  << " rowset_meta_size=" << v.size()
4876
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4877
3.75k
                  << " task_type=" << metrics_context.operation_type;
4878
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4879
            // unable to calculate file path, can only be deleted by rowset id prefix
4880
650
            num_prepare += 1;
4881
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4882
650
                                             rowset_meta->tablet_id(),
4883
650
                                             rowset_meta->rowset_id_v2()) != 0) {
4884
0
                return -1;
4885
0
            }
4886
3.10k
        } else {
4887
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4888
3.10k
            rowset_keys.emplace_back(k);
4889
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4890
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4891
3.10k
                ++num_empty_rowset;
4892
3.10k
            }
4893
3.10k
        }
4894
3.75k
        return 0;
4895
3.75k
    };
4896
4897
49
    auto loop_done = [&]() -> int {
4898
49
        std::vector<std::string> rowset_keys_to_delete;
4899
        // rowset_id -> rowset_meta
4900
        // store rowset id and meta for statistics rs size when delete
4901
49
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4902
49
        rowset_keys_to_delete.swap(rowset_keys);
4903
49
        rowsets_to_delete.swap(rowsets);
4904
49
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4905
49
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4906
49
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4907
49
                                   metrics_context) != 0) {
4908
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4909
0
                return;
4910
0
            }
4911
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4912
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4913
0
                    return;
4914
0
                }
4915
3.10k
            }
4916
49
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4917
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4918
0
                return;
4919
0
            }
4920
49
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4921
49
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
4905
7
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4906
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4907
7
                                   metrics_context) != 0) {
4908
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4909
0
                return;
4910
0
            }
4911
7
            for (const auto& [_, rs] : rowsets_to_delete) {
4912
0
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4913
0
                    return;
4914
0
                }
4915
0
            }
4916
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4917
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4918
0
                return;
4919
0
            }
4920
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4921
7
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
4905
42
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4906
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4907
42
                                   metrics_context) != 0) {
4908
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4909
0
                return;
4910
0
            }
4911
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4912
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4913
0
                    return;
4914
0
                }
4915
3.10k
            }
4916
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4917
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4918
0
                return;
4919
0
            }
4920
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4921
42
        });
4922
49
        return 0;
4923
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
4897
7
    auto loop_done = [&]() -> int {
4898
7
        std::vector<std::string> rowset_keys_to_delete;
4899
        // rowset_id -> rowset_meta
4900
        // store rowset id and meta for statistics rs size when delete
4901
7
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4902
7
        rowset_keys_to_delete.swap(rowset_keys);
4903
7
        rowsets_to_delete.swap(rowsets);
4904
7
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4905
7
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4906
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4907
7
                                   metrics_context) != 0) {
4908
7
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4909
7
                return;
4910
7
            }
4911
7
            for (const auto& [_, rs] : rowsets_to_delete) {
4912
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4913
7
                    return;
4914
7
                }
4915
7
            }
4916
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4917
7
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4918
7
                return;
4919
7
            }
4920
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4921
7
        });
4922
7
        return 0;
4923
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
4897
42
    auto loop_done = [&]() -> int {
4898
42
        std::vector<std::string> rowset_keys_to_delete;
4899
        // rowset_id -> rowset_meta
4900
        // store rowset id and meta for statistics rs size when delete
4901
42
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4902
42
        rowset_keys_to_delete.swap(rowset_keys);
4903
42
        rowsets_to_delete.swap(rowsets);
4904
42
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4905
42
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4906
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4907
42
                                   metrics_context) != 0) {
4908
42
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4909
42
                return;
4910
42
            }
4911
42
            for (const auto& [_, rs] : rowsets_to_delete) {
4912
42
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4913
42
                    return;
4914
42
                }
4915
42
            }
4916
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4917
42
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4918
42
                return;
4919
42
            }
4920
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4921
42
        });
4922
42
        return 0;
4923
42
    };
4924
4925
22
    if (config::enable_recycler_stats_metrics) {
4926
0
        scan_and_statistics_rowsets();
4927
0
    }
4928
    // recycle_func and loop_done for scan and recycle
4929
22
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
4930
22
                               std::move(loop_done));
4931
4932
22
    worker_pool->stop();
4933
4934
22
    if (!async_recycled_rowset_keys.empty()) {
4935
5
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
4936
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4937
0
            return -1;
4938
5
        } else {
4939
5
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
4940
5
        }
4941
5
    }
4942
4943
    // Report final metrics after all concurrent tasks completed
4944
22
    segment_metrics_context_.report();
4945
22
    metrics_context.report();
4946
4947
22
    return ret;
4948
22
}
4949
4950
13
int InstanceRecycler::recycle_restore_jobs() {
4951
13
    const std::string task_name = "recycle_restore_jobs";
4952
13
    int64_t num_scanned = 0;
4953
13
    int64_t num_expired = 0;
4954
13
    int64_t num_recycled = 0;
4955
13
    int64_t num_aborted = 0;
4956
4957
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4958
4959
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
4960
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
4961
13
    std::string restore_job_key0;
4962
13
    std::string restore_job_key1;
4963
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
4964
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
4965
4966
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
4967
4968
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4969
13
    register_recycle_task(task_name, start_time);
4970
4971
13
    DORIS_CLOUD_DEFER {
4972
13
        unregister_recycle_task(task_name);
4973
13
        int64_t cost =
4974
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4975
13
        metrics_context.finish_report();
4976
4977
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
4978
13
                .tag("instance_id", instance_id_)
4979
13
                .tag("num_scanned", num_scanned)
4980
13
                .tag("num_expired", num_expired)
4981
13
                .tag("num_recycled", num_recycled)
4982
13
                .tag("num_aborted", num_aborted);
4983
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
4971
13
    DORIS_CLOUD_DEFER {
4972
13
        unregister_recycle_task(task_name);
4973
13
        int64_t cost =
4974
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4975
13
        metrics_context.finish_report();
4976
4977
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
4978
13
                .tag("instance_id", instance_id_)
4979
13
                .tag("num_scanned", num_scanned)
4980
13
                .tag("num_expired", num_expired)
4981
13
                .tag("num_recycled", num_recycled)
4982
13
                .tag("num_aborted", num_aborted);
4983
13
    };
4984
4985
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4986
4987
13
    std::vector<std::string_view> restore_job_keys;
4988
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
4989
41
        ++num_scanned;
4990
41
        RestoreJobCloudPB restore_job_pb;
4991
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
4992
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
4993
0
            return -1;
4994
0
        }
4995
41
        int64_t expiration =
4996
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
4997
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
4998
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
4999
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5000
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5001
0
                   << " state=" << restore_job_pb.state();
5002
41
        int64_t current_time = ::time(nullptr);
5003
41
        if (current_time < expiration) { // not expired
5004
0
            return 0;
5005
0
        }
5006
41
        ++num_expired;
5007
5008
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5009
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5010
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5011
5012
41
        std::unique_ptr<Transaction> txn;
5013
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5014
41
        if (err != TxnErrorCode::TXN_OK) {
5015
0
            LOG_WARNING("failed to recycle restore job")
5016
0
                    .tag("err", err)
5017
0
                    .tag("tablet id", tablet_id)
5018
0
                    .tag("instance_id", instance_id_)
5019
0
                    .tag("reason", "failed to create txn");
5020
0
            return -1;
5021
0
        }
5022
5023
41
        std::string val;
5024
41
        err = txn->get(k, &val);
5025
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5026
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5027
0
            return 0;
5028
0
        }
5029
41
        if (err != TxnErrorCode::TXN_OK) {
5030
0
            LOG_WARNING("failed to get kv");
5031
0
            return -1;
5032
0
        }
5033
41
        restore_job_pb.Clear();
5034
41
        if (!restore_job_pb.ParseFromString(val)) {
5035
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5036
0
            return -1;
5037
0
        }
5038
5039
        // PREPARED or COMMITTED, change state to DROPPED and return
5040
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5041
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5042
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5043
0
            restore_job_pb.set_need_recycle_data(true);
5044
0
            txn->put(k, restore_job_pb.SerializeAsString());
5045
0
            err = txn->commit();
5046
0
            if (err != TxnErrorCode::TXN_OK) {
5047
0
                LOG_WARNING("failed to commit txn: {}", err);
5048
0
                return -1;
5049
0
            }
5050
0
            num_aborted++;
5051
0
            return 0;
5052
0
        }
5053
5054
        // Change state to RECYCLING
5055
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5056
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5057
21
            txn->put(k, restore_job_pb.SerializeAsString());
5058
21
            err = txn->commit();
5059
21
            if (err != TxnErrorCode::TXN_OK) {
5060
0
                LOG_WARNING("failed to commit txn: {}", err);
5061
0
                return -1;
5062
0
            }
5063
21
            return 0;
5064
21
        }
5065
5066
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5067
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5068
5069
        // Recycle all data associated with the restore job.
5070
        // This includes rowsets, segments, and related resources.
5071
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5072
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5073
0
            LOG_WARNING("failed to recycle tablet")
5074
0
                    .tag("tablet_id", tablet_id)
5075
0
                    .tag("instance_id", instance_id_);
5076
0
            return -1;
5077
0
        }
5078
5079
        // delete all restore job rowset kv
5080
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5081
5082
20
        err = txn->commit();
5083
20
        if (err != TxnErrorCode::TXN_OK) {
5084
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5085
0
                    .tag("err", err)
5086
0
                    .tag("tablet id", tablet_id)
5087
0
                    .tag("instance_id", instance_id_)
5088
0
                    .tag("reason", "failed to commit txn");
5089
0
            return -1;
5090
0
        }
5091
5092
20
        metrics_context.total_recycled_num = ++num_recycled;
5093
20
        metrics_context.report();
5094
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5095
20
        restore_job_keys.push_back(k);
5096
5097
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5098
20
                  << " tablet_id=" << tablet_id;
5099
20
        return 0;
5100
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4988
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
4989
41
        ++num_scanned;
4990
41
        RestoreJobCloudPB restore_job_pb;
4991
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
4992
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
4993
0
            return -1;
4994
0
        }
4995
41
        int64_t expiration =
4996
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
4997
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
4998
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
4999
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5000
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5001
0
                   << " state=" << restore_job_pb.state();
5002
41
        int64_t current_time = ::time(nullptr);
5003
41
        if (current_time < expiration) { // not expired
5004
0
            return 0;
5005
0
        }
5006
41
        ++num_expired;
5007
5008
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5009
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5010
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5011
5012
41
        std::unique_ptr<Transaction> txn;
5013
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5014
41
        if (err != TxnErrorCode::TXN_OK) {
5015
0
            LOG_WARNING("failed to recycle restore job")
5016
0
                    .tag("err", err)
5017
0
                    .tag("tablet id", tablet_id)
5018
0
                    .tag("instance_id", instance_id_)
5019
0
                    .tag("reason", "failed to create txn");
5020
0
            return -1;
5021
0
        }
5022
5023
41
        std::string val;
5024
41
        err = txn->get(k, &val);
5025
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5026
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5027
0
            return 0;
5028
0
        }
5029
41
        if (err != TxnErrorCode::TXN_OK) {
5030
0
            LOG_WARNING("failed to get kv");
5031
0
            return -1;
5032
0
        }
5033
41
        restore_job_pb.Clear();
5034
41
        if (!restore_job_pb.ParseFromString(val)) {
5035
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5036
0
            return -1;
5037
0
        }
5038
5039
        // PREPARED or COMMITTED, change state to DROPPED and return
5040
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5041
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5042
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5043
0
            restore_job_pb.set_need_recycle_data(true);
5044
0
            txn->put(k, restore_job_pb.SerializeAsString());
5045
0
            err = txn->commit();
5046
0
            if (err != TxnErrorCode::TXN_OK) {
5047
0
                LOG_WARNING("failed to commit txn: {}", err);
5048
0
                return -1;
5049
0
            }
5050
0
            num_aborted++;
5051
0
            return 0;
5052
0
        }
5053
5054
        // Change state to RECYCLING
5055
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5056
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5057
21
            txn->put(k, restore_job_pb.SerializeAsString());
5058
21
            err = txn->commit();
5059
21
            if (err != TxnErrorCode::TXN_OK) {
5060
0
                LOG_WARNING("failed to commit txn: {}", err);
5061
0
                return -1;
5062
0
            }
5063
21
            return 0;
5064
21
        }
5065
5066
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5067
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5068
5069
        // Recycle all data associated with the restore job.
5070
        // This includes rowsets, segments, and related resources.
5071
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5072
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5073
0
            LOG_WARNING("failed to recycle tablet")
5074
0
                    .tag("tablet_id", tablet_id)
5075
0
                    .tag("instance_id", instance_id_);
5076
0
            return -1;
5077
0
        }
5078
5079
        // delete all restore job rowset kv
5080
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5081
5082
20
        err = txn->commit();
5083
20
        if (err != TxnErrorCode::TXN_OK) {
5084
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5085
0
                    .tag("err", err)
5086
0
                    .tag("tablet id", tablet_id)
5087
0
                    .tag("instance_id", instance_id_)
5088
0
                    .tag("reason", "failed to commit txn");
5089
0
            return -1;
5090
0
        }
5091
5092
20
        metrics_context.total_recycled_num = ++num_recycled;
5093
20
        metrics_context.report();
5094
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5095
20
        restore_job_keys.push_back(k);
5096
5097
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5098
20
                  << " tablet_id=" << tablet_id;
5099
20
        return 0;
5100
20
    };
5101
5102
13
    auto loop_done = [&restore_job_keys, this]() -> int {
5103
3
        if (restore_job_keys.empty()) return 0;
5104
1
        DORIS_CLOUD_DEFER {
5105
1
            restore_job_keys.clear();
5106
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5104
1
        DORIS_CLOUD_DEFER {
5105
1
            restore_job_keys.clear();
5106
1
        };
5107
5108
1
        std::unique_ptr<Transaction> txn;
5109
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5110
1
        if (err != TxnErrorCode::TXN_OK) {
5111
0
            LOG_WARNING("failed to recycle restore job")
5112
0
                    .tag("err", err)
5113
0
                    .tag("instance_id", instance_id_)
5114
0
                    .tag("reason", "failed to create txn");
5115
0
            return -1;
5116
0
        }
5117
20
        for (auto& k : restore_job_keys) {
5118
20
            txn->remove(k);
5119
20
        }
5120
1
        err = txn->commit();
5121
1
        if (err != TxnErrorCode::TXN_OK) {
5122
0
            LOG_WARNING("failed to recycle restore job")
5123
0
                    .tag("err", err)
5124
0
                    .tag("instance_id", instance_id_)
5125
0
                    .tag("reason", "failed to commit txn");
5126
0
            return -1;
5127
0
        }
5128
1
        return 0;
5129
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
5102
3
    auto loop_done = [&restore_job_keys, this]() -> int {
5103
3
        if (restore_job_keys.empty()) return 0;
5104
1
        DORIS_CLOUD_DEFER {
5105
1
            restore_job_keys.clear();
5106
1
        };
5107
5108
1
        std::unique_ptr<Transaction> txn;
5109
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5110
1
        if (err != TxnErrorCode::TXN_OK) {
5111
0
            LOG_WARNING("failed to recycle restore job")
5112
0
                    .tag("err", err)
5113
0
                    .tag("instance_id", instance_id_)
5114
0
                    .tag("reason", "failed to create txn");
5115
0
            return -1;
5116
0
        }
5117
20
        for (auto& k : restore_job_keys) {
5118
20
            txn->remove(k);
5119
20
        }
5120
1
        err = txn->commit();
5121
1
        if (err != TxnErrorCode::TXN_OK) {
5122
0
            LOG_WARNING("failed to recycle restore job")
5123
0
                    .tag("err", err)
5124
0
                    .tag("instance_id", instance_id_)
5125
0
                    .tag("reason", "failed to commit txn");
5126
0
            return -1;
5127
0
        }
5128
1
        return 0;
5129
1
    };
5130
5131
13
    if (config::enable_recycler_stats_metrics) {
5132
0
        scan_and_statistics_restore_jobs();
5133
0
    }
5134
5135
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
5136
13
                            std::move(loop_done));
5137
13
}
5138
5139
10
int InstanceRecycler::recycle_versioned_rowsets() {
5140
10
    const std::string task_name = "recycle_rowsets";
5141
10
    int64_t num_scanned = 0;
5142
10
    int64_t num_expired = 0;
5143
10
    int64_t num_prepare = 0;
5144
10
    int64_t num_compacted = 0;
5145
10
    int64_t num_empty_rowset = 0;
5146
10
    size_t total_rowset_key_size = 0;
5147
10
    size_t total_rowset_value_size = 0;
5148
10
    size_t expired_rowset_size = 0;
5149
10
    std::atomic_long num_recycled = 0;
5150
10
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5151
5152
10
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
5153
10
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
5154
10
    std::string recyc_rs_key0;
5155
10
    std::string recyc_rs_key1;
5156
10
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
5157
10
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
5158
5159
10
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
5160
5161
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5162
10
    register_recycle_task(task_name, start_time);
5163
5164
10
    DORIS_CLOUD_DEFER {
5165
10
        unregister_recycle_task(task_name);
5166
10
        int64_t cost =
5167
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5168
10
        metrics_context.finish_report();
5169
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5170
10
                .tag("instance_id", instance_id_)
5171
10
                .tag("num_scanned", num_scanned)
5172
10
                .tag("num_expired", num_expired)
5173
10
                .tag("num_recycled", num_recycled)
5174
10
                .tag("num_recycled.prepare", num_prepare)
5175
10
                .tag("num_recycled.compacted", num_compacted)
5176
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5177
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5178
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5179
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5180
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
5164
10
    DORIS_CLOUD_DEFER {
5165
10
        unregister_recycle_task(task_name);
5166
10
        int64_t cost =
5167
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5168
10
        metrics_context.finish_report();
5169
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5170
10
                .tag("instance_id", instance_id_)
5171
10
                .tag("num_scanned", num_scanned)
5172
10
                .tag("num_expired", num_expired)
5173
10
                .tag("num_recycled", num_recycled)
5174
10
                .tag("num_recycled.prepare", num_prepare)
5175
10
                .tag("num_recycled.compacted", num_compacted)
5176
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5177
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5178
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5179
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5180
10
    };
5181
5182
10
    std::vector<std::string> orphan_rowset_keys;
5183
5184
    // Store keys of rowset recycled by background workers
5185
10
    std::mutex async_recycled_rowset_keys_mutex;
5186
10
    std::vector<std::string> async_recycled_rowset_keys;
5187
10
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5188
10
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
5189
10
    worker_pool->start();
5190
10
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
5191
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5192
        // Try to delete rowset data in background thread
5193
400
        int ret = worker_pool->submit_with_timeout(
5194
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5195
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5196
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5197
400
                        return;
5198
400
                    }
5199
                    // The async recycled rowsets are staled format or has not been used,
5200
                    // so we don't need to check the rowset ref count key.
5201
0
                    std::vector<std::string> keys;
5202
0
                    {
5203
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5204
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5205
0
                        if (async_recycled_rowset_keys.size() > 100) {
5206
0
                            keys.swap(async_recycled_rowset_keys);
5207
0
                        }
5208
0
                    }
5209
0
                    if (keys.empty()) return;
5210
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5211
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5212
0
                                     << instance_id_;
5213
0
                    } else {
5214
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5215
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5216
0
                                           num_recycled, start_time);
5217
0
                    }
5218
0
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
5194
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5195
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5196
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5197
400
                        return;
5198
400
                    }
5199
                    // The async recycled rowsets are staled format or has not been used,
5200
                    // so we don't need to check the rowset ref count key.
5201
0
                    std::vector<std::string> keys;
5202
0
                    {
5203
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5204
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5205
0
                        if (async_recycled_rowset_keys.size() > 100) {
5206
0
                            keys.swap(async_recycled_rowset_keys);
5207
0
                        }
5208
0
                    }
5209
0
                    if (keys.empty()) return;
5210
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5211
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5212
0
                                     << instance_id_;
5213
0
                    } else {
5214
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5215
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5216
0
                                           num_recycled, start_time);
5217
0
                    }
5218
0
                },
5219
400
                0);
5220
400
        if (ret == 0) return 0;
5221
        // Submit task failed, delete rowset data in current thread
5222
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5223
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5224
0
            return -1;
5225
0
        }
5226
0
        orphan_rowset_keys.push_back(std::move(key));
5227
0
        return 0;
5228
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
5191
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5192
        // Try to delete rowset data in background thread
5193
400
        int ret = worker_pool->submit_with_timeout(
5194
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5195
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5196
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5197
400
                        return;
5198
400
                    }
5199
                    // The async recycled rowsets are staled format or has not been used,
5200
                    // so we don't need to check the rowset ref count key.
5201
400
                    std::vector<std::string> keys;
5202
400
                    {
5203
400
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5204
400
                        async_recycled_rowset_keys.push_back(std::move(key));
5205
400
                        if (async_recycled_rowset_keys.size() > 100) {
5206
400
                            keys.swap(async_recycled_rowset_keys);
5207
400
                        }
5208
400
                    }
5209
400
                    if (keys.empty()) return;
5210
400
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5211
400
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5212
400
                                     << instance_id_;
5213
400
                    } else {
5214
400
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5215
400
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5216
400
                                           num_recycled, start_time);
5217
400
                    }
5218
400
                },
5219
400
                0);
5220
400
        if (ret == 0) return 0;
5221
        // Submit task failed, delete rowset data in current thread
5222
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5223
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5224
0
            return -1;
5225
0
        }
5226
0
        orphan_rowset_keys.push_back(std::move(key));
5227
0
        return 0;
5228
0
    };
5229
5230
10
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5231
5232
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5233
2.01k
        ++num_scanned;
5234
2.01k
        total_rowset_key_size += k.size();
5235
2.01k
        total_rowset_value_size += v.size();
5236
2.01k
        RecycleRowsetPB rowset;
5237
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5238
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5239
0
            return -1;
5240
0
        }
5241
5242
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5243
5244
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5245
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5246
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5247
2.01k
        int64_t current_time = ::time(nullptr);
5248
2.01k
        if (current_time < final_expiration) { // not expired
5249
0
            return 0;
5250
0
        }
5251
2.01k
        ++num_expired;
5252
2.01k
        expired_rowset_size += v.size();
5253
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5254
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5255
                // in old version, keep this key-value pair and it needs to be checked manually
5256
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5257
0
                return -1;
5258
0
            }
5259
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5260
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5261
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5262
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5263
0
                orphan_rowset_keys.emplace_back(k);
5264
0
                return -1;
5265
0
            }
5266
            // decode rowset_id
5267
0
            auto k1 = k;
5268
0
            k1.remove_prefix(1);
5269
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5270
0
            decode_key(&k1, &out);
5271
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5272
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5273
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5274
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5275
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5276
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5277
0
                return -1;
5278
0
            }
5279
0
            return 0;
5280
0
        }
5281
        // TODO(plat1ko): check rowset not referenced
5282
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5283
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5284
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5285
0
                LOG_INFO("recycle rowset that has empty resource id");
5286
0
            } else {
5287
                // other situations, keep this key-value pair and it needs to be checked manually
5288
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5289
0
                return -1;
5290
0
            }
5291
0
        }
5292
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5293
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5294
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5295
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5296
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5297
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5298
2.01k
                  << " rowset_meta_size=" << v.size()
5299
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5300
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5301
            // unable to calculate file path, can only be deleted by rowset id prefix
5302
400
            num_prepare += 1;
5303
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5304
400
                                             rowset_meta->tablet_id(),
5305
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5306
0
                return -1;
5307
0
            }
5308
1.61k
        } else {
5309
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5310
1.61k
            worker_pool->submit(
5311
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5312
                        // The load & compact rowset keys are recycled during recycling operation logs.
5313
1.61k
                        RowsetDeleteTask task;
5314
1.61k
                        task.rowset_meta = rowset_meta;
5315
1.61k
                        task.recycle_rowset_key = k;
5316
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5317
1.59k
                            return;
5318
1.59k
                        }
5319
14
                        num_compacted += is_compacted;
5320
14
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5321
14
                        if (rowset_meta.num_segments() == 0) {
5322
0
                            ++num_empty_rowset;
5323
0
                        }
5324
14
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
5311
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5312
                        // The load & compact rowset keys are recycled during recycling operation logs.
5313
1.61k
                        RowsetDeleteTask task;
5314
1.61k
                        task.rowset_meta = rowset_meta;
5315
1.61k
                        task.recycle_rowset_key = k;
5316
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5317
1.59k
                            return;
5318
1.59k
                        }
5319
14
                        num_compacted += is_compacted;
5320
14
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5321
14
                        if (rowset_meta.num_segments() == 0) {
5322
0
                            ++num_empty_rowset;
5323
0
                        }
5324
14
                    });
5325
1.61k
        }
5326
2.01k
        return 0;
5327
2.01k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5232
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5233
2.01k
        ++num_scanned;
5234
2.01k
        total_rowset_key_size += k.size();
5235
2.01k
        total_rowset_value_size += v.size();
5236
2.01k
        RecycleRowsetPB rowset;
5237
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5238
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5239
0
            return -1;
5240
0
        }
5241
5242
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5243
5244
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5245
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5246
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5247
2.01k
        int64_t current_time = ::time(nullptr);
5248
2.01k
        if (current_time < final_expiration) { // not expired
5249
0
            return 0;
5250
0
        }
5251
2.01k
        ++num_expired;
5252
2.01k
        expired_rowset_size += v.size();
5253
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5254
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5255
                // in old version, keep this key-value pair and it needs to be checked manually
5256
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5257
0
                return -1;
5258
0
            }
5259
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5260
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5261
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5262
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5263
0
                orphan_rowset_keys.emplace_back(k);
5264
0
                return -1;
5265
0
            }
5266
            // decode rowset_id
5267
0
            auto k1 = k;
5268
0
            k1.remove_prefix(1);
5269
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5270
0
            decode_key(&k1, &out);
5271
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5272
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5273
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5274
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5275
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5276
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5277
0
                return -1;
5278
0
            }
5279
0
            return 0;
5280
0
        }
5281
        // TODO(plat1ko): check rowset not referenced
5282
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5283
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5284
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5285
0
                LOG_INFO("recycle rowset that has empty resource id");
5286
0
            } else {
5287
                // other situations, keep this key-value pair and it needs to be checked manually
5288
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5289
0
                return -1;
5290
0
            }
5291
0
        }
5292
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5293
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5294
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5295
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5296
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5297
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5298
2.01k
                  << " rowset_meta_size=" << v.size()
5299
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5300
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5301
            // unable to calculate file path, can only be deleted by rowset id prefix
5302
400
            num_prepare += 1;
5303
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5304
400
                                             rowset_meta->tablet_id(),
5305
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5306
0
                return -1;
5307
0
            }
5308
1.61k
        } else {
5309
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5310
1.61k
            worker_pool->submit(
5311
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5312
                        // The load & compact rowset keys are recycled during recycling operation logs.
5313
1.61k
                        RowsetDeleteTask task;
5314
1.61k
                        task.rowset_meta = rowset_meta;
5315
1.61k
                        task.recycle_rowset_key = k;
5316
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5317
1.61k
                            return;
5318
1.61k
                        }
5319
1.61k
                        num_compacted += is_compacted;
5320
1.61k
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5321
1.61k
                        if (rowset_meta.num_segments() == 0) {
5322
1.61k
                            ++num_empty_rowset;
5323
1.61k
                        }
5324
1.61k
                    });
5325
1.61k
        }
5326
2.01k
        return 0;
5327
2.01k
    };
5328
5329
10
    if (config::enable_recycler_stats_metrics) {
5330
0
        scan_and_statistics_rowsets();
5331
0
    }
5332
5333
10
    auto loop_done = [&]() -> int {
5334
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5335
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5336
0
        }
5337
6
        orphan_rowset_keys.clear();
5338
6
        return 0;
5339
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
5333
6
    auto loop_done = [&]() -> int {
5334
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5335
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5336
0
        }
5337
6
        orphan_rowset_keys.clear();
5338
6
        return 0;
5339
6
    };
5340
5341
    // recycle_func and loop_done for scan and recycle
5342
10
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
5343
10
                               std::move(loop_done));
5344
5345
10
    worker_pool->stop();
5346
5347
10
    if (!async_recycled_rowset_keys.empty()) {
5348
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
5349
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5350
0
            return -1;
5351
0
        } else {
5352
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
5353
0
        }
5354
0
    }
5355
5356
    // Report final metrics after all concurrent tasks completed
5357
10
    segment_metrics_context_.report();
5358
10
    metrics_context.report();
5359
5360
10
    return ret;
5361
10
}
5362
5363
1.61k
int InstanceRecycler::recycle_rowset_meta_and_data(const RowsetDeleteTask& task) {
5364
1.61k
    constexpr int MAX_RETRY = 10;
5365
1.61k
    const RowsetMetaCloudPB& rowset_meta = task.rowset_meta;
5366
1.61k
    int64_t tablet_id = rowset_meta.tablet_id();
5367
1.61k
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
5368
1.61k
    std::string_view reference_instance_id = instance_id_;
5369
1.61k
    if (rowset_meta.has_reference_instance_id()) {
5370
8
        reference_instance_id = rowset_meta.reference_instance_id();
5371
8
    }
5372
5373
1.61k
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
5374
1.61k
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
5375
1.61k
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(task.recycle_rowset_key));
5376
1.61k
    AnnotateTag instance_id_tag("instance_id", instance_id_);
5377
1.61k
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
5378
1.61k
    for (int i = 0; i < MAX_RETRY; ++i) {
5379
1.61k
        std::unique_ptr<Transaction> txn;
5380
1.61k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5381
1.61k
        if (err != TxnErrorCode::TXN_OK) {
5382
0
            LOG_WARNING("failed to create txn").tag("err", err);
5383
0
            return -1;
5384
0
        }
5385
5386
1.61k
        std::string rowset_ref_count_key =
5387
1.61k
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
5388
1.61k
        int64_t ref_count = 0;
5389
1.61k
        {
5390
1.61k
            std::string value;
5391
1.61k
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
5392
1.61k
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5393
                // This is the old version rowset, we could recycle it directly.
5394
1.60k
                ref_count = 1;
5395
1.60k
            } else if (err != TxnErrorCode::TXN_OK) {
5396
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
5397
0
                return -1;
5398
11
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
5399
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
5400
0
                return -1;
5401
0
            }
5402
1.61k
        }
5403
5404
1.61k
        if (ref_count == 1) {
5405
            // It would not be added since it is recycling.
5406
1.61k
            if (delete_rowset_data(rowset_meta) != 0) {
5407
1.60k
                LOG_WARNING("failed to delete rowset data");
5408
1.60k
                return -1;
5409
1.60k
            }
5410
5411
            // Reset the transaction to avoid timeout.
5412
10
            err = txn_kv_->create_txn(&txn);
5413
10
            if (err != TxnErrorCode::TXN_OK) {
5414
0
                LOG_WARNING("failed to create txn").tag("err", err);
5415
0
                return -1;
5416
0
            }
5417
10
            txn->remove(rowset_ref_count_key);
5418
10
            LOG_INFO("delete rowset data ref count key")
5419
10
                    .tag("txn_id", rowset_meta.txn_id())
5420
10
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5421
5422
10
            std::string dbm_start_key =
5423
10
                    meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
5424
10
            std::string dbm_end_key = meta_delete_bitmap_key(
5425
10
                    {reference_instance_id, tablet_id, rowset_id,
5426
10
                     std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
5427
10
            txn->remove(dbm_start_key, dbm_end_key);
5428
10
            LOG_INFO("remove delete bitmap kv")
5429
10
                    .tag("begin", hex(dbm_start_key))
5430
10
                    .tag("end", hex(dbm_end_key));
5431
5432
10
            std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key(
5433
10
                    {reference_instance_id, tablet_id, rowset_id});
5434
10
            std::string versioned_dbm_end_key = versioned_dbm_start_key;
5435
10
            encode_int64(INT64_MAX, &versioned_dbm_end_key);
5436
10
            txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
5437
10
            LOG_INFO("remove versioned delete bitmap kv")
5438
10
                    .tag("begin", hex(versioned_dbm_start_key))
5439
10
                    .tag("end", hex(versioned_dbm_end_key));
5440
10
        } else {
5441
            // Decrease the rowset ref count.
5442
            //
5443
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
5444
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
5445
3
            txn->atomic_add(rowset_ref_count_key, -1);
5446
3
            LOG_INFO("decrease rowset data ref count")
5447
3
                    .tag("txn_id", rowset_meta.txn_id())
5448
3
                    .tag("ref_count", ref_count - 1)
5449
3
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5450
3
        }
5451
5452
13
        if (!task.versioned_rowset_key.empty()) {
5453
0
            versioned::document_remove<RowsetMetaCloudPB>(txn.get(), task.versioned_rowset_key,
5454
0
                                                          task.versionstamp);
5455
0
            LOG_INFO("remove versioned meta rowset key").tag("key", hex(task.versioned_rowset_key));
5456
0
        }
5457
5458
13
        if (!task.non_versioned_rowset_key.empty()) {
5459
0
            txn->remove(task.non_versioned_rowset_key);
5460
0
            LOG_INFO("remove non versioned rowset key")
5461
0
                    .tag("key", hex(task.non_versioned_rowset_key));
5462
0
        }
5463
5464
        // empty when recycle ref rowsets for deleted instance
5465
13
        if (!task.recycle_rowset_key.empty()) {
5466
13
            txn->remove(task.recycle_rowset_key);
5467
13
            LOG_INFO("remove recycle rowset key").tag("key", hex(task.recycle_rowset_key));
5468
13
        }
5469
5470
13
        err = txn->commit();
5471
13
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
5472
            // The rowset ref count key has been changed, we need to retry.
5473
0
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
5474
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
5475
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
5476
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5477
0
            continue;
5478
13
        } else if (err != TxnErrorCode::TXN_OK) {
5479
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
5480
0
            return -1;
5481
0
        }
5482
13
        LOG_INFO("recycle rowset meta and data success");
5483
13
        return 0;
5484
13
    }
5485
0
    LOG_WARNING("failed to recycle rowset meta and data after retry")
5486
0
            .tag("tablet_id", tablet_id)
5487
0
            .tag("rowset_id", rowset_id)
5488
0
            .tag("retry", MAX_RETRY);
5489
0
    return -1;
5490
1.61k
}
5491
5492
39
int InstanceRecycler::recycle_tmp_rowsets() {
5493
39
    const std::string task_name = "recycle_tmp_rowsets";
5494
39
    int64_t num_scanned = 0;
5495
39
    int64_t num_expired = 0;
5496
39
    std::atomic_long num_recycled = 0;
5497
39
    size_t expired_rowset_size = 0;
5498
39
    size_t total_rowset_key_size = 0;
5499
39
    size_t total_rowset_value_size = 0;
5500
39
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5501
5502
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
5503
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
5504
39
    std::string tmp_rs_key0;
5505
39
    std::string tmp_rs_key1;
5506
39
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
5507
39
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
5508
5509
39
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
5510
5511
39
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5512
39
    register_recycle_task(task_name, start_time);
5513
5514
39
    DORIS_CLOUD_DEFER {
5515
39
        unregister_recycle_task(task_name);
5516
39
        int64_t cost =
5517
39
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5518
39
        metrics_context.finish_report();
5519
39
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5520
39
                .tag("instance_id", instance_id_)
5521
39
                .tag("num_scanned", num_scanned)
5522
39
                .tag("num_expired", num_expired)
5523
39
                .tag("num_recycled", num_recycled)
5524
39
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5525
39
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5526
39
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5527
39
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5514
12
    DORIS_CLOUD_DEFER {
5515
12
        unregister_recycle_task(task_name);
5516
12
        int64_t cost =
5517
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5518
12
        metrics_context.finish_report();
5519
12
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5520
12
                .tag("instance_id", instance_id_)
5521
12
                .tag("num_scanned", num_scanned)
5522
12
                .tag("num_expired", num_expired)
5523
12
                .tag("num_recycled", num_recycled)
5524
12
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5525
12
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5526
12
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5527
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5514
27
    DORIS_CLOUD_DEFER {
5515
27
        unregister_recycle_task(task_name);
5516
27
        int64_t cost =
5517
27
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5518
27
        metrics_context.finish_report();
5519
27
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5520
27
                .tag("instance_id", instance_id_)
5521
27
                .tag("num_scanned", num_scanned)
5522
27
                .tag("num_expired", num_expired)
5523
27
                .tag("num_recycled", num_recycled)
5524
27
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5525
27
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5526
27
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5527
27
    };
5528
5529
    // Elements in `tmp_rowset_keys` has the same lifetime as `it`
5530
5531
39
    std::vector<std::string> tmp_rowset_keys;
5532
39
    std::vector<std::string> tmp_rowset_ref_count_keys;
5533
5534
    // rowset_id -> rowset_meta
5535
    // store tmp_rowset id and meta for statistics rs size when delete
5536
39
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
5537
39
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5538
39
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
5539
39
    worker_pool->start();
5540
5541
39
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5542
5543
39
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
5544
39
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
5545
39
                             &earlest_ts, &tmp_rowset_ref_count_keys, this,
5546
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5547
106k
        ++num_scanned;
5548
106k
        total_rowset_key_size += k.size();
5549
106k
        total_rowset_value_size += v.size();
5550
106k
        doris::RowsetMetaCloudPB rowset;
5551
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5552
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5553
0
            return -1;
5554
0
        }
5555
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5556
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5557
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5558
0
                   << " txn_expiration=" << rowset.txn_expiration()
5559
0
                   << " rowset_creation_time=" << rowset.creation_time();
5560
106k
        int64_t current_time = ::time(nullptr);
5561
106k
        if (current_time < expiration) { // not expired
5562
0
            return 0;
5563
0
        }
5564
5565
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5566
106k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
5567
106k
            if (mark_ret == -1) {
5568
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
5569
0
                             << " tablet_id=" << rowset.tablet_id() << " version=["
5570
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5571
0
                return -1;
5572
106k
            } else if (mark_ret == 1) {
5573
52.0k
                LOG(INFO)
5574
52.0k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
5575
52.0k
                           "next turn, instance_id="
5576
52.0k
                        << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5577
52.0k
                        << rowset.start_version() << '-' << rowset.end_version() << "]";
5578
52.0k
                return 0;
5579
52.0k
            }
5580
106k
        }
5581
5582
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5583
54.0k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
5584
54.0k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5585
54.0k
                      << rowset.start_version() << '-' << rowset.end_version() << "]";
5586
5587
54.0k
            int ret = abort_txn_or_job_for_recycle(rowset);
5588
54.0k
            if (ret != 0) {
5589
0
                LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
5590
0
                             << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5591
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5592
0
                return ret;
5593
0
            }
5594
54.0k
        }
5595
5596
54.0k
        ++num_expired;
5597
54.0k
        expired_rowset_size += v.size();
5598
54.0k
        if (!rowset.has_resource_id()) {
5599
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5600
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5601
0
                return -1;
5602
0
            }
5603
            // might be a delete pred rowset
5604
0
            tmp_rowset_keys.emplace_back(k);
5605
0
            return 0;
5606
0
        }
5607
        // TODO(plat1ko): check rowset not referenced
5608
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5609
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5610
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5611
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5612
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5613
54.0k
                  << " num_expired=" << num_expired
5614
54.0k
                  << " task_type=" << metrics_context.operation_type;
5615
5616
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5617
        // Remove the rowset ref count key directly since it has not been used.
5618
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5619
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5620
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5621
54.0k
                  << "key=" << hex(rowset_ref_count_key);
5622
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5623
5624
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5625
54.0k
        return 0;
5626
54.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5546
16
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5547
16
        ++num_scanned;
5548
16
        total_rowset_key_size += k.size();
5549
16
        total_rowset_value_size += v.size();
5550
16
        doris::RowsetMetaCloudPB rowset;
5551
16
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5552
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5553
0
            return -1;
5554
0
        }
5555
16
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5556
16
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5557
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5558
0
                   << " txn_expiration=" << rowset.txn_expiration()
5559
0
                   << " rowset_creation_time=" << rowset.creation_time();
5560
16
        int64_t current_time = ::time(nullptr);
5561
16
        if (current_time < expiration) { // not expired
5562
0
            return 0;
5563
0
        }
5564
5565
16
        if (config::enable_mark_delete_rowset_before_recycle) {
5566
16
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
5567
16
            if (mark_ret == -1) {
5568
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
5569
0
                             << " tablet_id=" << rowset.tablet_id() << " version=["
5570
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5571
0
                return -1;
5572
16
            } else if (mark_ret == 1) {
5573
9
                LOG(INFO)
5574
9
                        << "rowset already marked as recycled, recycler will delete data and kv at "
5575
9
                           "next turn, instance_id="
5576
9
                        << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5577
9
                        << rowset.start_version() << '-' << rowset.end_version() << "]";
5578
9
                return 0;
5579
9
            }
5580
16
        }
5581
5582
7
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5583
7
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
5584
7
                      << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5585
7
                      << rowset.start_version() << '-' << rowset.end_version() << "]";
5586
5587
7
            int ret = abort_txn_or_job_for_recycle(rowset);
5588
7
            if (ret != 0) {
5589
0
                LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
5590
0
                             << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5591
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5592
0
                return ret;
5593
0
            }
5594
7
        }
5595
5596
7
        ++num_expired;
5597
7
        expired_rowset_size += v.size();
5598
7
        if (!rowset.has_resource_id()) {
5599
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5600
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5601
0
                return -1;
5602
0
            }
5603
            // might be a delete pred rowset
5604
0
            tmp_rowset_keys.emplace_back(k);
5605
0
            return 0;
5606
0
        }
5607
        // TODO(plat1ko): check rowset not referenced
5608
7
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5609
7
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5610
7
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5611
7
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5612
7
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5613
7
                  << " num_expired=" << num_expired
5614
7
                  << " task_type=" << metrics_context.operation_type;
5615
5616
7
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5617
        // Remove the rowset ref count key directly since it has not been used.
5618
7
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5619
7
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5620
7
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5621
7
                  << "key=" << hex(rowset_ref_count_key);
5622
7
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5623
5624
7
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5625
7
        return 0;
5626
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5546
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5547
106k
        ++num_scanned;
5548
106k
        total_rowset_key_size += k.size();
5549
106k
        total_rowset_value_size += v.size();
5550
106k
        doris::RowsetMetaCloudPB rowset;
5551
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5552
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5553
0
            return -1;
5554
0
        }
5555
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5556
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5557
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5558
0
                   << " txn_expiration=" << rowset.txn_expiration()
5559
0
                   << " rowset_creation_time=" << rowset.creation_time();
5560
106k
        int64_t current_time = ::time(nullptr);
5561
106k
        if (current_time < expiration) { // not expired
5562
0
            return 0;
5563
0
        }
5564
5565
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5566
106k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
5567
106k
            if (mark_ret == -1) {
5568
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
5569
0
                             << " tablet_id=" << rowset.tablet_id() << " version=["
5570
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5571
0
                return -1;
5572
106k
            } else if (mark_ret == 1) {
5573
52.0k
                LOG(INFO)
5574
52.0k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
5575
52.0k
                           "next turn, instance_id="
5576
52.0k
                        << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5577
52.0k
                        << rowset.start_version() << '-' << rowset.end_version() << "]";
5578
52.0k
                return 0;
5579
52.0k
            }
5580
106k
        }
5581
5582
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5583
54.0k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
5584
54.0k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5585
54.0k
                      << rowset.start_version() << '-' << rowset.end_version() << "]";
5586
5587
54.0k
            int ret = abort_txn_or_job_for_recycle(rowset);
5588
54.0k
            if (ret != 0) {
5589
0
                LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
5590
0
                             << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5591
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5592
0
                return ret;
5593
0
            }
5594
54.0k
        }
5595
5596
54.0k
        ++num_expired;
5597
54.0k
        expired_rowset_size += v.size();
5598
54.0k
        if (!rowset.has_resource_id()) {
5599
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5600
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5601
0
                return -1;
5602
0
            }
5603
            // might be a delete pred rowset
5604
0
            tmp_rowset_keys.emplace_back(k);
5605
0
            return 0;
5606
0
        }
5607
        // TODO(plat1ko): check rowset not referenced
5608
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5609
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5610
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5611
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5612
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5613
54.0k
                  << " num_expired=" << num_expired
5614
54.0k
                  << " task_type=" << metrics_context.operation_type;
5615
5616
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5617
        // Remove the rowset ref count key directly since it has not been used.
5618
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5619
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5620
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5621
54.0k
                  << "key=" << hex(rowset_ref_count_key);
5622
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5623
5624
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5625
54.0k
        return 0;
5626
54.0k
    };
5627
5628
    // TODO bacth delete
5629
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5630
51.0k
        std::string dbm_start_key =
5631
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5632
51.0k
        std::string dbm_end_key = dbm_start_key;
5633
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5634
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5635
51.0k
        if (ret != 0) {
5636
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5637
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5638
0
                         << ", rowset_id=" << rowset_id;
5639
0
        }
5640
51.0k
        return ret;
5641
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5629
7
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5630
7
        std::string dbm_start_key =
5631
7
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5632
7
        std::string dbm_end_key = dbm_start_key;
5633
7
        encode_int64(INT64_MAX, &dbm_end_key);
5634
7
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5635
7
        if (ret != 0) {
5636
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5637
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5638
0
                         << ", rowset_id=" << rowset_id;
5639
0
        }
5640
7
        return ret;
5641
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5629
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5630
51.0k
        std::string dbm_start_key =
5631
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5632
51.0k
        std::string dbm_end_key = dbm_start_key;
5633
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5634
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5635
51.0k
        if (ret != 0) {
5636
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5637
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5638
0
                         << ", rowset_id=" << rowset_id;
5639
0
        }
5640
51.0k
        return ret;
5641
51.0k
    };
5642
5643
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5644
51.0k
        auto delete_bitmap_start =
5645
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5646
51.0k
        auto delete_bitmap_end =
5647
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5648
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5649
51.0k
        if (ret != 0) {
5650
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5651
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5652
0
        }
5653
51.0k
        return ret;
5654
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5643
7
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5644
7
        auto delete_bitmap_start =
5645
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5646
7
        auto delete_bitmap_end =
5647
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5648
7
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5649
7
        if (ret != 0) {
5650
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5651
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5652
0
        }
5653
7
        return ret;
5654
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5643
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5644
51.0k
        auto delete_bitmap_start =
5645
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5646
51.0k
        auto delete_bitmap_end =
5647
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5648
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5649
51.0k
        if (ret != 0) {
5650
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5651
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5652
0
        }
5653
51.0k
        return ret;
5654
51.0k
    };
5655
5656
39
    auto loop_done = [&]() -> int {
5657
32
        DORIS_CLOUD_DEFER {
5658
32
            tmp_rowset_keys.clear();
5659
32
            tmp_rowsets.clear();
5660
32
            tmp_rowset_ref_count_keys.clear();
5661
32
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5657
12
        DORIS_CLOUD_DEFER {
5658
12
            tmp_rowset_keys.clear();
5659
12
            tmp_rowsets.clear();
5660
12
            tmp_rowset_ref_count_keys.clear();
5661
12
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5657
20
        DORIS_CLOUD_DEFER {
5658
20
            tmp_rowset_keys.clear();
5659
20
            tmp_rowsets.clear();
5660
20
            tmp_rowset_ref_count_keys.clear();
5661
20
        };
5662
32
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5663
32
                             tmp_rowsets_to_delete = tmp_rowsets,
5664
32
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5665
32
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5666
32
                                   metrics_context) != 0) {
5667
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5668
3
                return;
5669
3
            }
5670
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5671
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5672
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5673
0
                                 << rs.ShortDebugString();
5674
0
                    return;
5675
0
                }
5676
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5677
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5678
0
                                 << rs.ShortDebugString();
5679
0
                    return;
5680
0
                }
5681
51.0k
            }
5682
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5683
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5684
0
                return;
5685
0
            }
5686
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5687
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5688
0
                return;
5689
0
            }
5690
29
            num_recycled += tmp_rowset_keys.size();
5691
29
            return;
5692
29
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5664
12
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5665
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5666
12
                                   metrics_context) != 0) {
5667
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5668
0
                return;
5669
0
            }
5670
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5671
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5672
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5673
0
                                 << rs.ShortDebugString();
5674
0
                    return;
5675
0
                }
5676
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5677
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5678
0
                                 << rs.ShortDebugString();
5679
0
                    return;
5680
0
                }
5681
7
            }
5682
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5683
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5684
0
                return;
5685
0
            }
5686
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5687
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5688
0
                return;
5689
0
            }
5690
12
            num_recycled += tmp_rowset_keys.size();
5691
12
            return;
5692
12
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5664
20
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5665
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5666
20
                                   metrics_context) != 0) {
5667
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5668
3
                return;
5669
3
            }
5670
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5671
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5672
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5673
0
                                 << rs.ShortDebugString();
5674
0
                    return;
5675
0
                }
5676
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5677
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5678
0
                                 << rs.ShortDebugString();
5679
0
                    return;
5680
0
                }
5681
51.0k
            }
5682
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5683
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5684
0
                return;
5685
0
            }
5686
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5687
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5688
0
                return;
5689
0
            }
5690
17
            num_recycled += tmp_rowset_keys.size();
5691
17
            return;
5692
17
        });
5693
32
        return 0;
5694
32
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
5656
12
    auto loop_done = [&]() -> int {
5657
12
        DORIS_CLOUD_DEFER {
5658
12
            tmp_rowset_keys.clear();
5659
12
            tmp_rowsets.clear();
5660
12
            tmp_rowset_ref_count_keys.clear();
5661
12
        };
5662
12
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5663
12
                             tmp_rowsets_to_delete = tmp_rowsets,
5664
12
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5665
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5666
12
                                   metrics_context) != 0) {
5667
12
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5668
12
                return;
5669
12
            }
5670
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5671
12
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5672
12
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5673
12
                                 << rs.ShortDebugString();
5674
12
                    return;
5675
12
                }
5676
12
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5677
12
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5678
12
                                 << rs.ShortDebugString();
5679
12
                    return;
5680
12
                }
5681
12
            }
5682
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5683
12
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5684
12
                return;
5685
12
            }
5686
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5687
12
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5688
12
                return;
5689
12
            }
5690
12
            num_recycled += tmp_rowset_keys.size();
5691
12
            return;
5692
12
        });
5693
12
        return 0;
5694
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
5656
20
    auto loop_done = [&]() -> int {
5657
20
        DORIS_CLOUD_DEFER {
5658
20
            tmp_rowset_keys.clear();
5659
20
            tmp_rowsets.clear();
5660
20
            tmp_rowset_ref_count_keys.clear();
5661
20
        };
5662
20
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5663
20
                             tmp_rowsets_to_delete = tmp_rowsets,
5664
20
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5665
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5666
20
                                   metrics_context) != 0) {
5667
20
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5668
20
                return;
5669
20
            }
5670
20
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5671
20
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5672
20
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5673
20
                                 << rs.ShortDebugString();
5674
20
                    return;
5675
20
                }
5676
20
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5677
20
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5678
20
                                 << rs.ShortDebugString();
5679
20
                    return;
5680
20
                }
5681
20
            }
5682
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5683
20
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5684
20
                return;
5685
20
            }
5686
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5687
20
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5688
20
                return;
5689
20
            }
5690
20
            num_recycled += tmp_rowset_keys.size();
5691
20
            return;
5692
20
        });
5693
20
        return 0;
5694
20
    };
5695
5696
39
    if (config::enable_recycler_stats_metrics) {
5697
0
        scan_and_statistics_tmp_rowsets();
5698
0
    }
5699
    // recycle_func and loop_done for scan and recycle
5700
39
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
5701
39
                               std::move(loop_done));
5702
5703
39
    worker_pool->stop();
5704
5705
    // Report final metrics after all concurrent tasks completed
5706
39
    segment_metrics_context_.report();
5707
39
    metrics_context.report();
5708
5709
39
    return ret;
5710
39
}
5711
5712
int InstanceRecycler::scan_and_recycle(
5713
        std::string begin, std::string_view end,
5714
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
5715
268
        std::function<int()> loop_done) {
5716
268
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
5717
268
    int ret = 0;
5718
268
    int64_t cnt = 0;
5719
268
    int get_range_retried = 0;
5720
268
    std::string err;
5721
268
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5722
268
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5723
268
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5724
268
                  << " ret=" << ret << " err=" << err;
5725
268
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5721
31
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5722
31
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5723
31
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5724
31
                  << " ret=" << ret << " err=" << err;
5725
31
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5721
237
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5722
237
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5723
237
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5724
237
                  << " ret=" << ret << " err=" << err;
5725
237
    };
5726
5727
268
    std::unique_ptr<RangeGetIterator> it;
5728
321
    do {
5729
321
        if (get_range_retried > 1000) {
5730
0
            err = "txn_get exceeds max retry, may not scan all keys";
5731
0
            ret = -1;
5732
0
            return -1;
5733
0
        }
5734
321
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
5735
321
        if (get_ret != 0) { // txn kv may complain "Request for future version"
5736
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
5737
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
5738
0
                         << " get_range_retried=" << get_range_retried;
5739
0
            ++get_range_retried;
5740
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5741
0
            continue; // try again
5742
0
        }
5743
321
        if (!it->has_next()) {
5744
140
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
5745
140
            break; // scan finished
5746
140
        }
5747
154k
        while (it->has_next()) {
5748
154k
            ++cnt;
5749
            // recycle corresponding resources
5750
154k
            auto [k, v] = it->next();
5751
154k
            if (!it->has_next()) {
5752
181
                begin = k;
5753
181
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
5754
181
            }
5755
            // if we want to continue scanning, the recycle_func should not return non-zero
5756
154k
            if (recycle_func(k, v) != 0) {
5757
4.00k
                err = "recycle_func error";
5758
4.00k
                ret = -1;
5759
4.00k
            }
5760
154k
        }
5761
181
        begin.push_back('\x00'); // Update to next smallest key for iteration
5762
        // if we want to continue scanning, the recycle_func should not return non-zero
5763
181
        if (loop_done && loop_done() != 0) {
5764
4
            err = "loop_done error";
5765
4
            ret = -1;
5766
4
        }
5767
181
    } while (it->more() && !stopped());
5768
268
    return ret;
5769
268
}
5770
5771
19
int InstanceRecycler::abort_timeout_txn() {
5772
19
    const std::string task_name = "abort_timeout_txn";
5773
19
    int64_t num_scanned = 0;
5774
19
    int64_t num_timeout = 0;
5775
19
    int64_t num_abort = 0;
5776
19
    int64_t num_advance = 0;
5777
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5778
5779
19
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
5780
19
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5781
19
    std::string begin_txn_running_key;
5782
19
    std::string end_txn_running_key;
5783
19
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
5784
19
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
5785
5786
19
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
5787
5788
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5789
19
    register_recycle_task(task_name, start_time);
5790
5791
19
    DORIS_CLOUD_DEFER {
5792
19
        unregister_recycle_task(task_name);
5793
19
        int64_t cost =
5794
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5795
19
        metrics_context.finish_report();
5796
19
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5797
19
                .tag("instance_id", instance_id_)
5798
19
                .tag("num_scanned", num_scanned)
5799
19
                .tag("num_timeout", num_timeout)
5800
19
                .tag("num_abort", num_abort)
5801
19
                .tag("num_advance", num_advance);
5802
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
5791
3
    DORIS_CLOUD_DEFER {
5792
3
        unregister_recycle_task(task_name);
5793
3
        int64_t cost =
5794
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5795
3
        metrics_context.finish_report();
5796
3
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5797
3
                .tag("instance_id", instance_id_)
5798
3
                .tag("num_scanned", num_scanned)
5799
3
                .tag("num_timeout", num_timeout)
5800
3
                .tag("num_abort", num_abort)
5801
3
                .tag("num_advance", num_advance);
5802
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
5791
16
    DORIS_CLOUD_DEFER {
5792
16
        unregister_recycle_task(task_name);
5793
16
        int64_t cost =
5794
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5795
16
        metrics_context.finish_report();
5796
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5797
16
                .tag("instance_id", instance_id_)
5798
16
                .tag("num_scanned", num_scanned)
5799
16
                .tag("num_timeout", num_timeout)
5800
16
                .tag("num_abort", num_abort)
5801
16
                .tag("num_advance", num_advance);
5802
16
    };
5803
5804
19
    int64_t current_time =
5805
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5806
5807
19
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
5808
19
                                  &current_time, &metrics_context,
5809
19
                                  this](std::string_view k, std::string_view v) -> int {
5810
9
        ++num_scanned;
5811
5812
9
        std::unique_ptr<Transaction> txn;
5813
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5814
9
        if (err != TxnErrorCode::TXN_OK) {
5815
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5816
0
            return -1;
5817
0
        }
5818
9
        std::string_view k1 = k;
5819
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5820
9
        k1.remove_prefix(1); // Remove key space
5821
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5822
9
        if (decode_key(&k1, &out) != 0) {
5823
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5824
0
            return -1;
5825
0
        }
5826
9
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5827
9
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5828
9
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5829
        // Update txn_info
5830
9
        std::string txn_inf_key, txn_inf_val;
5831
9
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5832
9
        err = txn->get(txn_inf_key, &txn_inf_val);
5833
9
        if (err != TxnErrorCode::TXN_OK) {
5834
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5835
0
            return -1;
5836
0
        }
5837
9
        TxnInfoPB txn_info;
5838
9
        if (!txn_info.ParseFromString(txn_inf_val)) {
5839
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5840
0
            return -1;
5841
0
        }
5842
5843
9
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5844
3
            txn.reset();
5845
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5846
3
            std::shared_ptr<TxnLazyCommitTask> task =
5847
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5848
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5849
3
            if (ret.first != MetaServiceCode::OK) {
5850
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5851
0
                             << "msg=" << ret.second;
5852
0
                return -1;
5853
0
            }
5854
3
            ++num_advance;
5855
3
            return 0;
5856
6
        } else {
5857
6
            TxnRunningPB txn_running_pb;
5858
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5859
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5860
0
                return -1;
5861
0
            }
5862
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5863
4
                return 0;
5864
4
            }
5865
2
            ++num_timeout;
5866
5867
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5868
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5869
2
            txn_info.set_finish_time(current_time);
5870
2
            txn_info.set_reason("timeout");
5871
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5872
2
            txn_inf_val.clear();
5873
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5874
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5875
0
                return -1;
5876
0
            }
5877
2
            txn->put(txn_inf_key, txn_inf_val);
5878
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5879
            // Put recycle txn key
5880
2
            std::string recyc_txn_key, recyc_txn_val;
5881
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5882
2
            RecycleTxnPB recycle_txn_pb;
5883
2
            recycle_txn_pb.set_creation_time(current_time);
5884
2
            recycle_txn_pb.set_label(txn_info.label());
5885
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5886
0
                LOG_WARNING("failed to serialize txn recycle info")
5887
0
                        .tag("key", hex(k))
5888
0
                        .tag("db_id", db_id)
5889
0
                        .tag("txn_id", txn_id);
5890
0
                return -1;
5891
0
            }
5892
2
            txn->put(recyc_txn_key, recyc_txn_val);
5893
            // Remove txn running key
5894
2
            txn->remove(k);
5895
2
            err = txn->commit();
5896
2
            if (err != TxnErrorCode::TXN_OK) {
5897
0
                LOG_WARNING("failed to commit txn err={}", err)
5898
0
                        .tag("key", hex(k))
5899
0
                        .tag("db_id", db_id)
5900
0
                        .tag("txn_id", txn_id);
5901
0
                return -1;
5902
0
            }
5903
2
            metrics_context.total_recycled_num = ++num_abort;
5904
2
            metrics_context.report();
5905
2
        }
5906
5907
2
        return 0;
5908
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5809
3
                                  this](std::string_view k, std::string_view v) -> int {
5810
3
        ++num_scanned;
5811
5812
3
        std::unique_ptr<Transaction> txn;
5813
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5814
3
        if (err != TxnErrorCode::TXN_OK) {
5815
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5816
0
            return -1;
5817
0
        }
5818
3
        std::string_view k1 = k;
5819
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5820
3
        k1.remove_prefix(1); // Remove key space
5821
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5822
3
        if (decode_key(&k1, &out) != 0) {
5823
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5824
0
            return -1;
5825
0
        }
5826
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5827
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5828
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5829
        // Update txn_info
5830
3
        std::string txn_inf_key, txn_inf_val;
5831
3
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5832
3
        err = txn->get(txn_inf_key, &txn_inf_val);
5833
3
        if (err != TxnErrorCode::TXN_OK) {
5834
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5835
0
            return -1;
5836
0
        }
5837
3
        TxnInfoPB txn_info;
5838
3
        if (!txn_info.ParseFromString(txn_inf_val)) {
5839
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5840
0
            return -1;
5841
0
        }
5842
5843
3
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5844
3
            txn.reset();
5845
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5846
3
            std::shared_ptr<TxnLazyCommitTask> task =
5847
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5848
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5849
3
            if (ret.first != MetaServiceCode::OK) {
5850
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5851
0
                             << "msg=" << ret.second;
5852
0
                return -1;
5853
0
            }
5854
3
            ++num_advance;
5855
3
            return 0;
5856
3
        } else {
5857
0
            TxnRunningPB txn_running_pb;
5858
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5859
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5860
0
                return -1;
5861
0
            }
5862
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5863
0
                return 0;
5864
0
            }
5865
0
            ++num_timeout;
5866
5867
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5868
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5869
0
            txn_info.set_finish_time(current_time);
5870
0
            txn_info.set_reason("timeout");
5871
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5872
0
            txn_inf_val.clear();
5873
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5874
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5875
0
                return -1;
5876
0
            }
5877
0
            txn->put(txn_inf_key, txn_inf_val);
5878
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5879
            // Put recycle txn key
5880
0
            std::string recyc_txn_key, recyc_txn_val;
5881
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5882
0
            RecycleTxnPB recycle_txn_pb;
5883
0
            recycle_txn_pb.set_creation_time(current_time);
5884
0
            recycle_txn_pb.set_label(txn_info.label());
5885
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5886
0
                LOG_WARNING("failed to serialize txn recycle info")
5887
0
                        .tag("key", hex(k))
5888
0
                        .tag("db_id", db_id)
5889
0
                        .tag("txn_id", txn_id);
5890
0
                return -1;
5891
0
            }
5892
0
            txn->put(recyc_txn_key, recyc_txn_val);
5893
            // Remove txn running key
5894
0
            txn->remove(k);
5895
0
            err = txn->commit();
5896
0
            if (err != TxnErrorCode::TXN_OK) {
5897
0
                LOG_WARNING("failed to commit txn err={}", err)
5898
0
                        .tag("key", hex(k))
5899
0
                        .tag("db_id", db_id)
5900
0
                        .tag("txn_id", txn_id);
5901
0
                return -1;
5902
0
            }
5903
0
            metrics_context.total_recycled_num = ++num_abort;
5904
0
            metrics_context.report();
5905
0
        }
5906
5907
0
        return 0;
5908
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5809
6
                                  this](std::string_view k, std::string_view v) -> int {
5810
6
        ++num_scanned;
5811
5812
6
        std::unique_ptr<Transaction> txn;
5813
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5814
6
        if (err != TxnErrorCode::TXN_OK) {
5815
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5816
0
            return -1;
5817
0
        }
5818
6
        std::string_view k1 = k;
5819
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5820
6
        k1.remove_prefix(1); // Remove key space
5821
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5822
6
        if (decode_key(&k1, &out) != 0) {
5823
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5824
0
            return -1;
5825
0
        }
5826
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5827
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5828
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5829
        // Update txn_info
5830
6
        std::string txn_inf_key, txn_inf_val;
5831
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5832
6
        err = txn->get(txn_inf_key, &txn_inf_val);
5833
6
        if (err != TxnErrorCode::TXN_OK) {
5834
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5835
0
            return -1;
5836
0
        }
5837
6
        TxnInfoPB txn_info;
5838
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
5839
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5840
0
            return -1;
5841
0
        }
5842
5843
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5844
0
            txn.reset();
5845
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5846
0
            std::shared_ptr<TxnLazyCommitTask> task =
5847
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5848
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5849
0
            if (ret.first != MetaServiceCode::OK) {
5850
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5851
0
                             << "msg=" << ret.second;
5852
0
                return -1;
5853
0
            }
5854
0
            ++num_advance;
5855
0
            return 0;
5856
6
        } else {
5857
6
            TxnRunningPB txn_running_pb;
5858
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5859
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5860
0
                return -1;
5861
0
            }
5862
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5863
4
                return 0;
5864
4
            }
5865
2
            ++num_timeout;
5866
5867
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5868
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5869
2
            txn_info.set_finish_time(current_time);
5870
2
            txn_info.set_reason("timeout");
5871
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5872
2
            txn_inf_val.clear();
5873
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5874
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5875
0
                return -1;
5876
0
            }
5877
2
            txn->put(txn_inf_key, txn_inf_val);
5878
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5879
            // Put recycle txn key
5880
2
            std::string recyc_txn_key, recyc_txn_val;
5881
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5882
2
            RecycleTxnPB recycle_txn_pb;
5883
2
            recycle_txn_pb.set_creation_time(current_time);
5884
2
            recycle_txn_pb.set_label(txn_info.label());
5885
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5886
0
                LOG_WARNING("failed to serialize txn recycle info")
5887
0
                        .tag("key", hex(k))
5888
0
                        .tag("db_id", db_id)
5889
0
                        .tag("txn_id", txn_id);
5890
0
                return -1;
5891
0
            }
5892
2
            txn->put(recyc_txn_key, recyc_txn_val);
5893
            // Remove txn running key
5894
2
            txn->remove(k);
5895
2
            err = txn->commit();
5896
2
            if (err != TxnErrorCode::TXN_OK) {
5897
0
                LOG_WARNING("failed to commit txn err={}", err)
5898
0
                        .tag("key", hex(k))
5899
0
                        .tag("db_id", db_id)
5900
0
                        .tag("txn_id", txn_id);
5901
0
                return -1;
5902
0
            }
5903
2
            metrics_context.total_recycled_num = ++num_abort;
5904
2
            metrics_context.report();
5905
2
        }
5906
5907
2
        return 0;
5908
6
    };
5909
5910
19
    if (config::enable_recycler_stats_metrics) {
5911
0
        scan_and_statistics_abort_timeout_txn();
5912
0
    }
5913
    // recycle_func and loop_done for scan and recycle
5914
19
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
5915
19
                            std::move(handle_txn_running_kv));
5916
19
}
5917
5918
19
int InstanceRecycler::recycle_expired_txn_label() {
5919
19
    const std::string task_name = "recycle_expired_txn_label";
5920
19
    int64_t num_scanned = 0;
5921
19
    int64_t num_expired = 0;
5922
19
    std::atomic_long num_recycled = 0;
5923
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5924
19
    int ret = 0;
5925
5926
19
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
5927
19
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5928
19
    std::string begin_recycle_txn_key;
5929
19
    std::string end_recycle_txn_key;
5930
19
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
5931
19
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
5932
19
    std::vector<std::string> recycle_txn_info_keys;
5933
5934
19
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
5935
5936
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5937
19
    register_recycle_task(task_name, start_time);
5938
19
    DORIS_CLOUD_DEFER {
5939
19
        unregister_recycle_task(task_name);
5940
19
        int64_t cost =
5941
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5942
19
        metrics_context.finish_report();
5943
19
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5944
19
                .tag("instance_id", instance_id_)
5945
19
                .tag("num_scanned", num_scanned)
5946
19
                .tag("num_expired", num_expired)
5947
19
                .tag("num_recycled", num_recycled);
5948
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5938
1
    DORIS_CLOUD_DEFER {
5939
1
        unregister_recycle_task(task_name);
5940
1
        int64_t cost =
5941
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5942
1
        metrics_context.finish_report();
5943
1
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5944
1
                .tag("instance_id", instance_id_)
5945
1
                .tag("num_scanned", num_scanned)
5946
1
                .tag("num_expired", num_expired)
5947
1
                .tag("num_recycled", num_recycled);
5948
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5938
18
    DORIS_CLOUD_DEFER {
5939
18
        unregister_recycle_task(task_name);
5940
18
        int64_t cost =
5941
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5942
18
        metrics_context.finish_report();
5943
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5944
18
                .tag("instance_id", instance_id_)
5945
18
                .tag("num_scanned", num_scanned)
5946
18
                .tag("num_expired", num_expired)
5947
18
                .tag("num_recycled", num_recycled);
5948
18
    };
5949
5950
19
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5951
5952
19
    SyncExecutor<int> concurrent_delete_executor(
5953
19
            _thread_pool_group.s3_producer_pool,
5954
19
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
5955
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5955
1
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5955
23.0k
            [](const int& ret) { return ret != 0; });
5956
5957
19
    int64_t current_time_ms =
5958
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5959
5960
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5961
30.0k
        ++num_scanned;
5962
30.0k
        RecycleTxnPB recycle_txn_pb;
5963
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5964
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5965
0
            return -1;
5966
0
        }
5967
30.0k
        if ((config::force_immediate_recycle) ||
5968
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5969
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5970
30.0k
             current_time_ms)) {
5971
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5972
23.0k
            num_expired++;
5973
23.0k
            recycle_txn_info_keys.emplace_back(k);
5974
23.0k
        }
5975
30.0k
        return 0;
5976
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5960
1
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5961
1
        ++num_scanned;
5962
1
        RecycleTxnPB recycle_txn_pb;
5963
1
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5964
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5965
0
            return -1;
5966
0
        }
5967
1
        if ((config::force_immediate_recycle) ||
5968
1
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5969
1
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5970
1
             current_time_ms)) {
5971
1
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5972
1
            num_expired++;
5973
1
            recycle_txn_info_keys.emplace_back(k);
5974
1
        }
5975
1
        return 0;
5976
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5960
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5961
30.0k
        ++num_scanned;
5962
30.0k
        RecycleTxnPB recycle_txn_pb;
5963
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5964
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5965
0
            return -1;
5966
0
        }
5967
30.0k
        if ((config::force_immediate_recycle) ||
5968
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5969
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5970
30.0k
             current_time_ms)) {
5971
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5972
23.0k
            num_expired++;
5973
23.0k
            recycle_txn_info_keys.emplace_back(k);
5974
23.0k
        }
5975
30.0k
        return 0;
5976
30.0k
    };
5977
5978
    // int 0 for success, 1 for conflict, -1 for error
5979
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5980
23.0k
        std::string_view k1 = k;
5981
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5982
23.0k
        k1.remove_prefix(1); // Remove key space
5983
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5984
23.0k
        int ret = decode_key(&k1, &out);
5985
23.0k
        if (ret != 0) {
5986
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5987
0
            return -1;
5988
0
        }
5989
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5990
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5991
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5992
23.0k
        std::unique_ptr<Transaction> txn;
5993
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5994
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5995
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5996
0
            return -1;
5997
0
        }
5998
        // Remove txn index kv
5999
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6000
23.0k
        txn->remove(index_key);
6001
        // Remove txn info kv
6002
23.0k
        std::string info_key, info_val;
6003
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6004
23.0k
        err = txn->get(info_key, &info_val);
6005
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6006
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6007
0
            return -1;
6008
0
        }
6009
23.0k
        TxnInfoPB txn_info;
6010
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6011
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6012
0
            return -1;
6013
0
        }
6014
23.0k
        txn->remove(info_key);
6015
        // Remove sub txn index kvs
6016
23.0k
        std::vector<std::string> sub_txn_index_keys;
6017
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6018
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6019
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6020
22.9k
        }
6021
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6022
22.9k
            txn->remove(sub_txn_index_key);
6023
22.9k
        }
6024
        // Update txn label
6025
23.0k
        std::string label_key, label_val;
6026
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6027
23.0k
        err = txn->get(label_key, &label_val);
6028
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6029
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6030
0
                         << " err=" << err;
6031
0
            return -1;
6032
0
        }
6033
23.0k
        TxnLabelPB txn_label;
6034
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6035
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6036
0
            return -1;
6037
0
        }
6038
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6039
23.0k
        if (it != txn_label.txn_ids().end()) {
6040
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6041
23.0k
        }
6042
23.0k
        if (txn_label.txn_ids().empty()) {
6043
23.0k
            txn->remove(label_key);
6044
23.0k
            TEST_SYNC_POINT_CALLBACK(
6045
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6046
23.0k
        } else {
6047
73
            if (!txn_label.SerializeToString(&label_val)) {
6048
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6049
0
                return -1;
6050
0
            }
6051
73
            TEST_SYNC_POINT_CALLBACK(
6052
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6053
73
            txn->atomic_set_ver_value(label_key, label_val);
6054
73
            TEST_SYNC_POINT_CALLBACK(
6055
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6056
73
        }
6057
        // Remove recycle txn kv
6058
23.0k
        txn->remove(k);
6059
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6060
23.0k
        err = txn->commit();
6061
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6062
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6063
62
                TEST_SYNC_POINT_CALLBACK(
6064
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6065
                // log the txn_id and label
6066
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6067
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6068
62
                             << " txn_label=" << txn_info.label();
6069
62
                return 1;
6070
62
            }
6071
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6072
0
            return -1;
6073
62
        }
6074
23.0k
        ++num_recycled;
6075
6076
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6077
23.0k
        return 0;
6078
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5979
1
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5980
1
        std::string_view k1 = k;
5981
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5982
1
        k1.remove_prefix(1); // Remove key space
5983
1
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5984
1
        int ret = decode_key(&k1, &out);
5985
1
        if (ret != 0) {
5986
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5987
0
            return -1;
5988
0
        }
5989
1
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5990
1
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5991
1
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5992
1
        std::unique_ptr<Transaction> txn;
5993
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5994
1
        if (err != TxnErrorCode::TXN_OK) {
5995
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5996
0
            return -1;
5997
0
        }
5998
        // Remove txn index kv
5999
1
        auto index_key = txn_index_key({instance_id_, txn_id});
6000
1
        txn->remove(index_key);
6001
        // Remove txn info kv
6002
1
        std::string info_key, info_val;
6003
1
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6004
1
        err = txn->get(info_key, &info_val);
6005
1
        if (err != TxnErrorCode::TXN_OK) {
6006
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6007
0
            return -1;
6008
0
        }
6009
1
        TxnInfoPB txn_info;
6010
1
        if (!txn_info.ParseFromString(info_val)) {
6011
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6012
0
            return -1;
6013
0
        }
6014
1
        txn->remove(info_key);
6015
        // Remove sub txn index kvs
6016
1
        std::vector<std::string> sub_txn_index_keys;
6017
1
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6018
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6019
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
6020
0
        }
6021
1
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6022
0
            txn->remove(sub_txn_index_key);
6023
0
        }
6024
        // Update txn label
6025
1
        std::string label_key, label_val;
6026
1
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6027
1
        err = txn->get(label_key, &label_val);
6028
1
        if (err != TxnErrorCode::TXN_OK) {
6029
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6030
0
                         << " err=" << err;
6031
0
            return -1;
6032
0
        }
6033
1
        TxnLabelPB txn_label;
6034
1
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6035
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6036
0
            return -1;
6037
0
        }
6038
1
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6039
1
        if (it != txn_label.txn_ids().end()) {
6040
1
            txn_label.mutable_txn_ids()->erase(it);
6041
1
        }
6042
1
        if (txn_label.txn_ids().empty()) {
6043
1
            txn->remove(label_key);
6044
1
            TEST_SYNC_POINT_CALLBACK(
6045
1
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6046
1
        } else {
6047
0
            if (!txn_label.SerializeToString(&label_val)) {
6048
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6049
0
                return -1;
6050
0
            }
6051
0
            TEST_SYNC_POINT_CALLBACK(
6052
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6053
0
            txn->atomic_set_ver_value(label_key, label_val);
6054
0
            TEST_SYNC_POINT_CALLBACK(
6055
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6056
0
        }
6057
        // Remove recycle txn kv
6058
1
        txn->remove(k);
6059
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6060
1
        err = txn->commit();
6061
1
        if (err != TxnErrorCode::TXN_OK) {
6062
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
6063
0
                TEST_SYNC_POINT_CALLBACK(
6064
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6065
                // log the txn_id and label
6066
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6067
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6068
0
                             << " txn_label=" << txn_info.label();
6069
0
                return 1;
6070
0
            }
6071
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6072
0
            return -1;
6073
0
        }
6074
1
        ++num_recycled;
6075
6076
1
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6077
1
        return 0;
6078
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5979
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5980
23.0k
        std::string_view k1 = k;
5981
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5982
23.0k
        k1.remove_prefix(1); // Remove key space
5983
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5984
23.0k
        int ret = decode_key(&k1, &out);
5985
23.0k
        if (ret != 0) {
5986
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5987
0
            return -1;
5988
0
        }
5989
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5990
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5991
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5992
23.0k
        std::unique_ptr<Transaction> txn;
5993
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5994
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5995
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5996
0
            return -1;
5997
0
        }
5998
        // Remove txn index kv
5999
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6000
23.0k
        txn->remove(index_key);
6001
        // Remove txn info kv
6002
23.0k
        std::string info_key, info_val;
6003
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6004
23.0k
        err = txn->get(info_key, &info_val);
6005
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6006
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6007
0
            return -1;
6008
0
        }
6009
23.0k
        TxnInfoPB txn_info;
6010
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6011
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6012
0
            return -1;
6013
0
        }
6014
23.0k
        txn->remove(info_key);
6015
        // Remove sub txn index kvs
6016
23.0k
        std::vector<std::string> sub_txn_index_keys;
6017
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6018
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6019
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6020
22.9k
        }
6021
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6022
22.9k
            txn->remove(sub_txn_index_key);
6023
22.9k
        }
6024
        // Update txn label
6025
23.0k
        std::string label_key, label_val;
6026
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6027
23.0k
        err = txn->get(label_key, &label_val);
6028
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6029
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6030
0
                         << " err=" << err;
6031
0
            return -1;
6032
0
        }
6033
23.0k
        TxnLabelPB txn_label;
6034
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6035
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6036
0
            return -1;
6037
0
        }
6038
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6039
23.0k
        if (it != txn_label.txn_ids().end()) {
6040
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6041
23.0k
        }
6042
23.0k
        if (txn_label.txn_ids().empty()) {
6043
23.0k
            txn->remove(label_key);
6044
23.0k
            TEST_SYNC_POINT_CALLBACK(
6045
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6046
23.0k
        } else {
6047
73
            if (!txn_label.SerializeToString(&label_val)) {
6048
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6049
0
                return -1;
6050
0
            }
6051
73
            TEST_SYNC_POINT_CALLBACK(
6052
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6053
73
            txn->atomic_set_ver_value(label_key, label_val);
6054
73
            TEST_SYNC_POINT_CALLBACK(
6055
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6056
73
        }
6057
        // Remove recycle txn kv
6058
23.0k
        txn->remove(k);
6059
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6060
23.0k
        err = txn->commit();
6061
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6062
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6063
62
                TEST_SYNC_POINT_CALLBACK(
6064
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6065
                // log the txn_id and label
6066
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6067
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6068
62
                             << " txn_label=" << txn_info.label();
6069
62
                return 1;
6070
62
            }
6071
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6072
0
            return -1;
6073
62
        }
6074
23.0k
        ++num_recycled;
6075
6076
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6077
23.0k
        return 0;
6078
23.0k
    };
6079
6080
19
    auto loop_done = [&]() -> int {
6081
10
        DORIS_CLOUD_DEFER {
6082
10
            recycle_txn_info_keys.clear();
6083
10
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6081
1
        DORIS_CLOUD_DEFER {
6082
1
            recycle_txn_info_keys.clear();
6083
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6081
9
        DORIS_CLOUD_DEFER {
6082
9
            recycle_txn_info_keys.clear();
6083
9
        };
6084
10
        TEST_SYNC_POINT_CALLBACK(
6085
10
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6086
10
                &recycle_txn_info_keys);
6087
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6088
23.0k
            concurrent_delete_executor.add([&]() {
6089
23.0k
                int ret = delete_recycle_txn_kv(k);
6090
23.0k
                if (ret == 1) {
6091
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6092
54
                    for (int i = 1; i <= max_retry; ++i) {
6093
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6094
54
                        ret = delete_recycle_txn_kv(k);
6095
                        // clang-format off
6096
54
                        TEST_SYNC_POINT_CALLBACK(
6097
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6098
                        // clang-format off
6099
54
                        if (ret != 1) {
6100
18
                            break;
6101
18
                        }
6102
                        // random sleep 0-100 ms to retry
6103
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6104
36
                    }
6105
18
                }
6106
23.0k
                if (ret != 0) {
6107
9
                    LOG_WARNING("failed to delete recycle txn kv")
6108
9
                            .tag("instance id", instance_id_)
6109
9
                            .tag("key", hex(k));
6110
9
                    return -1;
6111
9
                }
6112
23.0k
                return 0;
6113
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6088
1
            concurrent_delete_executor.add([&]() {
6089
1
                int ret = delete_recycle_txn_kv(k);
6090
1
                if (ret == 1) {
6091
0
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6092
0
                    for (int i = 1; i <= max_retry; ++i) {
6093
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6094
0
                        ret = delete_recycle_txn_kv(k);
6095
                        // clang-format off
6096
0
                        TEST_SYNC_POINT_CALLBACK(
6097
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6098
                        // clang-format off
6099
0
                        if (ret != 1) {
6100
0
                            break;
6101
0
                        }
6102
                        // random sleep 0-100 ms to retry
6103
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6104
0
                    }
6105
0
                }
6106
1
                if (ret != 0) {
6107
0
                    LOG_WARNING("failed to delete recycle txn kv")
6108
0
                            .tag("instance id", instance_id_)
6109
0
                            .tag("key", hex(k));
6110
0
                    return -1;
6111
0
                }
6112
1
                return 0;
6113
1
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6088
23.0k
            concurrent_delete_executor.add([&]() {
6089
23.0k
                int ret = delete_recycle_txn_kv(k);
6090
23.0k
                if (ret == 1) {
6091
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6092
54
                    for (int i = 1; i <= max_retry; ++i) {
6093
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6094
54
                        ret = delete_recycle_txn_kv(k);
6095
                        // clang-format off
6096
54
                        TEST_SYNC_POINT_CALLBACK(
6097
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6098
                        // clang-format off
6099
54
                        if (ret != 1) {
6100
18
                            break;
6101
18
                        }
6102
                        // random sleep 0-100 ms to retry
6103
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6104
36
                    }
6105
18
                }
6106
23.0k
                if (ret != 0) {
6107
9
                    LOG_WARNING("failed to delete recycle txn kv")
6108
9
                            .tag("instance id", instance_id_)
6109
9
                            .tag("key", hex(k));
6110
9
                    return -1;
6111
9
                }
6112
23.0k
                return 0;
6113
23.0k
            });
6114
23.0k
        }
6115
10
        bool finished = true;
6116
10
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6117
23.0k
        for (int r : rets) {
6118
23.0k
            if (r != 0) {
6119
9
                ret = -1;
6120
9
            }
6121
23.0k
        }
6122
6123
10
        ret = finished ? ret : -1;
6124
6125
        // Update metrics after all concurrent tasks completed
6126
10
        metrics_context.total_recycled_num = num_recycled.load();
6127
10
        metrics_context.report();
6128
6129
10
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6130
6131
10
        if (ret != 0) {
6132
3
            LOG_WARNING("recycle txn kv ret!=0")
6133
3
                    .tag("finished", finished)
6134
3
                    .tag("ret", ret)
6135
3
                    .tag("instance_id", instance_id_);
6136
3
            return ret;
6137
3
        }
6138
7
        return ret;
6139
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6080
1
    auto loop_done = [&]() -> int {
6081
1
        DORIS_CLOUD_DEFER {
6082
1
            recycle_txn_info_keys.clear();
6083
1
        };
6084
1
        TEST_SYNC_POINT_CALLBACK(
6085
1
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6086
1
                &recycle_txn_info_keys);
6087
1
        for (const auto& k : recycle_txn_info_keys) {
6088
1
            concurrent_delete_executor.add([&]() {
6089
1
                int ret = delete_recycle_txn_kv(k);
6090
1
                if (ret == 1) {
6091
1
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6092
1
                    for (int i = 1; i <= max_retry; ++i) {
6093
1
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6094
1
                        ret = delete_recycle_txn_kv(k);
6095
                        // clang-format off
6096
1
                        TEST_SYNC_POINT_CALLBACK(
6097
1
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6098
                        // clang-format off
6099
1
                        if (ret != 1) {
6100
1
                            break;
6101
1
                        }
6102
                        // random sleep 0-100 ms to retry
6103
1
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6104
1
                    }
6105
1
                }
6106
1
                if (ret != 0) {
6107
1
                    LOG_WARNING("failed to delete recycle txn kv")
6108
1
                            .tag("instance id", instance_id_)
6109
1
                            .tag("key", hex(k));
6110
1
                    return -1;
6111
1
                }
6112
1
                return 0;
6113
1
            });
6114
1
        }
6115
1
        bool finished = true;
6116
1
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6117
1
        for (int r : rets) {
6118
1
            if (r != 0) {
6119
0
                ret = -1;
6120
0
            }
6121
1
        }
6122
6123
1
        ret = finished ? ret : -1;
6124
6125
        // Update metrics after all concurrent tasks completed
6126
1
        metrics_context.total_recycled_num = num_recycled.load();
6127
1
        metrics_context.report();
6128
6129
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6130
6131
1
        if (ret != 0) {
6132
0
            LOG_WARNING("recycle txn kv ret!=0")
6133
0
                    .tag("finished", finished)
6134
0
                    .tag("ret", ret)
6135
0
                    .tag("instance_id", instance_id_);
6136
0
            return ret;
6137
0
        }
6138
1
        return ret;
6139
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6080
9
    auto loop_done = [&]() -> int {
6081
9
        DORIS_CLOUD_DEFER {
6082
9
            recycle_txn_info_keys.clear();
6083
9
        };
6084
9
        TEST_SYNC_POINT_CALLBACK(
6085
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6086
9
                &recycle_txn_info_keys);
6087
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6088
23.0k
            concurrent_delete_executor.add([&]() {
6089
23.0k
                int ret = delete_recycle_txn_kv(k);
6090
23.0k
                if (ret == 1) {
6091
23.0k
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6092
23.0k
                    for (int i = 1; i <= max_retry; ++i) {
6093
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6094
23.0k
                        ret = delete_recycle_txn_kv(k);
6095
                        // clang-format off
6096
23.0k
                        TEST_SYNC_POINT_CALLBACK(
6097
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6098
                        // clang-format on
6099
23.0k
                        if (ret != 1) {
6100
23.0k
                            break;
6101
23.0k
                        }
6102
                        // random sleep 0-100 ms to retry
6103
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6104
23.0k
                    }
6105
23.0k
                }
6106
23.0k
                if (ret != 0) {
6107
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
6108
23.0k
                            .tag("instance id", instance_id_)
6109
23.0k
                            .tag("key", hex(k));
6110
23.0k
                    return -1;
6111
23.0k
                }
6112
23.0k
                return 0;
6113
23.0k
            });
6114
23.0k
        }
6115
9
        bool finished = true;
6116
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6117
23.0k
        for (int r : rets) {
6118
23.0k
            if (r != 0) {
6119
9
                ret = -1;
6120
9
            }
6121
23.0k
        }
6122
6123
9
        ret = finished ? ret : -1;
6124
6125
        // Update metrics after all concurrent tasks completed
6126
9
        metrics_context.total_recycled_num = num_recycled.load();
6127
9
        metrics_context.report();
6128
6129
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6130
6131
9
        if (ret != 0) {
6132
3
            LOG_WARNING("recycle txn kv ret!=0")
6133
3
                    .tag("finished", finished)
6134
3
                    .tag("ret", ret)
6135
3
                    .tag("instance_id", instance_id_);
6136
3
            return ret;
6137
3
        }
6138
6
        return ret;
6139
9
    };
6140
6141
19
    if (config::enable_recycler_stats_metrics) {
6142
0
        scan_and_statistics_expired_txn_label();
6143
0
    }
6144
    // recycle_func and loop_done for scan and recycle
6145
19
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
6146
19
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
6147
19
}
6148
6149
// Identifies one copy job for log tracing.
// BatchObjStoreAccessor::add() formats these fields into the per-job trace string and uses
// instance_id / stage_id / table_id to build the copy-file keys of the job.
struct CopyJobIdTuple {
    std::string instance_id;
    std::string stage_id;
    // Fixed-width so it matches the int64_t table id decoded from the copy job key on every
    // platform; defaulted so a default-constructed tuple never carries an indeterminate value.
    int64_t table_id = 0;
    // Copy id of the job; it is printed as "query_id" in the trace logs.
    std::string copy_id;
    // Printed as "path" in the trace logs; may be empty.
    std::string stage_path;
};
6156
struct BatchObjStoreAccessor {
6157
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
6158
                          TxnKv* txn_kv)
6159
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
6160
3
    ~BatchObjStoreAccessor() {
6161
3
        if (!paths_.empty()) {
6162
3
            consume();
6163
3
        }
6164
3
    }
6165
6166
    /**
6167
    * To implicitely do batch work and submit the batch delete task to s3
6168
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
6169
    *
6170
    * @param copy_job The protubuf struct consists of the copy job files.
6171
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
6172
    *            it would last until we finish the delete task, here we need pass one string value
6173
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
6174
    */
6175
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
6176
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
6177
5
        auto& file_keys = copy_file_keys_[key];
6178
5
        file_keys.log_trace =
6179
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
6180
5
                            instance_id, stage_id, table_id, copy_id, path);
6181
5
        std::string_view log_trace = file_keys.log_trace;
6182
2.03k
        for (const auto& file : copy_job.object_files()) {
6183
2.03k
            auto relative_path = file.relative_path();
6184
2.03k
            paths_.push_back(relative_path);
6185
2.03k
            file_keys.keys.push_back(copy_file_key(
6186
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
6187
2.03k
            LOG_INFO(log_trace)
6188
2.03k
                    .tag("relative_path", relative_path)
6189
2.03k
                    .tag("batch_count", batch_count_);
6190
2.03k
        }
6191
5
        LOG_INFO(log_trace)
6192
5
                .tag("objects_num", copy_job.object_files().size())
6193
5
                .tag("batch_count", batch_count_);
6194
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
6195
        // recommend using delete objects when objects num is less than 10)
6196
5
        if (paths_.size() < 1000) {
6197
3
            return;
6198
3
        }
6199
2
        consume();
6200
2
    }
6201
6202
private:
6203
5
    void consume() {
6204
5
        DORIS_CLOUD_DEFER {
6205
5
            paths_.clear();
6206
5
            copy_file_keys_.clear();
6207
5
            batch_count_++;
6208
6209
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
6210
5
                        batch_count_);
6211
5
        };
6212
6213
5
        StopWatch sw;
6214
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
6215
5
        if (0 != accessor_->delete_files(paths_)) {
6216
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
6217
2
                        paths_.size(), batch_count_, sw.elapsed_us());
6218
2
            return;
6219
2
        }
6220
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
6221
3
                    paths_.size(), batch_count_, sw.elapsed_us());
6222
        // delete fdb's keys
6223
3
        for (auto& file_keys : copy_file_keys_) {
6224
3
            auto& [log_trace, keys] = file_keys.second;
6225
3
            std::unique_ptr<Transaction> txn;
6226
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
6227
0
                LOG(WARNING) << "failed to create txn";
6228
0
                continue;
6229
0
            }
6230
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6231
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6232
            // limited, should not cause the txn commit failed.
6233
1.02k
            for (const auto& key : keys) {
6234
1.02k
                txn->remove(key);
6235
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
6236
1.02k
            }
6237
3
            txn->remove(file_keys.first);
6238
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
6239
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
6240
0
                continue;
6241
0
            }
6242
3
        }
6243
3
    }
6244
    std::shared_ptr<StorageVaultAccessor> accessor_;
6245
    // the path of the s3 files to be deleted
6246
    std::vector<std::string> paths_;
6247
    struct CopyFiles {
6248
        std::string log_trace;
6249
        std::vector<std::string> keys;
6250
    };
6251
    // pair<std::string, std::vector<std::string>>
6252
    // first: instance_id_ stage_id table_id query_id
6253
    // second: keys to be deleted
6254
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
6255
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
6256
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
6257
    // which can together uniquely identifies different tasks for tracing log
6258
    uint64_t& batch_count_;
6259
    TxnKv* txn_kv_;
6260
};
6261
6262
13
int InstanceRecycler::recycle_copy_jobs() {
6263
13
    int64_t num_scanned = 0;
6264
13
    int64_t num_finished = 0;
6265
13
    int64_t num_expired = 0;
6266
13
    int64_t num_recycled = 0;
6267
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
6268
13
    uint64_t batch_count = 0;
6269
13
    const std::string task_name = "recycle_copy_jobs";
6270
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6271
6272
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
6273
6274
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6275
13
    register_recycle_task(task_name, start_time);
6276
6277
13
    DORIS_CLOUD_DEFER {
6278
13
        unregister_recycle_task(task_name);
6279
13
        int64_t cost =
6280
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6281
13
        metrics_context.finish_report();
6282
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6283
13
                .tag("instance_id", instance_id_)
6284
13
                .tag("num_scanned", num_scanned)
6285
13
                .tag("num_finished", num_finished)
6286
13
                .tag("num_expired", num_expired)
6287
13
                .tag("num_recycled", num_recycled);
6288
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
6277
13
    DORIS_CLOUD_DEFER {
6278
13
        unregister_recycle_task(task_name);
6279
13
        int64_t cost =
6280
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6281
13
        metrics_context.finish_report();
6282
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6283
13
                .tag("instance_id", instance_id_)
6284
13
                .tag("num_scanned", num_scanned)
6285
13
                .tag("num_finished", num_finished)
6286
13
                .tag("num_expired", num_expired)
6287
13
                .tag("num_recycled", num_recycled);
6288
13
    };
6289
6290
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
6291
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
6292
13
    std::string key0;
6293
13
    std::string key1;
6294
13
    copy_job_key(key_info0, &key0);
6295
13
    copy_job_key(key_info1, &key1);
6296
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
6297
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
6298
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
6299
16
                         this](std::string_view k, std::string_view v) -> int {
6300
16
        ++num_scanned;
6301
16
        CopyJobPB copy_job;
6302
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6303
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6304
0
            return -1;
6305
0
        }
6306
6307
        // decode copy job key
6308
16
        auto k1 = k;
6309
16
        k1.remove_prefix(1);
6310
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6311
16
        decode_key(&k1, &out);
6312
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6313
        // -> CopyJobPB
6314
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6315
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6316
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6317
6318
16
        bool check_storage = true;
6319
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6320
12
            ++num_finished;
6321
6322
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6323
7
                auto it = stage_accessor_map.find(stage_id);
6324
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6325
7
                std::string_view path;
6326
7
                if (it != stage_accessor_map.end()) {
6327
2
                    accessor = it->second;
6328
5
                } else {
6329
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6330
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6331
5
                                                      &inner_accessor);
6332
5
                    if (ret < 0) { // error
6333
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6334
0
                        return -1;
6335
5
                    } else if (ret == 0) {
6336
3
                        path = inner_accessor->uri();
6337
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6338
3
                                inner_accessor, batch_count, txn_kv_.get());
6339
3
                        stage_accessor_map.emplace(stage_id, accessor);
6340
3
                    } else { // stage not found, skip check storage
6341
2
                        check_storage = false;
6342
2
                    }
6343
5
                }
6344
7
                if (check_storage) {
6345
                    // TODO delete objects with key and etag is not supported
6346
5
                    accessor->add(std::move(copy_job), std::string(k),
6347
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6348
5
                    return 0;
6349
5
                }
6350
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6351
5
                int64_t current_time =
6352
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6353
5
                if (copy_job.finish_time_ms() > 0) {
6354
2
                    if (!config::force_immediate_recycle &&
6355
2
                        current_time < copy_job.finish_time_ms() +
6356
2
                                               config::copy_job_max_retention_second * 1000) {
6357
1
                        return 0;
6358
1
                    }
6359
3
                } else {
6360
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6361
3
                    if (!config::force_immediate_recycle &&
6362
3
                        current_time < copy_job.start_time_ms() +
6363
3
                                               config::copy_job_max_retention_second * 1000) {
6364
1
                        return 0;
6365
1
                    }
6366
3
                }
6367
5
            }
6368
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6369
4
            int64_t current_time =
6370
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6371
            // if copy job is timeout: delete all copy file kvs and copy job kv
6372
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6373
2
                return 0;
6374
2
            }
6375
2
            ++num_expired;
6376
2
        }
6377
6378
        // delete all copy files
6379
7
        std::vector<std::string> copy_file_keys;
6380
70
        for (auto& file : copy_job.object_files()) {
6381
70
            copy_file_keys.push_back(copy_file_key(
6382
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6383
70
        }
6384
7
        std::unique_ptr<Transaction> txn;
6385
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6386
0
            LOG(WARNING) << "failed to create txn";
6387
0
            return -1;
6388
0
        }
6389
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6390
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6391
        // limited, should not cause the txn commit failed.
6392
70
        for (const auto& key : copy_file_keys) {
6393
70
            txn->remove(key);
6394
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6395
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6396
70
                      << ", query_id=" << copy_id;
6397
70
        }
6398
7
        txn->remove(k);
6399
7
        TxnErrorCode err = txn->commit();
6400
7
        if (err != TxnErrorCode::TXN_OK) {
6401
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6402
0
            return -1;
6403
0
        }
6404
6405
7
        metrics_context.total_recycled_num = ++num_recycled;
6406
7
        metrics_context.report();
6407
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6408
7
        return 0;
6409
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6299
16
                         this](std::string_view k, std::string_view v) -> int {
6300
16
        ++num_scanned;
6301
16
        CopyJobPB copy_job;
6302
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6303
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6304
0
            return -1;
6305
0
        }
6306
6307
        // decode copy job key
6308
16
        auto k1 = k;
6309
16
        k1.remove_prefix(1);
6310
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6311
16
        decode_key(&k1, &out);
6312
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6313
        // -> CopyJobPB
6314
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6315
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6316
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6317
6318
16
        bool check_storage = true;
6319
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6320
12
            ++num_finished;
6321
6322
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6323
7
                auto it = stage_accessor_map.find(stage_id);
6324
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6325
7
                std::string_view path;
6326
7
                if (it != stage_accessor_map.end()) {
6327
2
                    accessor = it->second;
6328
5
                } else {
6329
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6330
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6331
5
                                                      &inner_accessor);
6332
5
                    if (ret < 0) { // error
6333
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6334
0
                        return -1;
6335
5
                    } else if (ret == 0) {
6336
3
                        path = inner_accessor->uri();
6337
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6338
3
                                inner_accessor, batch_count, txn_kv_.get());
6339
3
                        stage_accessor_map.emplace(stage_id, accessor);
6340
3
                    } else { // stage not found, skip check storage
6341
2
                        check_storage = false;
6342
2
                    }
6343
5
                }
6344
7
                if (check_storage) {
6345
                    // TODO delete objects with key and etag is not supported
6346
5
                    accessor->add(std::move(copy_job), std::string(k),
6347
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6348
5
                    return 0;
6349
5
                }
6350
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6351
5
                int64_t current_time =
6352
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6353
5
                if (copy_job.finish_time_ms() > 0) {
6354
2
                    if (!config::force_immediate_recycle &&
6355
2
                        current_time < copy_job.finish_time_ms() +
6356
2
                                               config::copy_job_max_retention_second * 1000) {
6357
1
                        return 0;
6358
1
                    }
6359
3
                } else {
6360
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6361
3
                    if (!config::force_immediate_recycle &&
6362
3
                        current_time < copy_job.start_time_ms() +
6363
3
                                               config::copy_job_max_retention_second * 1000) {
6364
1
                        return 0;
6365
1
                    }
6366
3
                }
6367
5
            }
6368
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6369
4
            int64_t current_time =
6370
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6371
            // if copy job is timeout: delete all copy file kvs and copy job kv
6372
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6373
2
                return 0;
6374
2
            }
6375
2
            ++num_expired;
6376
2
        }
6377
6378
        // delete all copy files
6379
7
        std::vector<std::string> copy_file_keys;
6380
70
        for (auto& file : copy_job.object_files()) {
6381
70
            copy_file_keys.push_back(copy_file_key(
6382
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6383
70
        }
6384
7
        std::unique_ptr<Transaction> txn;
6385
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6386
0
            LOG(WARNING) << "failed to create txn";
6387
0
            return -1;
6388
0
        }
6389
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6390
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6391
        // limited, should not cause the txn commit failed.
6392
70
        for (const auto& key : copy_file_keys) {
6393
70
            txn->remove(key);
6394
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6395
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6396
70
                      << ", query_id=" << copy_id;
6397
70
        }
6398
7
        txn->remove(k);
6399
7
        TxnErrorCode err = txn->commit();
6400
7
        if (err != TxnErrorCode::TXN_OK) {
6401
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6402
0
            return -1;
6403
0
        }
6404
6405
7
        metrics_context.total_recycled_num = ++num_recycled;
6406
7
        metrics_context.report();
6407
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6408
7
        return 0;
6409
7
    };
6410
6411
13
    if (config::enable_recycler_stats_metrics) {
6412
0
        scan_and_statistics_copy_jobs();
6413
0
    }
6414
    // recycle_func and loop_done for scan and recycle
6415
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
6416
13
}
6417
6418
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
6419
                                             const StagePB::StageType& stage_type,
6420
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
6421
5
#ifdef UNIT_TEST
6422
    // In unit test, external use the same accessor as the internal stage
6423
5
    auto it = accessor_map_.find(stage_id);
6424
5
    if (it != accessor_map_.end()) {
6425
3
        *accessor = it->second;
6426
3
    } else {
6427
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
6428
2
        return 1;
6429
2
    }
6430
#else
6431
    // init s3 accessor and add to accessor map
6432
    auto stage_it =
6433
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
6434
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
6435
6436
    if (stage_it == instance_info_.stages().end()) {
6437
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
6438
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
6439
        return 1;
6440
    }
6441
6442
    const auto& object_store_info = stage_it->obj_info();
6443
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
6444
6445
    S3Conf s3_conf;
6446
    if (stage_type == StagePB::EXTERNAL) {
6447
        if (stage_access_type == StagePB::AKSK) {
6448
            auto conf = S3Conf::from_obj_store_info(object_store_info);
6449
            if (!conf) {
6450
                return -1;
6451
            }
6452
6453
            s3_conf = std::move(*conf);
6454
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
6455
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
6456
            if (!conf) {
6457
                return -1;
6458
            }
6459
6460
            s3_conf = std::move(*conf);
6461
            if (instance_info_.ram_user().has_encryption_info()) {
6462
                AkSkPair plain_ak_sk_pair;
6463
                int ret = decrypt_ak_sk_helper(
6464
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
6465
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
6466
                if (ret != 0) {
6467
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
6468
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
6469
                    return -1;
6470
                }
6471
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
6472
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
6473
            } else {
6474
                s3_conf.ak = instance_info_.ram_user().ak();
6475
                s3_conf.sk = instance_info_.ram_user().sk();
6476
            }
6477
        } else {
6478
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
6479
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
6480
            return -1;
6481
        }
6482
    } else if (stage_type == StagePB::INTERNAL) {
6483
        int idx = stoi(object_store_info.id());
6484
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6485
            LOG(WARNING) << "invalid idx: " << idx;
6486
            return -1;
6487
        }
6488
6489
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6490
        auto conf = S3Conf::from_obj_store_info(old_obj);
6491
        if (!conf) {
6492
            return -1;
6493
        }
6494
6495
        s3_conf = std::move(*conf);
6496
        s3_conf.prefix = object_store_info.prefix();
6497
    } else {
6498
        LOG(WARNING) << "unknown stage type " << stage_type;
6499
        return -1;
6500
    }
6501
6502
    std::shared_ptr<S3Accessor> s3_accessor;
6503
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
6504
    if (ret != 0) {
6505
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
6506
        return -1;
6507
    }
6508
6509
    *accessor = std::move(s3_accessor);
6510
#endif
6511
3
    return 0;
6512
5
}
6513
6514
11
int InstanceRecycler::recycle_stage() {
6515
11
    int64_t num_scanned = 0;
6516
11
    int64_t num_recycled = 0;
6517
11
    const std::string task_name = "recycle_stage";
6518
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6519
6520
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
6521
6522
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6523
11
    register_recycle_task(task_name, start_time);
6524
6525
11
    DORIS_CLOUD_DEFER {
6526
11
        unregister_recycle_task(task_name);
6527
11
        int64_t cost =
6528
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6529
11
        metrics_context.finish_report();
6530
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6531
11
                .tag("instance_id", instance_id_)
6532
11
                .tag("num_scanned", num_scanned)
6533
11
                .tag("num_recycled", num_recycled);
6534
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
6525
11
    DORIS_CLOUD_DEFER {
6526
11
        unregister_recycle_task(task_name);
6527
11
        int64_t cost =
6528
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6529
11
        metrics_context.finish_report();
6530
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6531
11
                .tag("instance_id", instance_id_)
6532
11
                .tag("num_scanned", num_scanned)
6533
11
                .tag("num_recycled", num_recycled);
6534
11
    };
6535
6536
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6537
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6538
11
    std::string key0 = recycle_stage_key(key_info0);
6539
11
    std::string key1 = recycle_stage_key(key_info1);
6540
6541
11
    std::vector<std::string_view> stage_keys;
6542
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
6543
11
                         this](std::string_view k, std::string_view v) -> int {
6544
1
        ++num_scanned;
6545
1
        RecycleStagePB recycle_stage;
6546
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6547
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6548
0
            return -1;
6549
0
        }
6550
6551
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6552
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6553
0
            LOG(WARNING) << "invalid idx: " << idx;
6554
0
            return -1;
6555
0
        }
6556
6557
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6558
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6559
1
                [&] {
6560
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6561
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6562
1
                    if (!s3_conf) {
6563
1
                        return -1;
6564
1
                    }
6565
6566
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6567
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6568
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6569
1
                    if (ret != 0) {
6570
1
                        return -1;
6571
1
                    }
6572
6573
1
                    accessor = std::move(s3_accessor);
6574
1
                    return 0;
6575
1
                }(),
6576
1
                "recycle_stage:get_accessor", &accessor);
6577
6578
1
        if (ret != 0) {
6579
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6580
0
            return ret;
6581
0
        }
6582
6583
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6584
1
                .tag("instance_id", instance_id_)
6585
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6586
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6587
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6588
1
                .tag("obj_info_id", idx)
6589
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6590
1
        ret = accessor->delete_all();
6591
1
        if (ret != 0) {
6592
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6593
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6594
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6595
0
                         << ", ret=" << ret;
6596
0
            return -1;
6597
0
        }
6598
1
        metrics_context.total_recycled_num = ++num_recycled;
6599
1
        metrics_context.report();
6600
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6601
1
        stage_keys.push_back(k);
6602
1
        return 0;
6603
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6543
1
                         this](std::string_view k, std::string_view v) -> int {
6544
1
        ++num_scanned;
6545
1
        RecycleStagePB recycle_stage;
6546
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6547
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6548
0
            return -1;
6549
0
        }
6550
6551
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6552
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6553
0
            LOG(WARNING) << "invalid idx: " << idx;
6554
0
            return -1;
6555
0
        }
6556
6557
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6558
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6559
1
                [&] {
6560
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6561
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6562
1
                    if (!s3_conf) {
6563
1
                        return -1;
6564
1
                    }
6565
6566
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6567
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6568
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6569
1
                    if (ret != 0) {
6570
1
                        return -1;
6571
1
                    }
6572
6573
1
                    accessor = std::move(s3_accessor);
6574
1
                    return 0;
6575
1
                }(),
6576
1
                "recycle_stage:get_accessor", &accessor);
6577
6578
1
        if (ret != 0) {
6579
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6580
0
            return ret;
6581
0
        }
6582
6583
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6584
1
                .tag("instance_id", instance_id_)
6585
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6586
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6587
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6588
1
                .tag("obj_info_id", idx)
6589
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6590
1
        ret = accessor->delete_all();
6591
1
        if (ret != 0) {
6592
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6593
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6594
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6595
0
                         << ", ret=" << ret;
6596
0
            return -1;
6597
0
        }
6598
1
        metrics_context.total_recycled_num = ++num_recycled;
6599
1
        metrics_context.report();
6600
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6601
1
        stage_keys.push_back(k);
6602
1
        return 0;
6603
1
    };
6604
6605
11
    auto loop_done = [&stage_keys, this]() -> int {
6606
1
        if (stage_keys.empty()) return 0;
6607
1
        DORIS_CLOUD_DEFER {
6608
1
            stage_keys.clear();
6609
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6607
1
        DORIS_CLOUD_DEFER {
6608
1
            stage_keys.clear();
6609
1
        };
6610
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6611
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6612
0
            return -1;
6613
0
        }
6614
1
        return 0;
6615
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
6605
1
    auto loop_done = [&stage_keys, this]() -> int {
6606
1
        if (stage_keys.empty()) return 0;
6607
1
        DORIS_CLOUD_DEFER {
6608
1
            stage_keys.clear();
6609
1
        };
6610
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6611
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6612
0
            return -1;
6613
0
        }
6614
1
        return 0;
6615
1
    };
6616
11
    if (config::enable_recycler_stats_metrics) {
6617
0
        scan_and_statistics_stage();
6618
0
    }
6619
    // recycle_func and loop_done for scan and recycle
6620
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
6621
11
}
6622
6623
10
int InstanceRecycler::recycle_expired_stage_objects() {
6624
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
6625
6626
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6627
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
6628
6629
10
    DORIS_CLOUD_DEFER {
6630
10
        int64_t cost =
6631
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6632
10
        metrics_context.finish_report();
6633
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6634
10
                .tag("instance_id", instance_id_);
6635
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
6629
10
    DORIS_CLOUD_DEFER {
6630
10
        int64_t cost =
6631
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6632
10
        metrics_context.finish_report();
6633
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6634
10
                .tag("instance_id", instance_id_);
6635
10
    };
6636
6637
10
    int ret = 0;
6638
6639
10
    if (config::enable_recycler_stats_metrics) {
6640
0
        scan_and_statistics_expired_stage_objects();
6641
0
    }
6642
6643
10
    for (const auto& stage : instance_info_.stages()) {
6644
0
        std::stringstream ss;
6645
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
6646
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
6647
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
6648
0
           << ", prefix=" << stage.obj_info().prefix();
6649
6650
0
        if (stopped()) {
6651
0
            break;
6652
0
        }
6653
0
        if (stage.type() == StagePB::EXTERNAL) {
6654
0
            continue;
6655
0
        }
6656
0
        int idx = stoi(stage.obj_info().id());
6657
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6658
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
6659
0
            continue;
6660
0
        }
6661
6662
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6663
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6664
0
        if (!s3_conf) {
6665
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
6666
0
            continue;
6667
0
        }
6668
6669
0
        s3_conf->prefix = stage.obj_info().prefix();
6670
0
        std::shared_ptr<S3Accessor> accessor;
6671
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
6672
0
        if (ret1 != 0) {
6673
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
6674
0
            ret = -1;
6675
0
            continue;
6676
0
        }
6677
6678
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
6679
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
6680
0
            ret = -1;
6681
0
            continue;
6682
0
        }
6683
6684
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
6685
0
        int64_t expiration_time =
6686
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
6687
0
                config::internal_stage_objects_expire_time_second;
6688
0
        if (config::force_immediate_recycle) {
6689
0
            expiration_time = INT64_MAX;
6690
0
        }
6691
0
        ret1 = accessor->delete_all(expiration_time);
6692
0
        if (ret1 != 0) {
6693
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
6694
0
                         << ss.str();
6695
0
            ret = -1;
6696
0
            continue;
6697
0
        }
6698
0
        metrics_context.total_recycled_num++;
6699
0
        metrics_context.report();
6700
0
    }
6701
10
    return ret;
6702
10
}
6703
6704
193
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
6705
193
    std::lock_guard lock(recycle_tasks_mutex);
6706
193
    running_recycle_tasks[task_name] = start_time;
6707
193
}
6708
6709
193
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
6710
193
    std::lock_guard lock(recycle_tasks_mutex);
6711
193
    DCHECK(running_recycle_tasks[task_name] > 0);
6712
193
    running_recycle_tasks.erase(task_name);
6713
193
}
6714
6715
21
bool InstanceRecycler::check_recycle_tasks() {
6716
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
6717
21
    {
6718
21
        std::lock_guard lock(recycle_tasks_mutex);
6719
21
        tmp_running_recycle_tasks = running_recycle_tasks;
6720
21
    }
6721
6722
21
    bool found = false;
6723
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6724
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
6725
20
        int64_t cost = now - start_time;
6726
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
6727
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
6728
20
                    .tag("instance_id", instance_id_)
6729
20
                    .tag("task", task_name);
6730
20
            found = true;
6731
20
        }
6732
20
    }
6733
6734
21
    return found;
6735
21
}
6736
6737
// Scan and statistics indexes that need to be recycled
6738
0
int InstanceRecycler::scan_and_statistics_indexes() {
6739
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
6740
6741
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
6742
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
6743
0
    std::string index_key0;
6744
0
    std::string index_key1;
6745
0
    recycle_index_key(index_key_info0, &index_key0);
6746
0
    recycle_index_key(index_key_info1, &index_key1);
6747
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6748
6749
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
6750
0
        RecycleIndexPB index_pb;
6751
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
6752
0
            return 0;
6753
0
        }
6754
0
        int64_t current_time = ::time(nullptr);
6755
0
        if (current_time <
6756
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
6757
0
            return 0;
6758
0
        }
6759
        // decode index_id
6760
0
        auto k1 = k;
6761
0
        k1.remove_prefix(1);
6762
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6763
0
        decode_key(&k1, &out);
6764
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
6765
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
6766
0
        std::unique_ptr<Transaction> txn;
6767
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6768
0
        if (err != TxnErrorCode::TXN_OK) {
6769
0
            return 0;
6770
0
        }
6771
0
        std::string val;
6772
0
        err = txn->get(k, &val);
6773
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
6774
0
            return 0;
6775
0
        }
6776
0
        if (err != TxnErrorCode::TXN_OK) {
6777
0
            return 0;
6778
0
        }
6779
0
        index_pb.Clear();
6780
0
        if (!index_pb.ParseFromString(val)) {
6781
0
            return 0;
6782
0
        }
6783
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
6784
0
            return 0;
6785
0
        }
6786
0
        metrics_context.total_need_recycle_num++;
6787
0
        return 0;
6788
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6789
6790
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
6791
0
    metrics_context.report(true);
6792
0
    segment_metrics_context_.report(true);
6793
0
    tablet_metrics_context_.report(true);
6794
0
    return ret;
6795
0
}
6796
6797
// Scan and statistics partitions that need to be recycled
6798
0
int InstanceRecycler::scan_and_statistics_partitions() {
6799
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
6800
6801
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
6802
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
6803
0
    std::string part_key0;
6804
0
    std::string part_key1;
6805
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6806
6807
0
    recycle_partition_key(part_key_info0, &part_key0);
6808
0
    recycle_partition_key(part_key_info1, &part_key1);
6809
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
6810
0
        RecyclePartitionPB part_pb;
6811
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
6812
0
            return 0;
6813
0
        }
6814
0
        int64_t current_time = ::time(nullptr);
6815
0
        if (current_time <
6816
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
6817
0
            return 0;
6818
0
        }
6819
        // decode partition_id
6820
0
        auto k1 = k;
6821
0
        k1.remove_prefix(1);
6822
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6823
0
        decode_key(&k1, &out);
6824
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
6825
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
6826
        // Change state to RECYCLING
6827
0
        std::unique_ptr<Transaction> txn;
6828
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6829
0
        if (err != TxnErrorCode::TXN_OK) {
6830
0
            return 0;
6831
0
        }
6832
0
        std::string val;
6833
0
        err = txn->get(k, &val);
6834
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
6835
0
            return 0;
6836
0
        }
6837
0
        if (err != TxnErrorCode::TXN_OK) {
6838
0
            return 0;
6839
0
        }
6840
0
        part_pb.Clear();
6841
0
        if (!part_pb.ParseFromString(val)) {
6842
0
            return 0;
6843
0
        }
6844
        // Partitions with PREPARED state MUST have no data
6845
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
6846
0
        int ret = 0;
6847
0
        for (int64_t index_id : part_pb.index_id()) {
6848
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
6849
0
                                            partition_id, is_empty_tablet) != 0) {
6850
0
                ret = 0;
6851
0
            }
6852
0
        }
6853
0
        metrics_context.total_need_recycle_num++;
6854
0
        return ret;
6855
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6856
6857
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
6858
0
    metrics_context.report(true);
6859
0
    segment_metrics_context_.report(true);
6860
0
    tablet_metrics_context_.report(true);
6861
0
    return ret;
6862
0
}
6863
6864
// Scan and statistics rowsets that need to be recycled
6865
0
int InstanceRecycler::scan_and_statistics_rowsets() {
6866
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
6867
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
6868
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
6869
0
    std::string recyc_rs_key0;
6870
0
    std::string recyc_rs_key1;
6871
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
6872
0
                recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
6873
0
       int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6874
6875
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
6876
0
        RecycleRowsetPB rowset;
6877
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6878
0
            return 0;
6879
0
        }
6880
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
6881
0
        int64_t current_time = ::time(nullptr);
6882
0
        if (current_time <
6883
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
6884
0
            return 0;
6885
0
        }
6886
6887
0
        if (!rowset.has_type()) {
6888
0
            if (!rowset.has_resource_id()) [[unlikely]] {
6889
0
                return 0;
6890
0
            }
6891
0
            if (rowset.resource_id().empty()) [[unlikely]] {
6892
0
                return 0;
6893
0
            }
6894
0
            metrics_context.total_need_recycle_num++;
6895
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6896
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
6897
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6898
0
            return 0;
6899
0
        }
6900
6901
0
        if(!rowset_meta->has_is_recycled() || !rowset_meta->is_recycled()) {
6902
0
            return 0;
6903
0
        }
6904
6905
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
6906
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
6907
0
                return 0;
6908
0
            }
6909
0
        }
6910
0
        metrics_context.total_need_recycle_num++;
6911
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
6912
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
6913
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
6914
0
        return 0;
6915
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6916
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
6917
0
    metrics_context.report(true);
6918
0
    segment_metrics_context_.report(true);
6919
0
    return ret;
6920
0
}
6921
6922
// Scan and statistics tmp_rowsets that need to be recycled
6923
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
6924
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
6925
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
6926
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
6927
0
    std::string tmp_rs_key0;
6928
0
    std::string tmp_rs_key1;
6929
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
6930
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
6931
6932
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6933
6934
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
6935
0
        doris::RowsetMetaCloudPB rowset;
6936
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6937
0
            return 0;
6938
0
        }
6939
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
6940
0
        int64_t current_time = ::time(nullptr);
6941
0
        if (current_time < expiration) {
6942
0
            return 0;
6943
0
        }
6944
6945
0
        DCHECK_GT(rowset.txn_id(), 0)
6946
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
6947
6948
0
        if(!rowset.has_is_recycled() || !rowset.is_recycled()) {
6949
0
            return 0;
6950
0
        }
6951
6952
0
        if (!rowset.has_resource_id()) {
6953
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
6954
0
                return 0;
6955
0
            }
6956
0
            return 0;
6957
0
        }
6958
6959
0
        metrics_context.total_need_recycle_num++;
6960
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
6961
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
6962
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
6963
0
        return 0;
6964
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6965
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
6966
0
    metrics_context.report(true);
6967
0
    segment_metrics_context_.report(true);
6968
0
    return ret;
6969
0
}
6970
6971
// Scan and statistics abort_timeout_txn that need to be recycled
6972
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
6973
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
6974
6975
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
6976
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6977
0
    std::string begin_txn_running_key;
6978
0
    std::string end_txn_running_key;
6979
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
6980
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
6981
6982
0
    int64_t current_time =
6983
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6984
6985
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
6986
0
                                               std::string_view k, std::string_view v) -> int {
6987
0
        std::unique_ptr<Transaction> txn;
6988
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6989
0
        if (err != TxnErrorCode::TXN_OK) {
6990
0
            return 0;
6991
0
        }
6992
0
        std::string_view k1 = k;
6993
0
        k1.remove_prefix(1);
6994
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6995
0
        if (decode_key(&k1, &out) != 0) {
6996
0
            return 0;
6997
0
        }
6998
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6999
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
7000
        // Update txn_info
7001
0
        std::string txn_inf_key, txn_inf_val;
7002
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
7003
0
        err = txn->get(txn_inf_key, &txn_inf_val);
7004
0
        if (err != TxnErrorCode::TXN_OK) {
7005
0
            return 0;
7006
0
        }
7007
0
        TxnInfoPB txn_info;
7008
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
7009
0
            return 0;
7010
0
        }
7011
7012
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
7013
0
            TxnRunningPB txn_running_pb;
7014
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
7015
0
                return 0;
7016
0
            }
7017
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
7018
0
                return 0;
7019
0
            }
7020
0
            metrics_context.total_need_recycle_num++;
7021
0
        }
7022
0
        return 0;
7023
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7024
7025
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
7026
0
    metrics_context.report(true);
7027
0
    return ret;
7028
0
}
7029
7030
// Scan and statistics expired_txn_label that need to be recycled
7031
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
7032
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
7033
7034
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
7035
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
7036
0
    std::string begin_recycle_txn_key;
7037
0
    std::string end_recycle_txn_key;
7038
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
7039
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
7040
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7041
0
    int64_t current_time_ms =
7042
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7043
7044
    // for calculate the total num or bytes of recyled objects
7045
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
7046
0
        RecycleTxnPB recycle_txn_pb;
7047
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
7048
0
            return 0;
7049
0
        }
7050
0
        if ((config::force_immediate_recycle) ||
7051
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
7052
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
7053
0
             current_time_ms)) {
7054
0
            metrics_context.total_need_recycle_num++;
7055
0
        }
7056
0
        return 0;
7057
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7058
7059
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
7060
0
    metrics_context.report(true);
7061
0
    return ret;
7062
0
}
7063
7064
// Scan and statistics copy_jobs that need to be recycled
7065
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
7066
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
7067
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
7068
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
7069
0
    std::string key0;
7070
0
    std::string key1;
7071
0
    copy_job_key(key_info0, &key0);
7072
0
    copy_job_key(key_info1, &key1);
7073
7074
    // for calculate the total num or bytes of recyled objects
7075
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
7076
0
        CopyJobPB copy_job;
7077
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
7078
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
7079
0
            return 0;
7080
0
        }
7081
7082
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
7083
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
7084
0
                int64_t current_time =
7085
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7086
0
                if (copy_job.finish_time_ms() > 0) {
7087
0
                    if (!config::force_immediate_recycle &&
7088
0
                        current_time < copy_job.finish_time_ms() +
7089
0
                                               config::copy_job_max_retention_second * 1000) {
7090
0
                        return 0;
7091
0
                    }
7092
0
                } else {
7093
0
                    if (!config::force_immediate_recycle &&
7094
0
                        current_time < copy_job.start_time_ms() +
7095
0
                                               config::copy_job_max_retention_second * 1000) {
7096
0
                        return 0;
7097
0
                    }
7098
0
                }
7099
0
            }
7100
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
7101
0
            int64_t current_time =
7102
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7103
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
7104
0
                return 0;
7105
0
            }
7106
0
        }
7107
0
        metrics_context.total_need_recycle_num++;
7108
0
        return 0;
7109
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7110
7111
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7112
0
    metrics_context.report(true);
7113
0
    return ret;
7114
0
}
7115
7116
// Scan and statistics stage that need to be recycled
7117
0
int InstanceRecycler::scan_and_statistics_stage() {
7118
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
7119
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
7120
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
7121
0
    std::string key0 = recycle_stage_key(key_info0);
7122
0
    std::string key1 = recycle_stage_key(key_info1);
7123
7124
    // for calculate the total num or bytes of recyled objects
7125
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
7126
0
                                                        std::string_view v) -> int {
7127
0
        RecycleStagePB recycle_stage;
7128
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
7129
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
7130
0
            return 0;
7131
0
        }
7132
7133
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
7134
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
7135
0
            LOG(WARNING) << "invalid idx: " << idx;
7136
0
            return 0;
7137
0
        }
7138
7139
0
        std::shared_ptr<StorageVaultAccessor> accessor;
7140
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
7141
0
                [&] {
7142
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
7143
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7144
0
                    if (!s3_conf) {
7145
0
                        return 0;
7146
0
                    }
7147
7148
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
7149
0
                    std::shared_ptr<S3Accessor> s3_accessor;
7150
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
7151
0
                    if (ret != 0) {
7152
0
                        return 0;
7153
0
                    }
7154
7155
0
                    accessor = std::move(s3_accessor);
7156
0
                    return 0;
7157
0
                }(),
7158
0
                "recycle_stage:get_accessor", &accessor);
7159
7160
0
        if (ret != 0) {
7161
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
7162
0
            return 0;
7163
0
        }
7164
7165
0
        metrics_context.total_need_recycle_num++;
7166
0
        return 0;
7167
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7168
7169
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7170
0
    metrics_context.report(true);
7171
0
    return ret;
7172
0
}
7173
7174
// Scan and statistics expired_stage_objects that need to be recycled
7175
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
7176
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
7177
7178
    // for calculate the total num or bytes of recyled objects
7179
0
    auto scan_and_statistics = [&metrics_context, this]() {
7180
0
        for (const auto& stage : instance_info_.stages()) {
7181
0
            if (stopped()) {
7182
0
                break;
7183
0
            }
7184
0
            if (stage.type() == StagePB::EXTERNAL) {
7185
0
                continue;
7186
0
            }
7187
0
            int idx = stoi(stage.obj_info().id());
7188
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
7189
0
                continue;
7190
0
            }
7191
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
7192
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7193
0
            if (!s3_conf) {
7194
0
                continue;
7195
0
            }
7196
0
            s3_conf->prefix = stage.obj_info().prefix();
7197
0
            std::shared_ptr<S3Accessor> accessor;
7198
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
7199
0
            if (ret1 != 0) {
7200
0
                continue;
7201
0
            }
7202
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
7203
0
                continue;
7204
0
            }
7205
0
            metrics_context.total_need_recycle_num++;
7206
0
        }
7207
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
7208
7209
0
    scan_and_statistics();
7210
0
    metrics_context.report(true);
7211
0
    return 0;
7212
0
}
7213
7214
// Scan and statistics versions that need to be recycled
7215
0
int InstanceRecycler::scan_and_statistics_versions() {
7216
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
7217
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
7218
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
7219
7220
0
    int64_t last_scanned_table_id = 0;
7221
0
    bool is_recycled = false; // Is last scanned kv recycled
7222
    // for calculate the total num or bytes of recyled objects
7223
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
7224
0
                                       std::string_view k, std::string_view) {
7225
0
        auto k1 = k;
7226
0
        k1.remove_prefix(1);
7227
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
7228
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7229
0
        decode_key(&k1, &out);
7230
0
        DCHECK_EQ(out.size(), 6) << k;
7231
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
7232
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
7233
0
            metrics_context.total_need_recycle_num +=
7234
0
                    is_recycled; // Version kv of this table has been recycled
7235
0
            return 0;
7236
0
        }
7237
0
        last_scanned_table_id = table_id;
7238
0
        is_recycled = false;
7239
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
7240
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
7241
0
        std::unique_ptr<Transaction> txn;
7242
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7243
0
        if (err != TxnErrorCode::TXN_OK) {
7244
0
            return 0;
7245
0
        }
7246
0
        std::unique_ptr<RangeGetIterator> iter;
7247
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
7248
0
        if (err != TxnErrorCode::TXN_OK) {
7249
0
            return 0;
7250
0
        }
7251
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
7252
0
            return 0;
7253
0
        }
7254
0
        metrics_context.total_need_recycle_num++;
7255
0
        is_recycled = true;
7256
0
        return 0;
7257
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7258
7259
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
7260
0
    metrics_context.report(true);
7261
0
    return ret;
7262
0
}
7263
7264
// Scan and statistics restore jobs that need to be recycled
7265
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
7266
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
7267
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
7268
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
7269
0
    std::string restore_job_key0;
7270
0
    std::string restore_job_key1;
7271
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
7272
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
7273
7274
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7275
7276
    // for calculate the total num or bytes of recyled objects
7277
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
7278
0
        RestoreJobCloudPB restore_job_pb;
7279
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
7280
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
7281
0
            return 0;
7282
0
        }
7283
0
        int64_t expiration =
7284
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
7285
0
        int64_t current_time = ::time(nullptr);
7286
0
        if (current_time < expiration) { // not expired
7287
0
            return 0;
7288
0
        }
7289
0
        metrics_context.total_need_recycle_num++;
7290
0
        if(restore_job_pb.need_recycle_data()) {
7291
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
7292
0
        }
7293
0
        return 0;
7294
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7295
7296
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
7297
0
    metrics_context.report(true);
7298
0
    return ret;
7299
0
}
7300
7301
3
void InstanceRecycler::scan_and_statistics_operation_logs() {
7302
3
    if (!should_recycle_versioned_keys()) {
7303
0
        return;
7304
0
    }
7305
7306
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_operation_logs");
7307
7308
3
    OperationLogRecycleChecker recycle_checker(instance_id_, txn_kv_.get(), instance_info_);
7309
3
    if (recycle_checker.init() != 0) {
7310
0
        return;
7311
0
    }
7312
7313
3
    std::string log_key_prefix = versioned::log_key(instance_id_);
7314
3
    std::string begin_key = encode_versioned_key(log_key_prefix, Versionstamp::min());
7315
3
    std::string end_key = encode_versioned_key(log_key_prefix, Versionstamp::max());
7316
7317
3
    std::unique_ptr<BlobIterator> iter = blob_get_range(txn_kv_, begin_key, end_key);
7318
8
    for (; iter->valid(); iter->next()) {
7319
5
        OperationLogPB operation_log;
7320
5
        if (!iter->parse_value(&operation_log)) {
7321
0
            continue;
7322
0
        }
7323
7324
5
        std::string_view key = iter->key();
7325
5
        Versionstamp log_versionstamp;
7326
5
        if (!decode_versioned_key(&key, &log_versionstamp)) {
7327
0
            continue;
7328
0
        }
7329
7330
5
        OperationLogReferenceInfo ref_info;
7331
5
        if (recycle_checker.can_recycle(log_versionstamp, operation_log.min_timestamp(),
7332
5
                                         &ref_info)) {
7333
4
            metrics_context.total_need_recycle_num++;
7334
4
            metrics_context.total_need_recycle_data_size += operation_log.ByteSizeLong();
7335
4
        }
7336
5
    }
7337
7338
3
    metrics_context.report(true);
7339
3
}
7340
7341
int InstanceRecycler::classify_rowset_task_by_ref_count(
7342
60
        RowsetDeleteTask& task, std::vector<RowsetDeleteTask>& batch_delete_tasks) {
7343
60
    constexpr int MAX_RETRY = 10;
7344
60
    const auto& rowset_meta = task.rowset_meta;
7345
60
    int64_t tablet_id = rowset_meta.tablet_id();
7346
60
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
7347
60
    std::string_view reference_instance_id = instance_id_;
7348
60
    if (rowset_meta.has_reference_instance_id()) {
7349
5
        reference_instance_id = rowset_meta.reference_instance_id();
7350
5
    }
7351
7352
61
    for (int i = 0; i < MAX_RETRY; ++i) {
7353
61
        std::unique_ptr<Transaction> txn;
7354
61
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7355
61
        if (err != TxnErrorCode::TXN_OK) {
7356
0
            LOG_WARNING("failed to create txn when classifying rowset task")
7357
0
                    .tag("instance_id", instance_id_)
7358
0
                    .tag("tablet_id", tablet_id)
7359
0
                    .tag("rowset_id", rowset_id)
7360
0
                    .tag("err", err);
7361
0
            return -1;
7362
0
        }
7363
7364
61
        std::string rowset_ref_count_key =
7365
61
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
7366
61
        task.rowset_ref_count_key = rowset_ref_count_key;
7367
7368
61
        int64_t ref_count = 0;
7369
61
        {
7370
61
            std::string value;
7371
61
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
7372
61
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7373
0
                ref_count = 1;
7374
61
            } else if (err != TxnErrorCode::TXN_OK) {
7375
0
                LOG_WARNING("failed to get rowset ref count key when classifying")
7376
0
                        .tag("instance_id", instance_id_)
7377
0
                        .tag("tablet_id", tablet_id)
7378
0
                        .tag("rowset_id", rowset_id)
7379
0
                        .tag("err", err);
7380
0
                return -1;
7381
61
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
7382
0
                LOG_WARNING("failed to decode rowset data ref count when classifying")
7383
0
                        .tag("instance_id", instance_id_)
7384
0
                        .tag("tablet_id", tablet_id)
7385
0
                        .tag("rowset_id", rowset_id)
7386
0
                        .tag("value", hex(value));
7387
0
                return -1;
7388
0
            }
7389
61
        }
7390
7391
61
        if (ref_count > 1) {
7392
            // ref_count > 1: decrement count, remove recycle keys, don't add to batch delete
7393
12
            txn->atomic_add(rowset_ref_count_key, -1);
7394
12
            LOG_INFO("decrease rowset data ref count in classification phase")
7395
12
                    .tag("instance_id", instance_id_)
7396
12
                    .tag("tablet_id", tablet_id)
7397
12
                    .tag("rowset_id", rowset_id)
7398
12
                    .tag("ref_count", ref_count - 1)
7399
12
                    .tag("ref_count_key", hex(rowset_ref_count_key));
7400
7401
12
            if (!task.recycle_rowset_key.empty()) {
7402
0
                txn->remove(task.recycle_rowset_key);
7403
0
                LOG_INFO("remove recycle rowset key in classification phase")
7404
0
                        .tag("key", hex(task.recycle_rowset_key));
7405
0
            }
7406
12
            if (!task.non_versioned_rowset_key.empty()) {
7407
12
                txn->remove(task.non_versioned_rowset_key);
7408
12
                LOG_INFO("remove non versioned rowset key in classification phase")
7409
12
                        .tag("key", hex(task.non_versioned_rowset_key));
7410
12
            }
7411
7412
12
            err = txn->commit();
7413
12
            if (err == TxnErrorCode::TXN_CONFLICT) {
7414
1
                VLOG_DEBUG << "decrease rowset ref count but txn conflict in classification, retry"
7415
0
                           << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
7416
0
                           << ", ref_count=" << ref_count << ", retry=" << i;
7417
1
                std::this_thread::sleep_for(std::chrono::milliseconds(500));
7418
1
                continue;
7419
11
            } else if (err != TxnErrorCode::TXN_OK) {
7420
0
                LOG_WARNING("failed to commit txn when classifying rowset task")
7421
0
                        .tag("instance_id", instance_id_)
7422
0
                        .tag("tablet_id", tablet_id)
7423
0
                        .tag("rowset_id", rowset_id)
7424
0
                        .tag("err", err);
7425
0
                return -1;
7426
0
            }
7427
11
            return 1; // handled, not added to batch delete
7428
49
        } else {
7429
            // ref_count == 1: Add to batch delete plan without modifying any KV.
7430
            // Keep recycle_rowset_key as "pending recycle" marker until data is actually deleted.
7431
49
            LOG_INFO("add rowset to batch delete plan")
7432
49
                    .tag("instance_id", instance_id_)
7433
49
                    .tag("tablet_id", tablet_id)
7434
49
                    .tag("rowset_id", rowset_id)
7435
49
                    .tag("resource_id", rowset_meta.resource_id())
7436
49
                    .tag("ref_count", ref_count);
7437
7438
49
            batch_delete_tasks.push_back(std::move(task));
7439
49
            return 0; // added to batch delete
7440
49
        }
7441
61
    }
7442
7443
0
    LOG_WARNING("failed to classify rowset task after retry")
7444
0
            .tag("instance_id", instance_id_)
7445
0
            .tag("tablet_id", tablet_id)
7446
0
            .tag("rowset_id", rowset_id)
7447
0
            .tag("retry", MAX_RETRY);
7448
0
    return -1;
7449
60
}
7450
7451
10
int InstanceRecycler::cleanup_rowset_metadata(const std::vector<RowsetDeleteTask>& tasks) {
7452
10
    int ret = 0;
7453
49
    for (const auto& task : tasks) {
7454
49
        int64_t tablet_id = task.rowset_meta.tablet_id();
7455
49
        const std::string& rowset_id = task.rowset_meta.rowset_id_v2();
7456
7457
        // Note: decrement_packed_file_ref_counts is already called in delete_rowset_data,
7458
        // so we don't need to call it again here.
7459
7460
        // Remove all metadata keys in one transaction
7461
49
        std::unique_ptr<Transaction> txn;
7462
49
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7463
49
        if (err != TxnErrorCode::TXN_OK) {
7464
0
            LOG_WARNING("failed to create txn when cleaning up metadata")
7465
0
                    .tag("instance_id", instance_id_)
7466
0
                    .tag("tablet_id", tablet_id)
7467
0
                    .tag("rowset_id", rowset_id)
7468
0
                    .tag("err", err);
7469
0
            ret = -1;
7470
0
            continue;
7471
0
        }
7472
7473
49
        std::string_view reference_instance_id = instance_id_;
7474
49
        if (task.rowset_meta.has_reference_instance_id()) {
7475
5
            reference_instance_id = task.rowset_meta.reference_instance_id();
7476
5
        }
7477
7478
49
        txn->remove(task.rowset_ref_count_key);
7479
49
        LOG_INFO("delete rowset data ref count key in cleanup phase")
7480
49
                .tag("instance_id", instance_id_)
7481
49
                .tag("tablet_id", tablet_id)
7482
49
                .tag("rowset_id", rowset_id)
7483
49
                .tag("ref_count_key", hex(task.rowset_ref_count_key));
7484
7485
49
        std::string dbm_start_key =
7486
49
                meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
7487
49
        std::string dbm_end_key = meta_delete_bitmap_key(
7488
49
                {reference_instance_id, tablet_id, rowset_id,
7489
49
                 std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
7490
49
        txn->remove(dbm_start_key, dbm_end_key);
7491
49
        LOG_INFO("remove delete bitmap kv in cleanup phase")
7492
49
                .tag("instance_id", instance_id_)
7493
49
                .tag("tablet_id", tablet_id)
7494
49
                .tag("rowset_id", rowset_id)
7495
49
                .tag("begin", hex(dbm_start_key))
7496
49
                .tag("end", hex(dbm_end_key));
7497
7498
49
        std::string versioned_dbm_start_key =
7499
49
                versioned::meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id});
7500
49
        std::string versioned_dbm_end_key = versioned_dbm_start_key;
7501
49
        encode_int64(INT64_MAX, &versioned_dbm_end_key);
7502
49
        txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
7503
49
        LOG_INFO("remove versioned delete bitmap kv in cleanup phase")
7504
49
                .tag("instance_id", instance_id_)
7505
49
                .tag("tablet_id", tablet_id)
7506
49
                .tag("rowset_id", rowset_id)
7507
49
                .tag("begin", hex(versioned_dbm_start_key))
7508
49
                .tag("end", hex(versioned_dbm_end_key));
7509
7510
        // Remove versioned meta rowset key
7511
49
        if (!task.versioned_rowset_key.empty()) {
7512
49
            versioned::document_remove<RowsetMetaCloudPB>(
7513
49
                txn.get(), task.versioned_rowset_key, task.versionstamp);
7514
49
            LOG_INFO("remove versioned meta rowset key in cleanup phase")
7515
49
                    .tag("instance_id", instance_id_)
7516
49
                    .tag("tablet_id", tablet_id)
7517
49
                    .tag("rowset_id", rowset_id)
7518
49
                    .tag("key_prefix", hex(task.versioned_rowset_key));
7519
49
        }
7520
7521
49
        if (!task.non_versioned_rowset_key.empty()) {
7522
49
            txn->remove(task.non_versioned_rowset_key);
7523
49
            LOG_INFO("remove non versioned rowset key in cleanup phase")
7524
49
                    .tag("instance_id", instance_id_)
7525
49
                    .tag("tablet_id", tablet_id)
7526
49
                    .tag("rowset_id", rowset_id)
7527
49
                    .tag("key", hex(task.non_versioned_rowset_key));
7528
49
        }
7529
7530
        // Remove recycle_rowset_key last to ensure retry safety:
7531
        // if cleanup fails, this key remains and triggers next round retry.
7532
49
        if (!task.recycle_rowset_key.empty()) {
7533
0
            txn->remove(task.recycle_rowset_key);
7534
0
            LOG_INFO("remove recycle rowset key in cleanup phase")
7535
0
                    .tag("instance_id", instance_id_)
7536
0
                    .tag("tablet_id", tablet_id)
7537
0
                    .tag("rowset_id", rowset_id)
7538
0
                    .tag("key", hex(task.recycle_rowset_key));
7539
0
        }
7540
7541
49
        err = txn->commit();
7542
49
        if (err != TxnErrorCode::TXN_OK) {
7543
            // Metadata cleanup failed. recycle_rowset_key remains, next round will retry.
7544
0
            LOG_WARNING("failed to commit cleanup metadata txn, will retry next round")
7545
0
                    .tag("instance_id", instance_id_)
7546
0
                    .tag("tablet_id", tablet_id)
7547
0
                    .tag("rowset_id", rowset_id)
7548
0
                    .tag("err", err);
7549
0
            ret = -1;
7550
0
            continue;
7551
0
        }
7552
7553
49
        LOG_INFO("cleanup rowset metadata success")
7554
49
                .tag("instance_id", instance_id_)
7555
49
                .tag("tablet_id", tablet_id)
7556
49
                .tag("rowset_id", rowset_id);
7557
49
    }
7558
10
    return ret;
7559
10
}
7560
7561
} // namespace doris::cloud