Coverage Report

Created: 2026-01-27 17:05

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <functional>
36
#include <initializer_list>
37
#include <memory>
38
#include <numeric>
39
#include <random>
40
#include <string>
41
#include <string_view>
42
#include <thread>
43
#include <unordered_map>
44
#include <utility>
45
#include <variant>
46
47
#include "common/defer.h"
48
#include "common/stopwatch.h"
49
#include "meta-service/meta_service.h"
50
#include "meta-service/meta_service_helper.h"
51
#include "meta-service/meta_service_schema.h"
52
#include "meta-store/blob_message.h"
53
#include "meta-store/meta_reader.h"
54
#include "meta-store/txn_kv.h"
55
#include "meta-store/txn_kv_error.h"
56
#include "meta-store/versioned_value.h"
57
#include "recycler/checker.h"
58
#ifdef ENABLE_HDFS_STORAGE_VAULT
59
#include "recycler/hdfs_accessor.h"
60
#endif
61
#include "recycler/s3_accessor.h"
62
#include "recycler/storage_vault_accessor.h"
63
#ifdef UNIT_TEST
64
#include "../test/mock_accessor.h"
65
#endif
66
#include "common/bvars.h"
67
#include "common/config.h"
68
#include "common/encryption_util.h"
69
#include "common/logging.h"
70
#include "common/simple_thread_pool.h"
71
#include "common/util.h"
72
#include "cpp/sync_point.h"
73
#include "meta-store/codec.h"
74
#include "meta-store/document_message.h"
75
#include "meta-store/keys.h"
76
#include "recycler/recycler_service.h"
77
#include "recycler/sync_executor.h"
78
#include "recycler/util.h"
79
80
namespace doris::cloud {
81
82
using namespace std::chrono;
83
84
namespace {
85
86
0
int64_t packed_file_retry_sleep_ms() {
87
0
    const int64_t min_ms = std::max<int64_t>(0, config::packed_file_txn_retry_sleep_min_ms);
88
0
    const int64_t max_ms = std::max<int64_t>(min_ms, config::packed_file_txn_retry_sleep_max_ms);
89
0
    thread_local std::mt19937_64 gen(std::random_device {}());
90
0
    std::uniform_int_distribution<int64_t> dist(min_ms, max_ms);
91
0
    return dist(gen);
92
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
93
94
0
void sleep_for_packed_file_retry() {
95
0
    std::this_thread::sleep_for(std::chrono::milliseconds(packed_file_retry_sleep_ms()));
96
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
97
98
} // namespace
99
100
// return 0 for success get a key, 1 for key not found, negative for error
101
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
102
0
    std::unique_ptr<Transaction> txn;
103
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
104
0
    if (err != TxnErrorCode::TXN_OK) {
105
0
        return -1;
106
0
    }
107
0
    switch (txn->get(key, &val, true)) {
108
0
    case TxnErrorCode::TXN_OK:
109
0
        return 0;
110
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
111
0
        return 1;
112
0
    default:
113
0
        return -1;
114
0
    };
115
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
116
117
// 0 for success, negative for error
118
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
119
324
                   std::unique_ptr<RangeGetIterator>& it) {
120
324
    std::unique_ptr<Transaction> txn;
121
324
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
324
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
324
    switch (txn->get(begin, end, &it, true)) {
126
324
    case TxnErrorCode::TXN_OK:
127
324
        return 0;
128
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
129
0
        return 1;
130
0
    default:
131
0
        return -1;
132
324
    };
133
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
119
31
                   std::unique_ptr<RangeGetIterator>& it) {
120
31
    std::unique_ptr<Transaction> txn;
121
31
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
31
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
31
    switch (txn->get(begin, end, &it, true)) {
126
31
    case TxnErrorCode::TXN_OK:
127
31
        return 0;
128
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
129
0
        return 1;
130
0
    default:
131
0
        return -1;
132
31
    };
133
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
119
293
                   std::unique_ptr<RangeGetIterator>& it) {
120
293
    std::unique_ptr<Transaction> txn;
121
293
    TxnErrorCode err = txn_kv->create_txn(&txn);
122
293
    if (err != TxnErrorCode::TXN_OK) {
123
0
        return -1;
124
0
    }
125
293
    switch (txn->get(begin, end, &it, true)) {
126
293
    case TxnErrorCode::TXN_OK:
127
293
        return 0;
128
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
129
0
        return 1;
130
0
    default:
131
0
        return -1;
132
293
    };
133
0
}
134
135
// return 0 for success otherwise error
136
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
137
6
    std::unique_ptr<Transaction> txn;
138
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
139
6
    if (err != TxnErrorCode::TXN_OK) {
140
0
        return -1;
141
0
    }
142
10
    for (auto k : keys) {
143
10
        txn->remove(k);
144
10
    }
145
6
    switch (txn->commit()) {
146
6
    case TxnErrorCode::TXN_OK:
147
6
        return 0;
148
0
    case TxnErrorCode::TXN_CONFLICT:
149
0
        return -1;
150
0
    default:
151
0
        return -1;
152
6
    }
153
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
136
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
137
1
    std::unique_ptr<Transaction> txn;
138
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
139
1
    if (err != TxnErrorCode::TXN_OK) {
140
0
        return -1;
141
0
    }
142
1
    for (auto k : keys) {
143
1
        txn->remove(k);
144
1
    }
145
1
    switch (txn->commit()) {
146
1
    case TxnErrorCode::TXN_OK:
147
1
        return 0;
148
0
    case TxnErrorCode::TXN_CONFLICT:
149
0
        return -1;
150
0
    default:
151
0
        return -1;
152
1
    }
153
1
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
136
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
137
5
    std::unique_ptr<Transaction> txn;
138
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
139
5
    if (err != TxnErrorCode::TXN_OK) {
140
0
        return -1;
141
0
    }
142
9
    for (auto k : keys) {
143
9
        txn->remove(k);
144
9
    }
145
5
    switch (txn->commit()) {
146
5
    case TxnErrorCode::TXN_OK:
147
5
        return 0;
148
0
    case TxnErrorCode::TXN_CONFLICT:
149
0
        return -1;
150
0
    default:
151
0
        return -1;
152
5
    }
153
5
}
154
155
// return 0 for success otherwise error
156
118
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
157
118
    std::unique_ptr<Transaction> txn;
158
118
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
118
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
106k
    for (auto& k : keys) {
163
106k
        txn->remove(k);
164
106k
    }
165
118
    switch (txn->commit()) {
166
118
    case TxnErrorCode::TXN_OK:
167
118
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
118
    }
173
118
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
156
33
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
157
33
    std::unique_ptr<Transaction> txn;
158
33
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
33
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
33
    for (auto& k : keys) {
163
16
        txn->remove(k);
164
16
    }
165
33
    switch (txn->commit()) {
166
33
    case TxnErrorCode::TXN_OK:
167
33
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
33
    }
173
33
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
156
85
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
157
85
    std::unique_ptr<Transaction> txn;
158
85
    TxnErrorCode err = txn_kv->create_txn(&txn);
159
85
    if (err != TxnErrorCode::TXN_OK) {
160
0
        return -1;
161
0
    }
162
106k
    for (auto& k : keys) {
163
106k
        txn->remove(k);
164
106k
    }
165
85
    switch (txn->commit()) {
166
85
    case TxnErrorCode::TXN_OK:
167
85
        return 0;
168
0
    case TxnErrorCode::TXN_CONFLICT:
169
0
        return -1;
170
0
    default:
171
0
        return -1;
172
85
    }
173
85
}
174
175
// return 0 for success otherwise error
176
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
177
106k
                                       std::string_view end) {
178
106k
    std::unique_ptr<Transaction> txn;
179
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
180
106k
    if (err != TxnErrorCode::TXN_OK) {
181
0
        return -1;
182
0
    }
183
106k
    txn->remove(begin, end);
184
106k
    switch (txn->commit()) {
185
106k
    case TxnErrorCode::TXN_OK:
186
106k
        return 0;
187
0
    case TxnErrorCode::TXN_CONFLICT:
188
0
        return -1;
189
0
    default:
190
0
        return -1;
191
106k
    }
192
106k
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
177
16
                                       std::string_view end) {
178
16
    std::unique_ptr<Transaction> txn;
179
16
    TxnErrorCode err = txn_kv->create_txn(&txn);
180
16
    if (err != TxnErrorCode::TXN_OK) {
181
0
        return -1;
182
0
    }
183
16
    txn->remove(begin, end);
184
16
    switch (txn->commit()) {
185
16
    case TxnErrorCode::TXN_OK:
186
16
        return 0;
187
0
    case TxnErrorCode::TXN_CONFLICT:
188
0
        return -1;
189
0
    default:
190
0
        return -1;
191
16
    }
192
16
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
177
106k
                                       std::string_view end) {
178
106k
    std::unique_ptr<Transaction> txn;
179
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
180
106k
    if (err != TxnErrorCode::TXN_OK) {
181
0
        return -1;
182
0
    }
183
106k
    txn->remove(begin, end);
184
106k
    switch (txn->commit()) {
185
106k
    case TxnErrorCode::TXN_OK:
186
106k
        return 0;
187
0
    case TxnErrorCode::TXN_CONFLICT:
188
0
        return -1;
189
0
    default:
190
0
        return -1;
191
106k
    }
192
106k
}
193
194
void scan_restore_job_rowset(
195
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
196
        std::string& msg,
197
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
198
199
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
200
                                      int64_t num_scanned, int64_t num_recycled,
201
52
                                      int64_t start_time) {
202
52
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
203
0
        int64_t cost =
204
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
205
0
        if (cost > config::recycle_task_threshold_seconds) {
206
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
207
0
                    .tag("instance_id", instance_id)
208
0
                    .tag("task", task_name)
209
0
                    .tag("num_scanned", num_scanned)
210
0
                    .tag("num_recycled", num_recycled);
211
0
        }
212
0
    }
213
52
    return;
214
52
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
201
2
                                      int64_t start_time) {
202
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
203
0
        int64_t cost =
204
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
205
0
        if (cost > config::recycle_task_threshold_seconds) {
206
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
207
0
                    .tag("instance_id", instance_id)
208
0
                    .tag("task", task_name)
209
0
                    .tag("num_scanned", num_scanned)
210
0
                    .tag("num_recycled", num_recycled);
211
0
        }
212
0
    }
213
2
    return;
214
2
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
201
50
                                      int64_t start_time) {
202
50
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
203
0
        int64_t cost =
204
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
205
0
        if (cost > config::recycle_task_threshold_seconds) {
206
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
207
0
                    .tag("instance_id", instance_id)
208
0
                    .tag("task", task_name)
209
0
                    .tag("num_scanned", num_scanned)
210
0
                    .tag("num_recycled", num_recycled);
211
0
        }
212
0
    }
213
50
    return;
214
50
}
215
216
4
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
217
4
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);
218
219
4
    auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
220
4
                                                               "s3_producer_pool");
221
4
    s3_producer_pool->start();
222
4
    auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
223
4
                                                                  "recycle_tablet_pool");
224
4
    recycle_tablet_pool->start();
225
4
    auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>(
226
4
            config::recycle_pool_parallelism, "group_recycle_function_pool");
227
4
    group_recycle_function_pool->start();
228
4
    _thread_pool_group =
229
4
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
230
4
                                    std::move(group_recycle_function_pool));
231
232
4
    auto resource_mgr = std::make_shared<ResourceManager>(txn_kv_);
233
4
    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_, std::move(resource_mgr));
234
4
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);
235
4
}
236
237
4
Recycler::~Recycler() {
238
4
    if (!stopped()) {
239
0
        stop();
240
0
    }
241
4
}
242
243
4
void Recycler::instance_scanner_callback() {
244
    // sleep 60 seconds before scheduling for the launch procedure to complete:
245
    // some bad hdfs connection may cause some log to stdout stderr
246
    // which may pollute .out file and affect the script to check success
247
4
    std::this_thread::sleep_for(
248
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
249
8
    while (!stopped()) {
250
4
        std::vector<InstanceInfoPB> instances;
251
4
        get_all_instances(txn_kv_.get(), instances);
252
        // TODO(plat1ko): delete job recycle kv of non-existent instances
253
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
254
4
            std::stringstream ss;
255
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
256
4
            return ss.str();
257
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
253
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
254
4
            std::stringstream ss;
255
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
256
4
            return ss.str();
257
4
        }();
258
4
        if (!instances.empty()) {
259
            // enqueue instances
260
3
            std::lock_guard lock(mtx_);
261
30
            for (auto& instance : instances) {
262
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
263
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
264
                // skip instance already in pending queue
265
30
                if (success) {
266
30
                    pending_instance_queue_.push_back(std::move(instance));
267
30
                }
268
30
            }
269
3
            pending_instance_cond_.notify_all();
270
3
        }
271
4
        {
272
4
            std::unique_lock lock(mtx_);
273
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
274
7
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
274
7
                               [&]() { return stopped(); });
275
4
        }
276
4
    }
277
4
}
278
279
8
void Recycler::recycle_callback() {
280
38
    while (!stopped()) {
281
38
        InstanceInfoPB instance;
282
38
        {
283
38
            std::unique_lock lock(mtx_);
284
38
            pending_instance_cond_.wait(
285
50
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
285
50
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
286
38
            if (stopped()) {
287
8
                return;
288
8
            }
289
30
            instance = std::move(pending_instance_queue_.front());
290
30
            pending_instance_queue_.pop_front();
291
30
            pending_instance_set_.erase(instance.instance_id());
292
30
        }
293
0
        auto& instance_id = instance.instance_id();
294
30
        {
295
30
            std::lock_guard lock(mtx_);
296
            // skip instance in recycling
297
30
            if (recycling_instance_map_.count(instance_id)) continue;
298
30
        }
299
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
300
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
301
302
30
        if (int r = instance_recycler->init(); r != 0) {
303
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
304
0
                         << " ret=" << r;
305
0
            continue;
306
0
        }
307
30
        std::string recycle_job_key;
308
30
        job_recycle_key({instance_id}, &recycle_job_key);
309
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
310
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
311
30
        if (ret != 0) { // Prepare failed
312
20
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
313
20
                         << " ret=" << ret;
314
20
            continue;
315
20
        } else {
316
10
            std::lock_guard lock(mtx_);
317
10
            recycling_instance_map_.emplace(instance_id, instance_recycler);
318
10
        }
319
10
        if (stopped()) return;
320
10
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
321
10
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
322
10
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
323
10
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
324
10
        ret = instance_recycler->do_recycle();
325
        // If instance recycler has been aborted, don't finish this job
326
327
10
        if (!instance_recycler->stopped()) {
328
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
329
10
                                        ret == 0, ctime_ms);
330
10
        }
331
10
        if (instance_recycler->stopped() || ret != 0) {
332
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
333
0
        }
334
10
        {
335
10
            std::lock_guard lock(mtx_);
336
10
            recycling_instance_map_.erase(instance_id);
337
10
        }
338
339
10
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
340
10
        auto elpased_ms = now - ctime_ms;
341
10
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
342
10
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
343
10
        g_bvar_recycler_instance_next_ts.put({instance_id},
344
10
                                             now + config::recycle_interval_seconds * 1000);
345
10
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
346
10
        LOG(INFO) << "recycle instance done, "
347
10
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
348
10
                  << " now: " << now;
349
350
10
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
351
352
10
        LOG_WARNING("finish recycle instance")
353
10
                .tag("instance_id", instance_id)
354
10
                .tag("cost_ms", elpased_ms);
355
10
    }
356
8
}
357
358
4
void Recycler::lease_recycle_jobs() {
359
54
    while (!stopped()) {
360
50
        std::vector<std::string> instances;
361
50
        instances.reserve(recycling_instance_map_.size());
362
50
        {
363
50
            std::lock_guard lock(mtx_);
364
50
            for (auto& [id, _] : recycling_instance_map_) {
365
30
                instances.push_back(id);
366
30
            }
367
50
        }
368
50
        for (auto& i : instances) {
369
30
            std::string recycle_job_key;
370
30
            job_recycle_key({i}, &recycle_job_key);
371
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
372
30
            if (ret == 1) {
373
0
                std::lock_guard lock(mtx_);
374
0
                if (auto it = recycling_instance_map_.find(i);
375
0
                    it != recycling_instance_map_.end()) {
376
0
                    it->second->stop();
377
0
                }
378
0
            }
379
30
        }
380
50
        {
381
50
            std::unique_lock lock(mtx_);
382
50
            notifier_.wait_for(lock,
383
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
384
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
384
100
                               [&]() { return stopped(); });
385
50
        }
386
50
    }
387
4
}
388
389
4
void Recycler::check_recycle_tasks() {
390
7
    while (!stopped()) {
391
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
392
3
        {
393
3
            std::lock_guard lock(mtx_);
394
3
            recycling_instance_map = recycling_instance_map_;
395
3
        }
396
3
        for (auto& entry : recycling_instance_map) {
397
0
            entry.second->check_recycle_tasks();
398
0
        }
399
400
3
        std::unique_lock lock(mtx_);
401
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
402
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
402
6
                           [&]() { return stopped(); });
403
3
    }
404
4
}
405
406
4
int Recycler::start(brpc::Server* server) {
407
4
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
408
4
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
409
4
    S3Environment::getInstance();
410
411
4
    if (config::enable_checker) {
412
0
        checker_ = std::make_unique<Checker>(txn_kv_);
413
0
        int ret = checker_->start();
414
0
        std::string msg;
415
0
        if (ret != 0) {
416
0
            msg = "failed to start checker";
417
0
            LOG(ERROR) << msg;
418
0
            std::cerr << msg << std::endl;
419
0
            return ret;
420
0
        }
421
0
        msg = "checker started";
422
0
        LOG(INFO) << msg;
423
0
        std::cout << msg << std::endl;
424
0
    }
425
426
4
    if (server) {
427
        // Add service
428
1
        auto recycler_service =
429
1
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
430
1
        server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE);
431
1
    }
432
433
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
Line
Count
Source
433
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
434
12
    for (int i = 0; i < config::recycle_concurrency; ++i) {
435
8
        workers_.emplace_back([this] { recycle_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
Line
Count
Source
435
8
        workers_.emplace_back([this] { recycle_callback(); });
436
8
    }
437
438
4
    workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this);
439
4
    workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this);
440
441
4
    if (config::enable_snapshot_data_migrator) {
442
0
        snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_);
443
0
        int ret = snapshot_data_migrator_->start();
444
0
        if (ret != 0) {
445
0
            LOG(ERROR) << "failed to start snapshot data migrator";
446
0
            return ret;
447
0
        }
448
0
        LOG(INFO) << "snapshot data migrator started";
449
0
    }
450
451
4
    if (config::enable_snapshot_chain_compactor) {
452
0
        snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_);
453
0
        int ret = snapshot_chain_compactor_->start();
454
0
        if (ret != 0) {
455
0
            LOG(ERROR) << "failed to start snapshot chain compactor";
456
0
            return ret;
457
0
        }
458
0
        LOG(INFO) << "snapshot chain compactor started";
459
0
    }
460
461
4
    return 0;
462
4
}
463
464
4
void Recycler::stop() {
465
4
    stopped_ = true;
466
4
    notifier_.notify_all();
467
4
    pending_instance_cond_.notify_all();
468
4
    {
469
4
        std::lock_guard lock(mtx_);
470
4
        for (auto& [_, recycler] : recycling_instance_map_) {
471
0
            recycler->stop();
472
0
        }
473
4
    }
474
20
    for (auto& w : workers_) {
475
20
        if (w.joinable()) w.join();
476
20
    }
477
4
    if (checker_) {
478
0
        checker_->stop();
479
0
    }
480
4
    if (snapshot_data_migrator_) {
481
0
        snapshot_data_migrator_->stop();
482
0
    }
483
4
    if (snapshot_chain_compactor_) {
484
0
        snapshot_chain_compactor_->stop();
485
0
    }
486
4
}
487
488
// Cache of inverted index information keyed by <index_id, schema_version>.
//
// Schemas that carry inverted indexes are stored in `inverted_index_id_map_`; schemas
// that were read and found to have none are remembered in
// `schemas_without_inverted_index_` so they are not fetched from the kv store again.
// Both containers are guarded by `mtx_`.
class InstanceRecycler::InvertedIndexIdCache {
public:
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}

    // Looks up the inverted index info of <index_id, schema_version>, reading the schema
    // from the kv store on a cache miss and caching the outcome.
    //
    // Returns 0 on success. When the schema is already known to have no inverted index,
    // 0 is returned and `res` is left untouched. Returns -1 if the txn cannot be created
    // or the schema value is malformed; if reading the schema fails, the TxnErrorCode is
    // returned cast to int (per the original note: 1 if the schema kv is not found).
    //
    // `get` is deliberately not atomic as a whole: the lock is dropped while reading the
    // kv store, so two threads that miss on the same key may both fetch the schema and
    // both try to insert it. This trades a few duplicated meta reads for a smaller lock
    // range; the duplicated insertion does not affect correctness.
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
        const Key key {index_id, schema_version};
        {
            std::lock_guard lock(mtx_);
            if (schemas_without_inverted_index_.count(key)) {
                return 0;
            }
            if (auto it = inverted_index_id_map_.find(key); it != inverted_index_id_map_.end()) {
                res = it->second;
                return 0;
            }
        }
        // Get schema from kv
        // TODO(plat1ko): Single flight
        std::unique_ptr<Transaction> txn;
        TxnErrorCode err = txn_kv_->create_txn(&txn);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to create txn, err=" << err;
            return -1;
        }
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
        ValueBuf val_buf;
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
        if (err != TxnErrorCode::TXN_OK) {
            LOG(WARNING) << "failed to get schema, err=" << err;
            return static_cast<int>(err);
        }
        doris::TabletSchemaCloudPB schema;
        if (!parse_schema_value(val_buf, &schema)) {
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
            return -1;
        }
        if (schema.index_size() > 0) {
            // V1 is used when the schema does not record a storage format.
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
            if (schema.has_inverted_index_storage_format()) {
                index_format = schema.inverted_index_storage_format();
            }
            res.first = index_format;
            res.second.reserve(schema.index_size());
            for (const auto& i : schema.index()) {
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
                    res.second.emplace_back(i.index_id(), i.index_suffix_name());
                }
            }
        }
        insert(index_id, schema_version, res);
        return 0;
    }

    // Records the lookup result for <index_id, schema_version>.
    // An empty `index_info.second` means this schema has no inverted index.
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
        if (index_info.second.empty()) {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
            std::lock_guard lock(mtx_);
            schemas_without_inverted_index_.emplace(index_id, schema_version);
        } else {
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
            std::lock_guard lock(mtx_);
            // try_emplace keeps the existing entry if another thread inserted first.
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
        }
    }

private:
    std::string instance_id_;
    std::shared_ptr<TxnKv> txn_kv_;

    std::mutex mtx_;
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
    struct HashOfKey {
        size_t operator()(const Key& key) const {
            // Both components must contribute to the hash: hashing schema_version alone
            // would place every index_id sharing a version into the same bucket.
            size_t seed = std::hash<int64_t> {}(key.first);
            seed ^= std::hash<int32_t> {}(key.second) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
            return seed;
        }
    };
    // <index_id, schema_version> -> inverted_index_ids
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
    // Store <index_id, schema_version> of schema which doesn't have inverted index
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
};
580
581
// Builds the recycler for one instance.
//
// `txn_kv` is shared with the helpers created here (inverted index cache, resource
// manager, snapshot manager); `instance` is copied into `instance_info_`.
// NOTE(review): the initializers of `inverted_index_id_cache_` and `resource_mgr_` read
// the members `instance_id_` / `txn_kv_`, so they rely on those members being declared
// earlier in the class — confirm against the header before reordering anything.
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
                                   RecyclerThreadPoolGroup thread_pool_group,
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
        : txn_kv_(std::move(txn_kv)),
          instance_id_(instance.instance_id()),
          instance_info_(instance),
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
          _thread_pool_group(std::move(thread_pool_group)),
          txn_lazy_committer_(std::move(txn_lazy_committer)),
          delete_bitmap_lock_white_list_(std::make_shared<DeleteBitmapLockWhiteList>()),
          resource_mgr_(std::make_shared<ResourceManager>(txn_kv_)) {
    delete_bitmap_lock_white_list_->init();
    resource_mgr_->init();
    snapshot_manager_ = std::make_shared<SnapshotManager>(txn_kv_);

    // Since the recycler's resource manager could not be notified when instance info changes,
    // we need to refresh the instance info here to ensure the resource manager has the latest info.
    txn_lazy_committer_->resource_manager()->refresh_instance(instance_id_, instance);
}
600
601
126
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably needed here because InvertedIndexIdCache is only defined in
// this file and is owned through a smart pointer member — confirm against the header.
InstanceRecycler::~InstanceRecycler() = default;
602
603
110
int InstanceRecycler::init_obj_store_accessors() {
604
110
    for (const auto& obj_info : instance_info_.obj_info()) {
605
70
#ifdef UNIT_TEST
606
70
        auto accessor = std::make_shared<MockAccessor>();
607
#else
608
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
609
        if (!s3_conf) {
610
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
611
            return -1;
612
        }
613
614
        std::shared_ptr<S3Accessor> accessor;
615
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
616
        if (ret != 0) {
617
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
618
                         << " resource_id=" << obj_info.id();
619
            return ret;
620
        }
621
#endif
622
70
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
623
70
    }
624
625
110
    return 0;
626
110
}
627
628
110
int InstanceRecycler::init_storage_vault_accessors() {
629
110
    if (instance_info_.resource_ids().empty()) {
630
103
        return 0;
631
103
    }
632
633
7
    FullRangeGetOptions opts(txn_kv_);
634
7
    opts.prefetch = true;
635
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
636
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
637
638
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
639
18
        auto [k, v] = *kv;
640
18
        StorageVaultPB vault;
641
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
642
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
643
0
            return -1;
644
0
        }
645
18
        std::string recycler_storage_vault_white_list = accumulate(
646
18
                config::recycler_storage_vault_white_list.begin(),
647
18
                config::recycler_storage_vault_white_list.end(), std::string(),
648
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
648
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
649
18
        LOG_INFO("config::recycler_storage_vault_white_list")
650
18
                .tag("", recycler_storage_vault_white_list);
651
18
        if (!config::recycler_storage_vault_white_list.empty()) {
652
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
653
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
654
8
                it == config::recycler_storage_vault_white_list.end()) {
655
2
                LOG_WARNING(
656
2
                        "failed to init accessor for vault because this vault is not in "
657
2
                        "config::recycler_storage_vault_white_list. ")
658
2
                        .tag(" vault name:", vault.name())
659
2
                        .tag(" config::recycler_storage_vault_white_list:",
660
2
                             recycler_storage_vault_white_list);
661
2
                continue;
662
2
            }
663
8
        }
664
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
665
16
                                 &accessor_map_, &vault);
666
16
        if (vault.has_hdfs_info()) {
667
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
668
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
669
9
            int ret = accessor->init();
670
9
            if (ret != 0) {
671
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
672
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
673
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
674
4
                continue;
675
4
            }
676
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
677
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
678
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
679
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
680
#else
681
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
682
                       << "but HDFS storage vaults were detected";
683
#endif
684
7
        } else if (vault.has_obj_info()) {
685
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
686
7
            if (!s3_conf) {
687
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
688
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
689
1
                continue;
690
1
            }
691
692
6
            std::shared_ptr<S3Accessor> accessor;
693
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
694
6
            if (ret != 0) {
695
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
696
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
697
0
                             << " ret=" << ret
698
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
699
0
                continue;
700
0
            }
701
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
702
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
703
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
704
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
705
6
        }
706
16
    }
707
708
7
    if (!it->is_valid()) {
709
0
        LOG_WARNING("failed to get storage vault kv");
710
0
        return -1;
711
0
    }
712
713
7
    if (accessor_map_.empty()) {
714
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
715
1
        return -2;
716
1
    }
717
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
718
6
             instance_id_);
719
720
6
    return 0;
721
7
}
722
723
110
int InstanceRecycler::init() {
724
110
    int ret = init_obj_store_accessors();
725
110
    if (ret != 0) {
726
0
        return ret;
727
0
    }
728
729
110
    return init_storage_vault_accessors();
730
110
}
731
732
// Bundles several callables into a single std::function<int()>.
//
// When invoked, the returned task calls every callable in argument order and returns
// the last non-zero result, or 0 if all of them returned 0. A non-zero result does not
// stop the remaining callables from running. The callables are captured by copy.
template <typename... Func>
auto task_wrapper(Func... funcs) -> std::function<int()> {
    return [funcs...]() {
        // Braced initialization evaluates the calls strictly left to right.
        const std::initializer_list<int> results {funcs()...};
        int last_failure = 0;
        for (const int rc : results) {
            if (rc != 0) {
                last_failure = rc;
            }
        }
        return last_failure;
    };
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
Line
Count
Source
733
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
734
10
    return [funcs...]() {
735
10
        return [](std::initializer_list<int> ret_vals) {
736
10
            int i = 0;
737
10
            for (int ret : ret_vals) {
738
10
                if (ret != 0) {
739
10
                    i = ret;
740
10
                }
741
10
            }
742
10
            return i;
743
10
        }({funcs()...});
744
10
    };
745
10
}
746
747
10
int InstanceRecycler::do_recycle() {
748
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
749
10
    tablet_metrics_context_.reset();
750
10
    segment_metrics_context_.reset();
751
10
    DORIS_CLOUD_DEFER {
752
10
        tablet_metrics_context_.finish_report();
753
10
        segment_metrics_context_.finish_report();
754
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
751
10
    DORIS_CLOUD_DEFER {
752
10
        tablet_metrics_context_.finish_report();
753
10
        segment_metrics_context_.finish_report();
754
10
    };
755
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
756
0
        int res = recycle_cluster_snapshots();
757
0
        if (res != 0) {
758
0
            return -1;
759
0
        }
760
0
        return recycle_deleted_instance();
761
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
762
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
763
10
                                        fmt::format("instance id {}", instance_id_),
764
120
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
764
120
                                        [](int r) { return r != 0; });
765
10
        sync_executor
766
10
                .add(task_wrapper(
767
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
767
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
768
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
768
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
769
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
770
                                   // becase they may both recycle the same set of tablets
771
                        // recycle dropped table or idexes(mv, rollup)
772
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
772
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
773
                        // recycle dropped partitions
774
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
774
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
775
10
                .add(task_wrapper(
776
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
776
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
777
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
777
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
778
10
                .add(task_wrapper(
779
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
779
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
780
10
                .add(task_wrapper(
781
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
781
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
782
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
782
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
783
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
783
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
784
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
784
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
785
10
                .add(task_wrapper(
786
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
786
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
787
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
787
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
788
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
Line
Count
Source
788
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
789
10
        bool finished = true;
790
10
        std::vector<int> rets = sync_executor.when_all(&finished);
791
120
        for (int ret : rets) {
792
120
            if (ret != 0) {
793
0
                return ret;
794
0
            }
795
120
        }
796
10
        return finished ? 0 : -1;
797
10
    } else {
798
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
799
0
                     << " instance_id=" << instance_id_;
800
0
        return -1;
801
0
    }
802
10
}
803
804
/**
805
* 1. delete all remote data
806
* 2. delete all kv
807
* 3. remove instance kv
808
*/
809
4
int InstanceRecycler::recycle_deleted_instance() {
810
4
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
811
812
4
    int ret = 0;
813
4
    auto start_time = steady_clock::now();
814
815
4
    DORIS_CLOUD_DEFER {
816
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
817
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
818
4
                     << " recycle deleted instance, cost=" << cost
819
4
                     << "s, instance_id=" << instance_id_;
820
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
815
4
    DORIS_CLOUD_DEFER {
816
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
817
4
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
818
4
                     << " recycle deleted instance, cost=" << cost
819
4
                     << "s, instance_id=" << instance_id_;
820
4
    };
821
822
4
    bool has_snapshots = false;
823
4
    if (has_cluster_snapshots(&has_snapshots) != 0) {
824
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
825
0
        return -1;
826
4
    } else if (has_snapshots) {
827
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
828
1
        return 0;
829
1
    }
830
831
3
    if (recycle_operation_logs() != 0) {
832
0
        LOG_WARNING("failed to recycle operation logs").tag("instance_id", instance_id_);
833
0
        return -1;
834
0
    }
835
836
3
    if (recycle_versioned_rowsets() != 0) {
837
0
        LOG_WARNING("failed to recycle versioned rowsets").tag("instance_id", instance_id_);
838
0
        return -1;
839
0
    }
840
841
3
    bool snapshot_enabled = instance_info().has_snapshot_switch_status() &&
842
3
                            instance_info().snapshot_switch_status() !=
843
0
                                    SnapshotSwitchStatus::SNAPSHOT_SWITCH_DISABLED;
844
3
    if (snapshot_enabled) {
845
0
        bool has_unrecycled_rowsets = false;
846
0
        if (recycle_ref_rowsets(&has_unrecycled_rowsets) != 0) {
847
0
            LOG_WARNING("failed to recycle ref rowsets").tag("instance_id", instance_id_);
848
0
            return -1;
849
0
        } else if (has_unrecycled_rowsets) {
850
0
            LOG_INFO("instance has referenced rowsets, skip recycling")
851
0
                    .tag("instance_id", instance_id_);
852
0
            return ret;
853
0
        }
854
3
    } else { // delete all remote data if snapshot is disabled
855
3
        for (auto& [_, accessor] : accessor_map_) {
856
3
            if (stopped()) {
857
0
                return ret;
858
0
            }
859
860
3
            LOG(INFO) << "begin to delete all objects in " << accessor->uri();
861
3
            int del_ret = accessor->delete_all();
862
3
            if (del_ret == 0) {
863
3
                LOG(INFO) << "successfully delete all objects in " << accessor->uri();
864
3
            } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
865
                // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
866
                // so the recycling has been successful.
867
0
                ret = -1;
868
0
            }
869
3
        }
870
871
3
        if (ret != 0) {
872
0
            LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
873
0
            return ret;
874
0
        }
875
3
    }
876
877
    // delete all kv
878
3
    std::unique_ptr<Transaction> txn;
879
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
880
3
    if (err != TxnErrorCode::TXN_OK) {
881
0
        LOG(WARNING) << "failed to create txn";
882
0
        ret = -1;
883
0
        return -1;
884
0
    }
885
3
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
886
    // delete kv before deleting objects to prevent the checker from misjudging data loss
887
3
    std::string start_txn_key = txn_key_prefix(instance_id_);
888
3
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
889
3
    txn->remove(start_txn_key, end_txn_key);
890
3
    std::string start_version_key = version_key_prefix(instance_id_);
891
3
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
892
3
    txn->remove(start_version_key, end_version_key);
893
3
    std::string start_meta_key = meta_key_prefix(instance_id_);
894
3
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
895
3
    txn->remove(start_meta_key, end_meta_key);
896
3
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
897
3
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
898
3
    txn->remove(start_recycle_key, end_recycle_key);
899
3
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
900
3
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
901
3
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
902
3
    std::string start_copy_key = copy_key_prefix(instance_id_);
903
3
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
904
3
    txn->remove(start_copy_key, end_copy_key);
905
    // should not remove job key range, because we need to reserve job recycle kv
906
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
907
3
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
908
3
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
909
3
    txn->remove(start_job_tablet_key, end_job_tablet_key);
910
3
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
911
3
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
912
3
    std::string start_vault_key = storage_vault_key(key_info0);
913
3
    std::string end_vault_key = storage_vault_key(key_info1);
914
3
    txn->remove(start_vault_key, end_vault_key);
915
3
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
916
3
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
917
3
    txn->remove(versioned_version_key_start, versioned_version_key_end);
918
3
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
919
3
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
920
3
    txn->remove(versioned_index_key_start, versioned_index_key_end);
921
3
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
922
3
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
923
3
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
924
3
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
925
3
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
926
3
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
927
3
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
928
3
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
929
3
    txn->remove(versioned_data_key_start, versioned_data_key_end);
930
3
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
931
3
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
932
3
    txn->remove(versioned_log_key_start, versioned_log_key_end);
933
3
    err = txn->commit();
934
3
    if (err != TxnErrorCode::TXN_OK) {
935
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
936
0
        ret = -1;
937
0
    }
938
939
3
    if (ret == 0) {
940
        // remove instance kv
941
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
942
3
        err = txn_kv_->create_txn(&txn);
943
3
        if (err != TxnErrorCode::TXN_OK) {
944
0
            LOG(WARNING) << "failed to create txn";
945
0
            ret = -1;
946
0
            return ret;
947
0
        }
948
3
        std::string key;
949
3
        instance_key({instance_id_}, &key);
950
3
        txn->atomic_add(system_meta_service_instance_update_key(), 1);
951
3
        txn->remove(key);
952
3
        err = txn->commit();
953
3
        if (err != TxnErrorCode::TXN_OK) {
954
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
955
0
                         << " err=" << err;
956
0
            ret = -1;
957
0
        }
958
3
    }
959
3
    return ret;
960
3
}
961
962
int InstanceRecycler::check_rowset_exists(int64_t tablet_id, const std::string& rowset_id,
963
9
                                          bool* exists, PackedFileRecycleStats* stats) {
964
9
    if (exists == nullptr) {
965
0
        return -1;
966
0
    }
967
9
    *exists = false;
968
969
9
    std::string begin = meta_rowset_key({instance_id_, tablet_id, 0});
970
9
    std::string end = meta_rowset_key({instance_id_, tablet_id + 1, 0});
971
9
    std::string scan_begin = begin;
972
973
9
    while (true) {
974
9
        std::unique_ptr<RangeGetIterator> it_range;
975
9
        int get_ret = txn_get(txn_kv_.get(), scan_begin, end, it_range);
976
9
        if (get_ret < 0) {
977
0
            LOG_WARNING("failed to scan rowset metas when recycling packed file")
978
0
                    .tag("instance_id", instance_id_)
979
0
                    .tag("tablet_id", tablet_id)
980
0
                    .tag("ret", get_ret);
981
0
            return -1;
982
0
        }
983
9
        if (get_ret == 1 || it_range == nullptr || !it_range->has_next()) {
984
6
            return 0;
985
6
        }
986
987
3
        std::string last_key;
988
3
        while (it_range->has_next()) {
989
3
            auto [k, v] = it_range->next();
990
3
            last_key.assign(k.data(), k.size());
991
3
            doris::RowsetMetaCloudPB rowset_meta;
992
3
            if (!rowset_meta.ParseFromArray(v.data(), v.size())) {
993
0
                LOG_WARNING("malformed rowset meta when checking packed file rowset existence")
994
0
                        .tag("instance_id", instance_id_)
995
0
                        .tag("tablet_id", tablet_id)
996
0
                        .tag("key", hex(k));
997
0
                continue;
998
0
            }
999
3
            if (stats) {
1000
3
                ++stats->rowset_scan_count;
1001
3
            }
1002
3
            if (rowset_meta.rowset_id_v2() == rowset_id) {
1003
3
                *exists = true;
1004
3
                return 0;
1005
3
            }
1006
3
        }
1007
1008
0
        if (!it_range->more()) {
1009
0
            return 0;
1010
0
        }
1011
1012
        // Continue scanning from the next key to keep each transaction short.
1013
0
        scan_begin = std::move(last_key);
1014
0
        scan_begin.push_back('\x00');
1015
0
    }
1016
9
}
1017
1018
int InstanceRecycler::check_recycle_and_tmp_rowset_exists(int64_t tablet_id,
1019
                                                          const std::string& rowset_id,
1020
                                                          int64_t txn_id, bool* recycle_exists,
1021
11
                                                          bool* tmp_exists) {
1022
11
    if (recycle_exists == nullptr || tmp_exists == nullptr) {
1023
0
        return -1;
1024
0
    }
1025
11
    *recycle_exists = false;
1026
11
    *tmp_exists = false;
1027
1028
11
    if (txn_id <= 0) {
1029
0
        LOG_WARNING("invalid txn id when checking recycle/tmp rowset existence")
1030
0
                .tag("instance_id", instance_id_)
1031
0
                .tag("tablet_id", tablet_id)
1032
0
                .tag("rowset_id", rowset_id)
1033
0
                .tag("txn_id", txn_id);
1034
0
        return -1;
1035
0
    }
1036
1037
11
    std::unique_ptr<Transaction> txn;
1038
11
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1039
11
    if (err != TxnErrorCode::TXN_OK) {
1040
0
        LOG_WARNING("failed to create txn when checking recycle/tmp rowset existence")
1041
0
                .tag("instance_id", instance_id_)
1042
0
                .tag("tablet_id", tablet_id)
1043
0
                .tag("rowset_id", rowset_id)
1044
0
                .tag("txn_id", txn_id)
1045
0
                .tag("err", err);
1046
0
        return -1;
1047
0
    }
1048
1049
11
    std::string recycle_key = recycle_rowset_key({instance_id_, tablet_id, rowset_id});
1050
11
    auto ret = key_exists(txn.get(), recycle_key, true);
1051
11
    if (ret == TxnErrorCode::TXN_OK) {
1052
1
        *recycle_exists = true;
1053
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1054
0
        LOG_WARNING("failed to check recycle rowset existence")
1055
0
                .tag("instance_id", instance_id_)
1056
0
                .tag("tablet_id", tablet_id)
1057
0
                .tag("rowset_id", rowset_id)
1058
0
                .tag("key", hex(recycle_key))
1059
0
                .tag("err", ret);
1060
0
        return -1;
1061
0
    }
1062
1063
11
    std::string tmp_key = meta_rowset_tmp_key({instance_id_, txn_id, tablet_id});
1064
11
    ret = key_exists(txn.get(), tmp_key, true);
1065
11
    if (ret == TxnErrorCode::TXN_OK) {
1066
1
        *tmp_exists = true;
1067
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1068
0
        LOG_WARNING("failed to check tmp rowset existence")
1069
0
                .tag("instance_id", instance_id_)
1070
0
                .tag("tablet_id", tablet_id)
1071
0
                .tag("txn_id", txn_id)
1072
0
                .tag("key", hex(tmp_key))
1073
0
                .tag("err", ret);
1074
0
        return -1;
1075
0
    }
1076
1077
11
    return 0;
1078
11
}
1079
1080
std::pair<std::string, std::shared_ptr<StorageVaultAccessor>>
1081
8
InstanceRecycler::resolve_packed_file_accessor(const std::string& hint) {
1082
8
    if (!hint.empty()) {
1083
8
        if (auto it = accessor_map_.find(hint); it != accessor_map_.end()) {
1084
8
            return {hint, it->second};
1085
8
        }
1086
8
    }
1087
1088
0
    return {"", nullptr};
1089
8
}
1090
1091
int InstanceRecycler::correct_packed_file_info(cloud::PackedFileInfoPB* packed_info, bool* changed,
1092
                                               const std::string& packed_file_path,
1093
3
                                               PackedFileRecycleStats* stats) {
1094
3
    bool local_changed = false;
1095
3
    int64_t left_num = 0;
1096
3
    int64_t left_bytes = 0;
1097
3
    bool all_small_files_confirmed = true;
1098
3
    LOG(INFO) << "begin to correct file: " << packed_file_path;
1099
1100
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1101
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1102
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1103
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1104
14
        LOG_INFO("packed slice correction status")
1105
14
                .tag("instance_id", instance_id_)
1106
14
                .tag("packed_file_path", packed_file_path)
1107
14
                .tag("small_file_path", file.path())
1108
14
                .tag("tablet_id", tablet_id)
1109
14
                .tag("rowset_id", rowset_id)
1110
14
                .tag("txn_id", txn_id)
1111
14
                .tag("size", file.size())
1112
14
                .tag("deleted", file.deleted())
1113
14
                .tag("corrected", file.corrected())
1114
14
                .tag("confirmed_this_round", confirmed_this_round);
1115
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
Line
Count
Source
1100
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1101
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1102
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1103
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1104
14
        LOG_INFO("packed slice correction status")
1105
14
                .tag("instance_id", instance_id_)
1106
14
                .tag("packed_file_path", packed_file_path)
1107
14
                .tag("small_file_path", file.path())
1108
14
                .tag("tablet_id", tablet_id)
1109
14
                .tag("rowset_id", rowset_id)
1110
14
                .tag("txn_id", txn_id)
1111
14
                .tag("size", file.size())
1112
14
                .tag("deleted", file.deleted())
1113
14
                .tag("corrected", file.corrected())
1114
14
                .tag("confirmed_this_round", confirmed_this_round);
1115
14
    };
1116
1117
17
    for (int i = 0; i < packed_info->slices_size(); ++i) {
1118
14
        auto* small_file = packed_info->mutable_slices(i);
1119
14
        if (small_file->deleted()) {
1120
3
            log_small_file_status(*small_file, small_file->corrected());
1121
3
            continue;
1122
3
        }
1123
1124
11
        if (small_file->corrected()) {
1125
0
            left_num++;
1126
0
            left_bytes += small_file->size();
1127
0
            log_small_file_status(*small_file, true);
1128
0
            continue;
1129
0
        }
1130
1131
11
        if (!small_file->has_tablet_id() || !small_file->has_rowset_id()) {
1132
0
            LOG_WARNING("packed file small file missing identifiers during correction")
1133
0
                    .tag("instance_id", instance_id_)
1134
0
                    .tag("small_file_path", small_file->path())
1135
0
                    .tag("index", i);
1136
0
            return -1;
1137
0
        }
1138
1139
11
        int64_t tablet_id = small_file->tablet_id();
1140
11
        const std::string& rowset_id = small_file->rowset_id();
1141
11
        if (!small_file->has_txn_id() || small_file->txn_id() <= 0) {
1142
0
            LOG_WARNING("packed file small file missing valid txn id during correction")
1143
0
                    .tag("instance_id", instance_id_)
1144
0
                    .tag("small_file_path", small_file->path())
1145
0
                    .tag("index", i)
1146
0
                    .tag("tablet_id", tablet_id)
1147
0
                    .tag("rowset_id", rowset_id)
1148
0
                    .tag("has_txn_id", small_file->has_txn_id())
1149
0
                    .tag("txn_id", small_file->has_txn_id() ? small_file->txn_id() : 0);
1150
0
            return -1;
1151
0
        }
1152
11
        int64_t txn_id = small_file->txn_id();
1153
11
        bool recycle_exists = false;
1154
11
        bool tmp_exists = false;
1155
11
        if (check_recycle_and_tmp_rowset_exists(tablet_id, rowset_id, txn_id, &recycle_exists,
1156
11
                                                &tmp_exists) != 0) {
1157
0
            return -1;
1158
0
        }
1159
1160
11
        bool small_file_confirmed = false;
1161
11
        if (tmp_exists) {
1162
1
            left_num++;
1163
1
            left_bytes += small_file->size();
1164
1
            small_file_confirmed = true;
1165
10
        } else if (recycle_exists) {
1166
1
            left_num++;
1167
1
            left_bytes += small_file->size();
1168
            // keep small_file_confirmed=false so the packed file remains uncorrected
1169
9
        } else {
1170
9
            bool rowset_exists = false;
1171
9
            if (check_rowset_exists(tablet_id, rowset_id, &rowset_exists, stats) != 0) {
1172
0
                return -1;
1173
0
            }
1174
1175
9
            if (!rowset_exists) {
1176
6
                if (!small_file->deleted()) {
1177
6
                    small_file->set_deleted(true);
1178
6
                    local_changed = true;
1179
6
                }
1180
6
                if (!small_file->corrected()) {
1181
6
                    small_file->set_corrected(true);
1182
6
                    local_changed = true;
1183
6
                }
1184
6
                small_file_confirmed = true;
1185
6
            } else {
1186
3
                left_num++;
1187
3
                left_bytes += small_file->size();
1188
3
                small_file_confirmed = true;
1189
3
            }
1190
9
        }
1191
1192
11
        if (!small_file_confirmed) {
1193
1
            all_small_files_confirmed = false;
1194
1
        }
1195
1196
11
        if (small_file->corrected() != small_file_confirmed) {
1197
4
            small_file->set_corrected(small_file_confirmed);
1198
4
            local_changed = true;
1199
4
        }
1200
1201
11
        log_small_file_status(*small_file, small_file_confirmed);
1202
11
    }
1203
1204
3
    if (packed_info->remaining_slice_bytes() != left_bytes) {
1205
3
        packed_info->set_remaining_slice_bytes(left_bytes);
1206
3
        local_changed = true;
1207
3
    }
1208
3
    if (packed_info->ref_cnt() != left_num) {
1209
3
        auto old_ref_cnt = packed_info->ref_cnt();
1210
3
        packed_info->set_ref_cnt(left_num);
1211
3
        LOG_INFO("corrected packed file ref count")
1212
3
                .tag("instance_id", instance_id_)
1213
3
                .tag("resource_id", packed_info->resource_id())
1214
3
                .tag("packed_file_path", packed_file_path)
1215
3
                .tag("old_ref_cnt", old_ref_cnt)
1216
3
                .tag("new_ref_cnt", left_num);
1217
3
        local_changed = true;
1218
3
    }
1219
3
    if (packed_info->corrected() != all_small_files_confirmed) {
1220
2
        packed_info->set_corrected(all_small_files_confirmed);
1221
2
        local_changed = true;
1222
2
    }
1223
3
    if (left_num == 0 && packed_info->state() != cloud::PackedFileInfoPB::RECYCLING) {
1224
1
        packed_info->set_state(cloud::PackedFileInfoPB::RECYCLING);
1225
1
        local_changed = true;
1226
1
    }
1227
1228
3
    if (changed != nullptr) {
1229
3
        *changed = local_changed;
1230
3
    }
1231
3
    return 0;
1232
3
}
1233
1234
int InstanceRecycler::process_single_packed_file(const std::string& packed_key,
1235
                                                 const std::string& packed_file_path,
1236
4
                                                 PackedFileRecycleStats* stats) {
1237
4
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
1238
4
    bool correction_ok = false;
1239
4
    cloud::PackedFileInfoPB packed_info;
1240
1241
4
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
1242
4
        if (stopped()) {
1243
0
            LOG_WARNING("recycler stopped before processing packed file")
1244
0
                    .tag("instance_id", instance_id_)
1245
0
                    .tag("packed_file_path", packed_file_path)
1246
0
                    .tag("attempt", attempt);
1247
0
            return -1;
1248
0
        }
1249
1250
4
        std::unique_ptr<Transaction> txn;
1251
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1252
4
        if (err != TxnErrorCode::TXN_OK) {
1253
0
            LOG_WARNING("failed to create txn when processing packed file")
1254
0
                    .tag("instance_id", instance_id_)
1255
0
                    .tag("packed_file_path", packed_file_path)
1256
0
                    .tag("attempt", attempt)
1257
0
                    .tag("err", err);
1258
0
            return -1;
1259
0
        }
1260
1261
4
        std::string packed_val;
1262
4
        err = txn->get(packed_key, &packed_val);
1263
4
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1264
0
            return 0;
1265
0
        }
1266
4
        if (err != TxnErrorCode::TXN_OK) {
1267
0
            LOG_WARNING("failed to get packed file kv")
1268
0
                    .tag("instance_id", instance_id_)
1269
0
                    .tag("packed_file_path", packed_file_path)
1270
0
                    .tag("attempt", attempt)
1271
0
                    .tag("err", err);
1272
0
            return -1;
1273
0
        }
1274
1275
4
        if (!packed_info.ParseFromString(packed_val)) {
1276
0
            LOG_WARNING("failed to parse packed file info")
1277
0
                    .tag("instance_id", instance_id_)
1278
0
                    .tag("packed_file_path", packed_file_path)
1279
0
                    .tag("attempt", attempt);
1280
0
            return -1;
1281
0
        }
1282
1283
4
        int64_t now_sec = ::time(nullptr);
1284
4
        bool corrected = packed_info.corrected();
1285
4
        bool due = config::force_immediate_recycle ||
1286
4
                   now_sec - packed_info.created_at_sec() >=
1287
4
                           config::packed_file_correction_delay_seconds;
1288
1289
4
        if (!corrected && due) {
1290
3
            bool changed = false;
1291
3
            if (correct_packed_file_info(&packed_info, &changed, packed_file_path, stats) != 0) {
1292
0
                LOG_WARNING("correct_packed_file_info failed")
1293
0
                        .tag("instance_id", instance_id_)
1294
0
                        .tag("packed_file_path", packed_file_path)
1295
0
                        .tag("attempt", attempt);
1296
0
                return -1;
1297
0
            }
1298
3
            if (changed) {
1299
3
                std::string updated;
1300
3
                if (!packed_info.SerializeToString(&updated)) {
1301
0
                    LOG_WARNING("failed to serialize packed file info after correction")
1302
0
                            .tag("instance_id", instance_id_)
1303
0
                            .tag("packed_file_path", packed_file_path)
1304
0
                            .tag("attempt", attempt);
1305
0
                    return -1;
1306
0
                }
1307
3
                txn->put(packed_key, updated);
1308
3
                err = txn->commit();
1309
3
                if (err == TxnErrorCode::TXN_OK) {
1310
3
                    if (stats) {
1311
3
                        ++stats->num_corrected;
1312
3
                    }
1313
3
                } else {
1314
0
                    if (err == TxnErrorCode::TXN_CONFLICT && attempt < max_retry_times) {
1315
0
                        LOG_WARNING(
1316
0
                                "failed to commit correction for packed file due to conflict, "
1317
0
                                "retrying")
1318
0
                                .tag("instance_id", instance_id_)
1319
0
                                .tag("packed_file_path", packed_file_path)
1320
0
                                .tag("attempt", attempt);
1321
0
                        sleep_for_packed_file_retry();
1322
0
                        packed_info.Clear();
1323
0
                        continue;
1324
0
                    }
1325
0
                    LOG_WARNING("failed to commit correction for packed file")
1326
0
                            .tag("instance_id", instance_id_)
1327
0
                            .tag("packed_file_path", packed_file_path)
1328
0
                            .tag("attempt", attempt)
1329
0
                            .tag("err", err);
1330
0
                    return -1;
1331
0
                }
1332
3
            }
1333
3
        }
1334
1335
4
        correction_ok = true;
1336
4
        break;
1337
4
    }
1338
1339
4
    if (!correction_ok) {
1340
0
        return -1;
1341
0
    }
1342
1343
4
    if (!(packed_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1344
4
          packed_info.ref_cnt() == 0)) {
1345
3
        return 0;
1346
3
    }
1347
1348
1
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
1349
0
        LOG_WARNING("packed file missing resource id when recycling")
1350
0
                .tag("instance_id", instance_id_)
1351
0
                .tag("packed_file_path", packed_file_path);
1352
0
        return -1;
1353
0
    }
1354
1
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
1355
1
    if (!accessor) {
1356
0
        LOG_WARNING("no accessor available to delete packed file")
1357
0
                .tag("instance_id", instance_id_)
1358
0
                .tag("packed_file_path", packed_file_path)
1359
0
                .tag("resource_id", packed_info.resource_id());
1360
0
        return -1;
1361
0
    }
1362
1
    int del_ret = accessor->delete_file(packed_file_path);
1363
1
    if (del_ret != 0 && del_ret != 1) {
1364
0
        LOG_WARNING("failed to delete packed file")
1365
0
                .tag("instance_id", instance_id_)
1366
0
                .tag("packed_file_path", packed_file_path)
1367
0
                .tag("resource_id", resource_id)
1368
0
                .tag("ret", del_ret);
1369
0
        return -1;
1370
0
    }
1371
1
    if (del_ret == 1) {
1372
0
        LOG_INFO("packed file already removed")
1373
0
                .tag("instance_id", instance_id_)
1374
0
                .tag("packed_file_path", packed_file_path)
1375
0
                .tag("resource_id", resource_id);
1376
1
    } else {
1377
1
        LOG_INFO("deleted packed file")
1378
1
                .tag("instance_id", instance_id_)
1379
1
                .tag("packed_file_path", packed_file_path)
1380
1
                .tag("resource_id", resource_id);
1381
1
    }
1382
1383
1
    for (int del_attempt = 1; del_attempt <= max_retry_times; ++del_attempt) {
1384
1
        std::unique_ptr<Transaction> del_txn;
1385
1
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
1386
1
        if (err != TxnErrorCode::TXN_OK) {
1387
0
            LOG_WARNING("failed to create txn when removing packed file kv")
1388
0
                    .tag("instance_id", instance_id_)
1389
0
                    .tag("packed_file_path", packed_file_path)
1390
0
                    .tag("del_attempt", del_attempt)
1391
0
                    .tag("err", err);
1392
0
            return -1;
1393
0
        }
1394
1395
1
        std::string latest_val;
1396
1
        err = del_txn->get(packed_key, &latest_val);
1397
1
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1398
0
            return 0;
1399
0
        }
1400
1
        if (err != TxnErrorCode::TXN_OK) {
1401
0
            LOG_WARNING("failed to re-read packed file kv before removal")
1402
0
                    .tag("instance_id", instance_id_)
1403
0
                    .tag("packed_file_path", packed_file_path)
1404
0
                    .tag("del_attempt", del_attempt)
1405
0
                    .tag("err", err);
1406
0
            return -1;
1407
0
        }
1408
1409
1
        cloud::PackedFileInfoPB latest_info;
1410
1
        if (!latest_info.ParseFromString(latest_val)) {
1411
0
            LOG_WARNING("failed to parse packed file info before removal")
1412
0
                    .tag("instance_id", instance_id_)
1413
0
                    .tag("packed_file_path", packed_file_path)
1414
0
                    .tag("del_attempt", del_attempt);
1415
0
            return -1;
1416
0
        }
1417
1418
1
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1419
1
              latest_info.ref_cnt() == 0)) {
1420
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
1421
0
                    .tag("instance_id", instance_id_)
1422
0
                    .tag("packed_file_path", packed_file_path)
1423
0
                    .tag("del_attempt", del_attempt);
1424
0
            return 0;
1425
0
        }
1426
1427
1
        del_txn->remove(packed_key);
1428
1
        err = del_txn->commit();
1429
1
        if (err == TxnErrorCode::TXN_OK) {
1430
1
            if (stats) {
1431
1
                ++stats->num_deleted;
1432
1
                stats->bytes_deleted += static_cast<int64_t>(packed_key.size()) +
1433
1
                                        static_cast<int64_t>(latest_val.size());
1434
1
                if (del_ret == 0 || del_ret == 1) {
1435
1
                    ++stats->num_object_deleted;
1436
1
                    int64_t object_size = latest_info.total_slice_bytes();
1437
1
                    if (object_size <= 0) {
1438
0
                        object_size = packed_info.total_slice_bytes();
1439
0
                    }
1440
1
                    stats->bytes_object_deleted += object_size;
1441
1
                }
1442
1
            }
1443
1
            LOG_INFO("removed packed file metadata")
1444
1
                    .tag("instance_id", instance_id_)
1445
1
                    .tag("packed_file_path", packed_file_path);
1446
1
            return 0;
1447
1
        }
1448
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
1449
0
            if (del_attempt >= max_retry_times) {
1450
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
1451
0
                        .tag("instance_id", instance_id_)
1452
0
                        .tag("packed_file_path", packed_file_path)
1453
0
                        .tag("del_attempt", del_attempt);
1454
0
                return -1;
1455
0
            }
1456
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
1457
0
                    .tag("instance_id", instance_id_)
1458
0
                    .tag("packed_file_path", packed_file_path)
1459
0
                    .tag("del_attempt", del_attempt);
1460
0
            sleep_for_packed_file_retry();
1461
0
            continue;
1462
0
        }
1463
0
        LOG_WARNING("failed to remove packed file kv")
1464
0
                .tag("instance_id", instance_id_)
1465
0
                .tag("packed_file_path", packed_file_path)
1466
0
                .tag("del_attempt", del_attempt)
1467
0
                .tag("err", err);
1468
0
        return -1;
1469
0
    }
1470
1471
0
    return -1;
1472
1
}
1473
1474
int InstanceRecycler::handle_packed_file_kv(std::string_view key, std::string_view /*value*/,
1475
4
                                            PackedFileRecycleStats* stats, int* ret) {
1476
4
    if (stats) {
1477
4
        ++stats->num_scanned;
1478
4
    }
1479
4
    std::string packed_file_path;
1480
4
    if (!decode_packed_file_key(key, &packed_file_path)) {
1481
0
        LOG_WARNING("failed to decode packed file key")
1482
0
                .tag("instance_id", instance_id_)
1483
0
                .tag("key", hex(key));
1484
0
        if (stats) {
1485
0
            ++stats->num_failed;
1486
0
        }
1487
0
        if (ret) {
1488
0
            *ret = -1;
1489
0
        }
1490
0
        return 0;
1491
0
    }
1492
1493
4
    std::string packed_key(key);
1494
4
    int process_ret = process_single_packed_file(packed_key, packed_file_path, stats);
1495
4
    if (process_ret != 0) {
1496
0
        if (stats) {
1497
0
            ++stats->num_failed;
1498
0
        }
1499
0
        if (ret) {
1500
0
            *ret = -1;
1501
0
        }
1502
0
    }
1503
4
    return 0;
1504
4
}
1505
1506
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
1507
8.77k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
1508
8.77k
    if (config::force_immediate_recycle) {
1509
15
        return 0L;
1510
15
    }
1511
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1512
8.75k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1513
8.75k
    int64_t retention_seconds = config::retention_seconds;
1514
8.75k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1515
7.00k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1516
7.00k
    }
1517
8.75k
    int64_t final_expiration = expiration + retention_seconds;
1518
8.75k
    if (*earlest_ts > final_expiration) {
1519
6
        *earlest_ts = final_expiration;
1520
6
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1521
6
    }
1522
8.75k
    return final_expiration;
1523
8.77k
}
1524
1525
int64_t calculate_partition_expired_time(
1526
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
1527
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
1528
9
    if (config::force_immediate_recycle) {
1529
3
        return 0L;
1530
3
    }
1531
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
1532
6
                                                            : partition_meta_pb.creation_time();
1533
6
    int64_t retention_seconds = config::retention_seconds;
1534
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
1535
6
        retention_seconds =
1536
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1537
6
    }
1538
6
    int64_t final_expiration = expiration + retention_seconds;
1539
6
    if (*earlest_ts > final_expiration) {
1540
2
        *earlest_ts = final_expiration;
1541
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1542
2
    }
1543
6
    return final_expiration;
1544
9
}
1545
1546
int64_t calculate_index_expired_time(const std::string& instance_id_,
1547
                                     const RecycleIndexPB& index_meta_pb,
1548
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1549
10
    if (config::force_immediate_recycle) {
1550
4
        return 0L;
1551
4
    }
1552
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1553
6
                                                        : index_meta_pb.creation_time();
1554
6
    int64_t retention_seconds = config::retention_seconds;
1555
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1556
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1557
6
    }
1558
6
    int64_t final_expiration = expiration + retention_seconds;
1559
6
    if (*earlest_ts > final_expiration) {
1560
2
        *earlest_ts = final_expiration;
1561
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1562
2
    }
1563
6
    return final_expiration;
1564
10
}
1565
1566
int64_t calculate_tmp_rowset_expired_time(
1567
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1568
102k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1569
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1570
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1571
    //  duration or timeout always < `retention_time` in practice.
1572
102k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1573
102k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1574
102k
                                 : tmp_rowset_meta_pb.creation_time();
1575
102k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1576
102k
    int64_t final_expiration = expiration + config::retention_seconds;
1577
102k
    if (*earlest_ts > final_expiration) {
1578
18
        *earlest_ts = final_expiration;
1579
18
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1580
18
    }
1581
102k
    return final_expiration;
1582
102k
}
1583
1584
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1585
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1586
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1587
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1588
8
        *earlest_ts = final_expiration / 1000;
1589
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1590
8
    }
1591
30.0k
    return final_expiration;
1592
30.0k
}
1593
1594
int64_t calculate_restore_job_expired_time(
1595
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1596
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1597
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1598
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1599
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1600
        // final state, recycle immediately
1601
41
        return 0L;
1602
41
    }
1603
    // not final state, wait much longer than the FE's timeout(1 day)
1604
0
    int64_t last_modified_s =
1605
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1606
0
    int64_t expiration = restore_job.expired_at_s() > 0
1607
0
                                 ? last_modified_s + restore_job.expired_at_s()
1608
0
                                 : last_modified_s;
1609
0
    int64_t final_expiration = expiration + config::retention_seconds;
1610
0
    if (*earlest_ts > final_expiration) {
1611
0
        *earlest_ts = final_expiration;
1612
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1613
0
    }
1614
0
    return final_expiration;
1615
41
}
1616
1617
// Checks whether the versioned load (load_key == true) or compact (load_key == false)
// meta rowset key at `end_version` of a tablet holds the rowset identified by `rowset_id`.
//
// @param exist  set to true only when the key is found and its rowset_id_v2 equals
//               `rowset_id`; left untouched in every other case, so callers must
//               initialise it themselves.
// @param start_version  used for logging only.
// @return 0 when the lookup completed (found, not found, or rowset id mismatch),
//         -1 when the read failed with an error other than "key not found".
int get_meta_rowset_key(Transaction* txn, const std::string& instance_id, int64_t tablet_id,
                        const std::string& rowset_id, int64_t start_version, int64_t end_version,
                        bool load_key, bool* exist) {
    std::string key =
            load_key ? versioned::meta_rowset_load_key({instance_id, tablet_id, end_version})
                     : versioned::meta_rowset_compact_key({instance_id, tablet_id, end_version});

    RowsetMetaCloudPB rowset_meta;
    Versionstamp version;
    const TxnErrorCode err = versioned::document_get(txn, key, &rowset_meta, &version);

    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
        VLOG_DEBUG << "not found load or compact meta_rowset_key."
                   << " rowset_id=" << rowset_id << " start_version=" << start_version
                   << " end_version=" << end_version << " key=" << hex(key);
        return 0;
    }

    if (err != TxnErrorCode::TXN_OK) {
        LOG_INFO("failed to get load or compact meta_rowset_key.")
                .tag("rowset_id", rowset_id)
                .tag("start_version", start_version)
                .tag("end_version", end_version)
                .tag("key", hex(key))
                .tag("error_code", err);
        return -1;
    }

    if (rowset_meta.rowset_id_v2() != rowset_id) {
        // The key exists but holds a different rowset.
        VLOG_DEBUG << "rowset_id does not match when find load or compact meta_rowset_key."
                   << " rowset_id=" << rowset_id << " start_version=" << start_version
                   << " end_version=" << end_version << " key=" << hex(key)
                   << " found_rowset_id=" << rowset_meta.rowset_id_v2();
        return 0;
    }

    *exist = true;
    VLOG_DEBUG << "found load or compact meta_rowset_key."
               << " rowset_id=" << rowset_id << " start_version=" << start_version
               << " end_version=" << end_version << " key=" << hex(key);
    return 0;
}
1651
1652
2
int InstanceRecycler::abort_txn_for_related_rowset(int64_t txn_id) {
1653
2
    AbortTxnRequest req;
1654
2
    TxnInfoPB txn_info;
1655
2
    MetaServiceCode code = MetaServiceCode::OK;
1656
2
    std::string msg;
1657
2
    std::stringstream ss;
1658
2
    std::unique_ptr<Transaction> txn;
1659
2
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1660
2
    if (err != TxnErrorCode::TXN_OK) {
1661
0
        LOG_WARNING("failed to create txn").tag("err", err);
1662
0
        return -1;
1663
0
    }
1664
1665
    // get txn index
1666
2
    TxnIndexPB txn_idx_pb;
1667
2
    auto index_key = txn_index_key({instance_id_, txn_id});
1668
2
    std::string index_val;
1669
2
    err = txn->get(index_key, &index_val);
1670
2
    if (err != TxnErrorCode::TXN_OK) {
1671
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1672
            // maybe recycled
1673
0
            LOG_INFO("txn index not found, txn_id={} instance_id={}", txn_id, instance_id_)
1674
0
                    .tag("key", hex(index_key))
1675
0
                    .tag("txn_id", txn_id);
1676
0
            return 0;
1677
0
        }
1678
0
        LOG_WARNING("failed to get txn index")
1679
0
                .tag("err", err)
1680
0
                .tag("key", hex(index_key))
1681
0
                .tag("txn_id", txn_id);
1682
0
        return -1;
1683
0
    }
1684
2
    if (!txn_idx_pb.ParseFromString(index_val)) {
1685
0
        LOG_WARNING("failed to parse txn index")
1686
0
                .tag("err", err)
1687
0
                .tag("key", hex(index_key))
1688
0
                .tag("txn_id", txn_id);
1689
0
        return -1;
1690
0
    }
1691
1692
2
    auto info_key = txn_info_key({instance_id_, txn_idx_pb.tablet_index().db_id(), txn_id});
1693
2
    std::string info_val;
1694
2
    err = txn->get(info_key, &info_val);
1695
2
    if (err != TxnErrorCode::TXN_OK) {
1696
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1697
            // maybe recycled
1698
0
            LOG_INFO("txn info not found, txn_id={} instance_id={}", txn_id, instance_id_)
1699
0
                    .tag("key", hex(info_key))
1700
0
                    .tag("txn_id", txn_id);
1701
0
            return 0;
1702
0
        }
1703
0
        LOG_WARNING("failed to get txn info")
1704
0
                .tag("err", err)
1705
0
                .tag("key", hex(info_key))
1706
0
                .tag("txn_id", txn_id);
1707
0
        return -1;
1708
0
    }
1709
2
    if (!txn_info.ParseFromString(info_val)) {
1710
0
        LOG_WARNING("failed to parse txn info")
1711
0
                .tag("err", err)
1712
0
                .tag("key", hex(info_key))
1713
0
                .tag("txn_id", txn_id);
1714
0
        return -1;
1715
0
    }
1716
1717
2
    if (txn_info.status() != TxnStatusPB::TXN_STATUS_PREPARED) {
1718
0
        LOG_INFO("txn is not prepared status, txn_id={} status={}", txn_id, txn_info.status())
1719
0
                .tag("key", hex(info_key))
1720
0
                .tag("txn_id", txn_id);
1721
0
        return 0;
1722
0
    }
1723
1724
2
    req.set_txn_id(txn_id);
1725
1726
2
    LOG(INFO) << "begin abort txn for related rowset, txn_id=" << txn_id
1727
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString();
1728
1729
2
    _abort_txn(instance_id_, &req, txn.get(), txn_info, ss, code, msg);
1730
2
    err = txn->commit();
1731
2
    if (err != TxnErrorCode::TXN_OK) {
1732
0
        code = cast_as<ErrCategory::COMMIT>(err);
1733
0
        ss << "failed to commit kv txn, txn_id=" << txn_info.txn_id() << " err=" << err;
1734
0
        msg = ss.str();
1735
0
        return -1;
1736
0
    }
1737
1738
2
    LOG(INFO) << "finish abort txn for related rowset, txn_id=" << txn_id
1739
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString()
1740
2
              << " code=" << code << " msg=" << msg;
1741
1742
2
    return 0;
1743
2
}
1744
1745
4
int InstanceRecycler::abort_job_for_related_rowset(const RowsetMetaCloudPB& rowset_meta) {
1746
4
    FinishTabletJobRequest req;
1747
4
    FinishTabletJobResponse res;
1748
4
    req.set_action(FinishTabletJobRequest::ABORT);
1749
4
    MetaServiceCode code = MetaServiceCode::OK;
1750
4
    std::string msg;
1751
4
    std::stringstream ss;
1752
1753
4
    TabletIndexPB tablet_idx;
1754
4
    int ret = get_tablet_idx(txn_kv_.get(), instance_id_, rowset_meta.tablet_id(), tablet_idx);
1755
4
    if (ret == 1) {
1756
        // tablet maybe recycled, directly return 0
1757
1
        return 0;
1758
3
    } else if (ret != 0) {
1759
0
        LOG(WARNING) << "failed to get tablet index, tablet_id=" << rowset_meta.tablet_id()
1760
0
                     << " instance_id=" << instance_id_ << " ret=" << ret;
1761
0
        return ret;
1762
0
    }
1763
1764
3
    std::unique_ptr<Transaction> txn;
1765
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1766
3
    if (err != TxnErrorCode::TXN_OK) {
1767
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_ << " err=" << err;
1768
0
        return -1;
1769
0
    }
1770
1771
3
    std::string job_key =
1772
3
            job_tablet_key({instance_id_, tablet_idx.table_id(), tablet_idx.index_id(),
1773
3
                            tablet_idx.partition_id(), tablet_idx.tablet_id()});
1774
3
    std::string job_val;
1775
3
    err = txn->get(job_key, &job_val);
1776
3
    if (err != TxnErrorCode::TXN_OK) {
1777
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1778
0
            LOG(INFO) << "job not exists, instance_id=" << instance_id_
1779
0
                      << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1780
0
            return 0;
1781
0
        }
1782
0
        LOG(WARNING) << "failed to get job, instance_id=" << instance_id_
1783
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " err=" << err
1784
0
                     << " key=" << hex(job_key);
1785
0
        return -1;
1786
0
    }
1787
1788
3
    TabletJobInfoPB job_pb;
1789
3
    if (!job_pb.ParseFromString(job_val)) {
1790
0
        LOG(WARNING) << "failed to parse job, instance_id=" << instance_id_
1791
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1792
0
        return -1;
1793
0
    }
1794
1795
3
    std::string job_id {};
1796
3
    if (!job_pb.compaction().empty()) {
1797
2
        for (const auto& c : job_pb.compaction()) {
1798
2
            if (c.id() == rowset_meta.job_id()) {
1799
2
                job_id = c.id();
1800
2
                break;
1801
2
            }
1802
2
        }
1803
2
    } else if (job_pb.has_schema_change()) {
1804
1
        job_id = job_pb.schema_change().id();
1805
1
    }
1806
1807
3
    if (!job_id.empty() && rowset_meta.job_id() == job_id) {
1808
3
        LOG(INFO) << "begin to abort job for related rowset, job_id=" << rowset_meta.job_id()
1809
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id();
1810
3
        req.mutable_job()->CopyFrom(job_pb);
1811
3
        req.set_action(FinishTabletJobRequest::ABORT);
1812
3
        _finish_tablet_job(&req, &res, instance_id_, txn, txn_kv_.get(),
1813
3
                           delete_bitmap_lock_white_list_.get(), resource_mgr_.get(), code, msg,
1814
3
                           ss);
1815
3
        if (code != MetaServiceCode::OK) {
1816
0
            LOG(WARNING) << "failed to abort job, instance_id=" << instance_id_
1817
0
                         << " tablet_id=" << tablet_idx.tablet_id() << " code=" << code
1818
0
                         << " msg=" << msg;
1819
0
            return -1;
1820
0
        }
1821
3
        LOG(INFO) << "finish abort job for related rowset, job_id=" << rowset_meta.job_id()
1822
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id()
1823
3
                  << " code=" << code << " msg=" << msg;
1824
3
    } else {
1825
        // clang-format off
1826
0
        LOG(INFO) << "there is no job for related rowset, directly recycle rowset data"
1827
0
                  << ", instance_id=" << instance_id_ 
1828
0
                  << ", tablet_id=" << tablet_idx.tablet_id() 
1829
0
                  << ", job_id=" << job_id
1830
0
                  << ", rowset_id=" << rowset_meta.rowset_id_v2();
1831
        // clang-format on
1832
0
    }
1833
1834
3
    return 0;
1835
3
}
1836
1837
template <typename T>
1838
54.7k
int InstanceRecycler::abort_txn_or_job_for_recycle(T& rowset_meta_pb) {
1839
54.7k
    RowsetMetaCloudPB* rs_meta;
1840
54.7k
    RecycleRowsetPB::Type rowset_type = RecycleRowsetPB::PREPARE;
1841
1842
54.7k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1843
        // For keys that are not in the RecycleRowsetPB::PREPARE state
1844
        // we do not need to check the job or txn state
1845
        // because tmp_rowset_key already exists when this key is generated.
1846
3.75k
        rowset_type = rowset_meta_pb.type();
1847
3.75k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1848
51.0k
    } else {
1849
51.0k
        rs_meta = &rowset_meta_pb;
1850
51.0k
    }
1851
1852
54.7k
    DCHECK(rs_meta != nullptr);
1853
1854
    // compaction/sc will generate recycle_rowset_key for each input rowset with load_id
1855
    // we need skip them because the related txn has been finished
1856
    // load_rowset1 load_rowset2 => pick for compaction => compact_rowset
1857
    // compact_rowset1 compact_rowset2 => pick for compaction/sc job => new_rowset
1858
54.7k
    if (rowset_type == RecycleRowsetPB::PREPARE) {
1859
51.6k
        if (rs_meta->has_load_id()) {
1860
            // load
1861
2
            return abort_txn_for_related_rowset(rs_meta->txn_id());
1862
51.6k
        } else if (rs_meta->has_job_id()) {
1863
            // compaction / schema change
1864
3
            return abort_job_for_related_rowset(*rs_meta);
1865
3
        }
1866
51.6k
    }
1867
1868
54.7k
    return 0;
1869
54.7k
}
_ZN5doris5cloud16InstanceRecycler28abort_txn_or_job_for_recycleINS0_15RecycleRowsetPBEEEiRT_
Line
Count
Source
1838
3.75k
int InstanceRecycler::abort_txn_or_job_for_recycle(T& rowset_meta_pb) {
1839
3.75k
    RowsetMetaCloudPB* rs_meta;
1840
3.75k
    RecycleRowsetPB::Type rowset_type = RecycleRowsetPB::PREPARE;
1841
1842
3.75k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1843
        // For keys that are not in the RecycleRowsetPB::PREPARE state
1844
        // we do not need to check the job or txn state
1845
        // because tmp_rowset_key already exists when this key is generated.
1846
3.75k
        rowset_type = rowset_meta_pb.type();
1847
3.75k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1848
3.75k
    } else {
1849
3.75k
        rs_meta = &rowset_meta_pb;
1850
3.75k
    }
1851
1852
3.75k
    DCHECK(rs_meta != nullptr);
1853
1854
    // compaction/sc will generate recycle_rowset_key for each input rowset with load_id
1855
    // we need skip them because the related txn has been finished
1856
    // load_rowset1 load_rowset2 => pick for compaction => compact_rowset
1857
    // compact_rowset1 compact_rowset2 => pick for compaction/sc job => new_rowset
1858
3.75k
    if (rowset_type == RecycleRowsetPB::PREPARE) {
1859
652
        if (rs_meta->has_load_id()) {
1860
            // load
1861
1
            return abort_txn_for_related_rowset(rs_meta->txn_id());
1862
651
        } else if (rs_meta->has_job_id()) {
1863
            // compaction / schema change
1864
1
            return abort_job_for_related_rowset(*rs_meta);
1865
1
        }
1866
652
    }
1867
1868
3.75k
    return 0;
1869
3.75k
}
_ZN5doris5cloud16InstanceRecycler28abort_txn_or_job_for_recycleINS_17RowsetMetaCloudPBEEEiRT_
Line
Count
Source
1838
51.0k
int InstanceRecycler::abort_txn_or_job_for_recycle(T& rowset_meta_pb) {
1839
51.0k
    RowsetMetaCloudPB* rs_meta;
1840
51.0k
    RecycleRowsetPB::Type rowset_type = RecycleRowsetPB::PREPARE;
1841
1842
51.0k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1843
        // For keys that are not in the RecycleRowsetPB::PREPARE state
1844
        // we do not need to check the job or txn state
1845
        // because tmp_rowset_key already exists when this key is generated.
1846
51.0k
        rowset_type = rowset_meta_pb.type();
1847
51.0k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1848
51.0k
    } else {
1849
51.0k
        rs_meta = &rowset_meta_pb;
1850
51.0k
    }
1851
1852
51.0k
    DCHECK(rs_meta != nullptr);
1853
1854
    // compaction/sc will generate recycle_rowset_key for each input rowset with load_id
1855
    // we need skip them because the related txn has been finished
1856
    // load_rowset1 load_rowset2 => pick for compaction => compact_rowset
1857
    // compact_rowset1 compact_rowset2 => pick for compaction/sc job => new_rowset
1858
51.0k
    if (rowset_type == RecycleRowsetPB::PREPARE) {
1859
51.0k
        if (rs_meta->has_load_id()) {
1860
            // load
1861
1
            return abort_txn_for_related_rowset(rs_meta->txn_id());
1862
51.0k
        } else if (rs_meta->has_job_id()) {
1863
            // compaction / schema change
1864
2
            return abort_job_for_related_rowset(*rs_meta);
1865
2
        }
1866
51.0k
    }
1867
1868
51.0k
    return 0;
1869
51.0k
}
1870
1871
template <typename T>
1872
int mark_rowset_as_recycled(TxnKv* txn_kv, const std::string& instance_id, std::string_view key,
1873
109k
                            T& rowset_meta_pb) {
1874
109k
    RowsetMetaCloudPB* rs_meta;
1875
1876
109k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1877
102k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1878
102k
    } else {
1879
102k
        rs_meta = &rowset_meta_pb;
1880
102k
    }
1881
1882
109k
    bool need_write_back = false;
1883
109k
    if ((!rs_meta->has_is_recycled() || !rs_meta->is_recycled())) {
1884
54.7k
        need_write_back = true;
1885
54.7k
        rs_meta->set_is_recycled(true);
1886
54.7k
    }
1887
1888
109k
    if (need_write_back) {
1889
54.7k
        std::unique_ptr<Transaction> txn;
1890
54.7k
        TxnErrorCode err = txn_kv->create_txn(&txn);
1891
54.7k
        if (err != TxnErrorCode::TXN_OK) {
1892
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1893
0
            return -1;
1894
0
        }
1895
        // double check becase of new transaction
1896
54.7k
        T rowset_meta;
1897
54.7k
        std::string val;
1898
54.7k
        err = txn->get(key, &val);
1899
54.7k
        if (!rowset_meta.ParseFromString(val)) {
1900
0
            LOG(WARNING) << "failed to parse rs_meta, instance_id=" << instance_id;
1901
0
            return -1;
1902
0
        }
1903
54.7k
        if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1904
51.0k
            rs_meta = rowset_meta.mutable_rowset_meta();
1905
51.0k
        } else {
1906
51.0k
            rs_meta = &rowset_meta;
1907
51.0k
        }
1908
54.7k
        if ((rs_meta->has_is_recycled() && rs_meta->is_recycled())) {
1909
0
            return 0;
1910
0
        }
1911
54.7k
        rs_meta->set_is_recycled(true);
1912
54.7k
        val.clear();
1913
54.7k
        rowset_meta.SerializeToString(&val);
1914
54.7k
        txn->put(key, val);
1915
54.7k
        err = txn->commit();
1916
54.7k
        if (err != TxnErrorCode::TXN_OK) {
1917
0
            LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1918
0
            return -1;
1919
0
        }
1920
54.7k
    }
1921
109k
    return need_write_back ? 1 : 0;
1922
109k
}
_ZN5doris5cloud23mark_rowset_as_recycledINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS8_ERT_
Line
Count
Source
1873
7.50k
                            T& rowset_meta_pb) {
1874
7.50k
    RowsetMetaCloudPB* rs_meta;
1875
1876
7.50k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1877
7.50k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1878
7.50k
    } else {
1879
7.50k
        rs_meta = &rowset_meta_pb;
1880
7.50k
    }
1881
1882
7.50k
    bool need_write_back = false;
1883
7.50k
    if ((!rs_meta->has_is_recycled() || !rs_meta->is_recycled())) {
1884
3.75k
        need_write_back = true;
1885
3.75k
        rs_meta->set_is_recycled(true);
1886
3.75k
    }
1887
1888
7.50k
    if (need_write_back) {
1889
3.75k
        std::unique_ptr<Transaction> txn;
1890
3.75k
        TxnErrorCode err = txn_kv->create_txn(&txn);
1891
3.75k
        if (err != TxnErrorCode::TXN_OK) {
1892
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1893
0
            return -1;
1894
0
        }
1895
        // double check becase of new transaction
1896
3.75k
        T rowset_meta;
1897
3.75k
        std::string val;
1898
3.75k
        err = txn->get(key, &val);
1899
3.75k
        if (!rowset_meta.ParseFromString(val)) {
1900
0
            LOG(WARNING) << "failed to parse rs_meta, instance_id=" << instance_id;
1901
0
            return -1;
1902
0
        }
1903
3.75k
        if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1904
3.75k
            rs_meta = rowset_meta.mutable_rowset_meta();
1905
3.75k
        } else {
1906
3.75k
            rs_meta = &rowset_meta;
1907
3.75k
        }
1908
3.75k
        if ((rs_meta->has_is_recycled() && rs_meta->is_recycled())) {
1909
0
            return 0;
1910
0
        }
1911
3.75k
        rs_meta->set_is_recycled(true);
1912
3.75k
        val.clear();
1913
3.75k
        rowset_meta.SerializeToString(&val);
1914
3.75k
        txn->put(key, val);
1915
3.75k
        err = txn->commit();
1916
3.75k
        if (err != TxnErrorCode::TXN_OK) {
1917
0
            LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1918
0
            return -1;
1919
0
        }
1920
3.75k
    }
1921
7.50k
    return need_write_back ? 1 : 0;
1922
7.50k
}
_ZN5doris5cloud23mark_rowset_as_recycledINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS8_ERT_
Line
Count
Source
1873
102k
                            T& rowset_meta_pb) {
1874
102k
    RowsetMetaCloudPB* rs_meta;
1875
1876
102k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1877
102k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1878
102k
    } else {
1879
102k
        rs_meta = &rowset_meta_pb;
1880
102k
    }
1881
1882
102k
    bool need_write_back = false;
1883
102k
    if ((!rs_meta->has_is_recycled() || !rs_meta->is_recycled())) {
1884
51.0k
        need_write_back = true;
1885
51.0k
        rs_meta->set_is_recycled(true);
1886
51.0k
    }
1887
1888
102k
    if (need_write_back) {
1889
51.0k
        std::unique_ptr<Transaction> txn;
1890
51.0k
        TxnErrorCode err = txn_kv->create_txn(&txn);
1891
51.0k
        if (err != TxnErrorCode::TXN_OK) {
1892
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1893
0
            return -1;
1894
0
        }
1895
        // double check becase of new transaction
1896
51.0k
        T rowset_meta;
1897
51.0k
        std::string val;
1898
51.0k
        err = txn->get(key, &val);
1899
51.0k
        if (!rowset_meta.ParseFromString(val)) {
1900
0
            LOG(WARNING) << "failed to parse rs_meta, instance_id=" << instance_id;
1901
0
            return -1;
1902
0
        }
1903
51.0k
        if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1904
51.0k
            rs_meta = rowset_meta.mutable_rowset_meta();
1905
51.0k
        } else {
1906
51.0k
            rs_meta = &rowset_meta;
1907
51.0k
        }
1908
51.0k
        if ((rs_meta->has_is_recycled() && rs_meta->is_recycled())) {
1909
0
            return 0;
1910
0
        }
1911
51.0k
        rs_meta->set_is_recycled(true);
1912
51.0k
        val.clear();
1913
51.0k
        rowset_meta.SerializeToString(&val);
1914
51.0k
        txn->put(key, val);
1915
51.0k
        err = txn->commit();
1916
51.0k
        if (err != TxnErrorCode::TXN_OK) {
1917
0
            LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1918
0
            return -1;
1919
0
        }
1920
51.0k
    }
1921
102k
    return need_write_back ? 1 : 0;
1922
102k
}
1923
1924
0
int InstanceRecycler::recycle_ref_rowsets(bool* has_unrecycled_rowsets) {
1925
0
    const std::string task_name = "recycle_ref_rowsets";
1926
0
    int64_t num_scanned = 0;
1927
0
    int64_t num_recycled = 0;
1928
0
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
1929
1930
0
    std::string data_rowset_ref_count_key_start =
1931
0
            versioned::data_rowset_ref_count_key({instance_id_, 0, ""});
1932
0
    std::string data_rowset_ref_count_key_end =
1933
0
            versioned::data_rowset_ref_count_key({instance_id_, INT64_MAX, ""});
1934
1935
0
    LOG_WARNING("begin to recycle ref rowsets").tag("instance_id", instance_id_);
1936
1937
0
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1938
0
    register_recycle_task(task_name, start_time);
1939
1940
0
    DORIS_CLOUD_DEFER {
1941
0
        unregister_recycle_task(task_name);
1942
0
        int64_t cost =
1943
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1944
0
        metrics_context.finish_report();
1945
0
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
1946
0
                .tag("instance_id", instance_id_)
1947
0
                .tag("num_scanned", num_scanned)
1948
0
                .tag("num_recycled", num_recycled);
1949
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
1950
1951
0
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
1952
0
        ++num_scanned;
1953
1954
0
        int64_t tablet_id;
1955
0
        std::string rowset_id;
1956
0
        std::string_view key(k);
1957
0
        if (!versioned::decode_data_rowset_ref_count_key(&key, &tablet_id, &rowset_id)) {
1958
0
            LOG_WARNING("failed to decode data rowset ref count key").tag("key", hex(k));
1959
0
            return -1;
1960
0
        }
1961
1962
0
        std::unique_ptr<Transaction> txn;
1963
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1964
0
        if (err != TxnErrorCode::TXN_OK) {
1965
0
            return -1;
1966
0
        }
1967
1968
0
        int64_t ref_count;
1969
0
        if (!txn->decode_atomic_int(v, &ref_count)) {
1970
0
            LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(v));
1971
0
            return -1;
1972
0
        }
1973
0
        if (ref_count > 1) {
1974
0
            *has_unrecycled_rowsets = true;
1975
0
            LOG_INFO("skip recycle ref_count > 1 rowset")
1976
0
                    .tag("instance_id", instance_id_)
1977
0
                    .tag("tablet_id", tablet_id)
1978
0
                    .tag("rowset_id", rowset_id)
1979
0
                    .tag("ref_count", ref_count);
1980
0
            return 0;
1981
0
        }
1982
1983
0
        std::string meta_rowset_key =
1984
0
                versioned::meta_rowset_key({instance_id_, tablet_id, rowset_id});
1985
0
        ValueBuf val_buf;
1986
0
        err = blob_get(txn.get(), meta_rowset_key, &val_buf);
1987
0
        if (err != TxnErrorCode::TXN_OK) {
1988
0
            LOG_WARNING("failed to get meta_rowset_key")
1989
0
                    .tag("instance_id", instance_id_)
1990
0
                    .tag("tablet_id", tablet_id)
1991
0
                    .tag("rowset_id", rowset_id)
1992
0
                    .tag("key", hex(meta_rowset_key))
1993
0
                    .tag("err", err);
1994
0
            return -1;
1995
0
        }
1996
0
        doris::RowsetMetaCloudPB rowset_meta;
1997
0
        if (!val_buf.to_pb(&rowset_meta)) {
1998
0
            LOG_WARNING("failed to parse RowsetMetaCloudPB")
1999
0
                    .tag("instance_id", instance_id_)
2000
0
                    .tag("tablet_id", tablet_id)
2001
0
                    .tag("rowset_id", rowset_id)
2002
0
                    .tag("key", hex(meta_rowset_key));
2003
0
            return -1;
2004
0
        }
2005
0
        int64_t start_version = rowset_meta.start_version();
2006
0
        int64_t end_version = rowset_meta.end_version();
2007
2008
        // Check if the meta_rowset_compact_key or meta_rowset_load_key exists:
2009
        // exists: means it's referenced by current instance, can recycle rowset;
2010
        // not exists: means it's referenced by other instances, cannot recycle;
2011
        //
2012
        // end_version = 1: the first rowset;
2013
        // end_version = 0: the rowset is committed by load, but not commit_txn;
2014
        // can recycle in these 2 situations
2015
0
        bool exist = false;
2016
0
        if (end_version > 1) {
2017
0
            if (start_version != end_version) {
2018
0
                if (get_meta_rowset_key(txn.get(), instance_id_, tablet_id, rowset_id,
2019
0
                                        start_version, end_version, false, &exist) != 0) {
2020
0
                    return -1;
2021
0
                }
2022
0
            } else {
2023
0
                if (get_meta_rowset_key(txn.get(), instance_id_, tablet_id, rowset_id,
2024
0
                                        start_version, end_version, true, &exist) != 0) {
2025
0
                    return -1;
2026
0
                }
2027
0
                if (!exist && get_meta_rowset_key(txn.get(), instance_id_, tablet_id, rowset_id,
2028
0
                                                  start_version, end_version, false, &exist) != 0) {
2029
0
                    return -1;
2030
0
                }
2031
0
            }
2032
0
        }
2033
2034
0
        if (end_version > 1 && !exist) {
2035
0
            *has_unrecycled_rowsets = true;
2036
0
            LOG_INFO("skip recycle ref_count = 1 rowset")
2037
0
                    .tag("instance_id", instance_id_)
2038
0
                    .tag("tablet_id", tablet_id)
2039
0
                    .tag("rowset_id", rowset_id)
2040
0
                    .tag("start_version", start_version)
2041
0
                    .tag("end_version", end_version)
2042
0
                    .tag("ref_count", ref_count);
2043
0
            return 0;
2044
0
        }
2045
2046
0
        if (recycle_rowset_meta_and_data("", rowset_meta) != 0) {
2047
0
            LOG_WARNING("failed to recycle_rowset_meta_and_data")
2048
0
                    .tag("instance_id", instance_id_)
2049
0
                    .tag("tablet_id", tablet_id)
2050
0
                    .tag("rowset_id", rowset_id);
2051
0
            return -1;
2052
0
        }
2053
2054
0
        ++num_recycled;
2055
0
        return 0;
2056
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
2057
2058
    // recycle_func and loop_done for scan and recycle
2059
0
    return scan_and_recycle(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end,
2060
0
                            std::move(recycle_func));
2061
0
}
2062
2063
17
int InstanceRecycler::recycle_indexes() {
2064
17
    const std::string task_name = "recycle_indexes";
2065
17
    int64_t num_scanned = 0;
2066
17
    int64_t num_expired = 0;
2067
17
    int64_t num_recycled = 0;
2068
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2069
2070
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
2071
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
2072
17
    std::string index_key0;
2073
17
    std::string index_key1;
2074
17
    recycle_index_key(index_key_info0, &index_key0);
2075
17
    recycle_index_key(index_key_info1, &index_key1);
2076
2077
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
2078
2079
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2080
17
    register_recycle_task(task_name, start_time);
2081
2082
17
    DORIS_CLOUD_DEFER {
2083
17
        unregister_recycle_task(task_name);
2084
17
        int64_t cost =
2085
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2086
17
        metrics_context.finish_report();
2087
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2088
17
                .tag("instance_id", instance_id_)
2089
17
                .tag("num_scanned", num_scanned)
2090
17
                .tag("num_expired", num_expired)
2091
17
                .tag("num_recycled", num_recycled);
2092
17
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2082
2
    DORIS_CLOUD_DEFER {
2083
2
        unregister_recycle_task(task_name);
2084
2
        int64_t cost =
2085
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2086
2
        metrics_context.finish_report();
2087
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2088
2
                .tag("instance_id", instance_id_)
2089
2
                .tag("num_scanned", num_scanned)
2090
2
                .tag("num_expired", num_expired)
2091
2
                .tag("num_recycled", num_recycled);
2092
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2082
15
    DORIS_CLOUD_DEFER {
2083
15
        unregister_recycle_task(task_name);
2084
15
        int64_t cost =
2085
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2086
15
        metrics_context.finish_report();
2087
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2088
15
                .tag("instance_id", instance_id_)
2089
15
                .tag("num_scanned", num_scanned)
2090
15
                .tag("num_expired", num_expired)
2091
15
                .tag("num_recycled", num_recycled);
2092
15
    };
2093
2094
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2095
2096
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
2097
17
    std::vector<std::string_view> index_keys;
2098
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2099
10
        ++num_scanned;
2100
10
        RecycleIndexPB index_pb;
2101
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2102
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2103
0
            return -1;
2104
0
        }
2105
10
        int64_t current_time = ::time(nullptr);
2106
10
        if (current_time <
2107
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2108
0
            return 0;
2109
0
        }
2110
10
        ++num_expired;
2111
        // decode index_id
2112
10
        auto k1 = k;
2113
10
        k1.remove_prefix(1);
2114
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2115
10
        decode_key(&k1, &out);
2116
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2117
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2118
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2119
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2120
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2121
        // Change state to RECYCLING
2122
10
        std::unique_ptr<Transaction> txn;
2123
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2124
10
        if (err != TxnErrorCode::TXN_OK) {
2125
0
            LOG_WARNING("failed to create txn").tag("err", err);
2126
0
            return -1;
2127
0
        }
2128
10
        std::string val;
2129
10
        err = txn->get(k, &val);
2130
10
        if (err ==
2131
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2132
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2133
0
            return 0;
2134
0
        }
2135
10
        if (err != TxnErrorCode::TXN_OK) {
2136
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2137
0
            return -1;
2138
0
        }
2139
10
        index_pb.Clear();
2140
10
        if (!index_pb.ParseFromString(val)) {
2141
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2142
0
            return -1;
2143
0
        }
2144
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2145
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2146
9
            txn->put(k, index_pb.SerializeAsString());
2147
9
            err = txn->commit();
2148
9
            if (err != TxnErrorCode::TXN_OK) {
2149
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2150
0
                return -1;
2151
0
            }
2152
9
        }
2153
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2154
1
            LOG_WARNING("failed to recycle tablets under index")
2155
1
                    .tag("table_id", index_pb.table_id())
2156
1
                    .tag("instance_id", instance_id_)
2157
1
                    .tag("index_id", index_id);
2158
1
            return -1;
2159
1
        }
2160
2161
9
        if (index_pb.has_db_id()) {
2162
            // Recycle the versioned keys
2163
3
            std::unique_ptr<Transaction> txn;
2164
3
            err = txn_kv_->create_txn(&txn);
2165
3
            if (err != TxnErrorCode::TXN_OK) {
2166
0
                LOG_WARNING("failed to create txn").tag("err", err);
2167
0
                return -1;
2168
0
            }
2169
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2170
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2171
3
            std::string index_inverted_key = versioned::index_inverted_key(
2172
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2173
3
            versioned_remove_all(txn.get(), meta_key);
2174
3
            txn->remove(index_key);
2175
3
            txn->remove(index_inverted_key);
2176
3
            err = txn->commit();
2177
3
            if (err != TxnErrorCode::TXN_OK) {
2178
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2179
0
                return -1;
2180
0
            }
2181
3
        }
2182
2183
9
        metrics_context.total_recycled_num = ++num_recycled;
2184
9
        metrics_context.report();
2185
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2186
9
        index_keys.push_back(k);
2187
9
        return 0;
2188
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2098
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2099
2
        ++num_scanned;
2100
2
        RecycleIndexPB index_pb;
2101
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2102
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2103
0
            return -1;
2104
0
        }
2105
2
        int64_t current_time = ::time(nullptr);
2106
2
        if (current_time <
2107
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2108
0
            return 0;
2109
0
        }
2110
2
        ++num_expired;
2111
        // decode index_id
2112
2
        auto k1 = k;
2113
2
        k1.remove_prefix(1);
2114
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2115
2
        decode_key(&k1, &out);
2116
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2117
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2118
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2119
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2120
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2121
        // Change state to RECYCLING
2122
2
        std::unique_ptr<Transaction> txn;
2123
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2124
2
        if (err != TxnErrorCode::TXN_OK) {
2125
0
            LOG_WARNING("failed to create txn").tag("err", err);
2126
0
            return -1;
2127
0
        }
2128
2
        std::string val;
2129
2
        err = txn->get(k, &val);
2130
2
        if (err ==
2131
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2132
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2133
0
            return 0;
2134
0
        }
2135
2
        if (err != TxnErrorCode::TXN_OK) {
2136
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2137
0
            return -1;
2138
0
        }
2139
2
        index_pb.Clear();
2140
2
        if (!index_pb.ParseFromString(val)) {
2141
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2142
0
            return -1;
2143
0
        }
2144
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2145
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2146
1
            txn->put(k, index_pb.SerializeAsString());
2147
1
            err = txn->commit();
2148
1
            if (err != TxnErrorCode::TXN_OK) {
2149
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2150
0
                return -1;
2151
0
            }
2152
1
        }
2153
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2154
1
            LOG_WARNING("failed to recycle tablets under index")
2155
1
                    .tag("table_id", index_pb.table_id())
2156
1
                    .tag("instance_id", instance_id_)
2157
1
                    .tag("index_id", index_id);
2158
1
            return -1;
2159
1
        }
2160
2161
1
        if (index_pb.has_db_id()) {
2162
            // Recycle the versioned keys
2163
1
            std::unique_ptr<Transaction> txn;
2164
1
            err = txn_kv_->create_txn(&txn);
2165
1
            if (err != TxnErrorCode::TXN_OK) {
2166
0
                LOG_WARNING("failed to create txn").tag("err", err);
2167
0
                return -1;
2168
0
            }
2169
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2170
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2171
1
            std::string index_inverted_key = versioned::index_inverted_key(
2172
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2173
1
            versioned_remove_all(txn.get(), meta_key);
2174
1
            txn->remove(index_key);
2175
1
            txn->remove(index_inverted_key);
2176
1
            err = txn->commit();
2177
1
            if (err != TxnErrorCode::TXN_OK) {
2178
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2179
0
                return -1;
2180
0
            }
2181
1
        }
2182
2183
1
        metrics_context.total_recycled_num = ++num_recycled;
2184
1
        metrics_context.report();
2185
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2186
1
        index_keys.push_back(k);
2187
1
        return 0;
2188
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2098
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2099
8
        ++num_scanned;
2100
8
        RecycleIndexPB index_pb;
2101
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2102
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2103
0
            return -1;
2104
0
        }
2105
8
        int64_t current_time = ::time(nullptr);
2106
8
        if (current_time <
2107
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2108
0
            return 0;
2109
0
        }
2110
8
        ++num_expired;
2111
        // decode index_id
2112
8
        auto k1 = k;
2113
8
        k1.remove_prefix(1);
2114
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2115
8
        decode_key(&k1, &out);
2116
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2117
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2118
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2119
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2120
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2121
        // Change state to RECYCLING
2122
8
        std::unique_ptr<Transaction> txn;
2123
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2124
8
        if (err != TxnErrorCode::TXN_OK) {
2125
0
            LOG_WARNING("failed to create txn").tag("err", err);
2126
0
            return -1;
2127
0
        }
2128
8
        std::string val;
2129
8
        err = txn->get(k, &val);
2130
8
        if (err ==
2131
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2132
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2133
0
            return 0;
2134
0
        }
2135
8
        if (err != TxnErrorCode::TXN_OK) {
2136
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2137
0
            return -1;
2138
0
        }
2139
8
        index_pb.Clear();
2140
8
        if (!index_pb.ParseFromString(val)) {
2141
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2142
0
            return -1;
2143
0
        }
2144
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2145
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2146
8
            txn->put(k, index_pb.SerializeAsString());
2147
8
            err = txn->commit();
2148
8
            if (err != TxnErrorCode::TXN_OK) {
2149
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2150
0
                return -1;
2151
0
            }
2152
8
        }
2153
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2154
0
            LOG_WARNING("failed to recycle tablets under index")
2155
0
                    .tag("table_id", index_pb.table_id())
2156
0
                    .tag("instance_id", instance_id_)
2157
0
                    .tag("index_id", index_id);
2158
0
            return -1;
2159
0
        }
2160
2161
8
        if (index_pb.has_db_id()) {
2162
            // Recycle the versioned keys
2163
2
            std::unique_ptr<Transaction> txn;
2164
2
            err = txn_kv_->create_txn(&txn);
2165
2
            if (err != TxnErrorCode::TXN_OK) {
2166
0
                LOG_WARNING("failed to create txn").tag("err", err);
2167
0
                return -1;
2168
0
            }
2169
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2170
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2171
2
            std::string index_inverted_key = versioned::index_inverted_key(
2172
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2173
2
            versioned_remove_all(txn.get(), meta_key);
2174
2
            txn->remove(index_key);
2175
2
            txn->remove(index_inverted_key);
2176
2
            err = txn->commit();
2177
2
            if (err != TxnErrorCode::TXN_OK) {
2178
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2179
0
                return -1;
2180
0
            }
2181
2
        }
2182
2183
8
        metrics_context.total_recycled_num = ++num_recycled;
2184
8
        metrics_context.report();
2185
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2186
8
        index_keys.push_back(k);
2187
8
        return 0;
2188
8
    };
2189
2190
17
    auto loop_done = [&index_keys, this]() -> int {
2191
6
        if (index_keys.empty()) return 0;
2192
5
        DORIS_CLOUD_DEFER {
2193
5
            index_keys.clear();
2194
5
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2192
1
        DORIS_CLOUD_DEFER {
2193
1
            index_keys.clear();
2194
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2192
4
        DORIS_CLOUD_DEFER {
2193
4
            index_keys.clear();
2194
4
        };
2195
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2196
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2197
0
            return -1;
2198
0
        }
2199
5
        return 0;
2200
5
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2190
2
    auto loop_done = [&index_keys, this]() -> int {
2191
2
        if (index_keys.empty()) return 0;
2192
1
        DORIS_CLOUD_DEFER {
2193
1
            index_keys.clear();
2194
1
        };
2195
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2196
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2197
0
            return -1;
2198
0
        }
2199
1
        return 0;
2200
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2190
4
    auto loop_done = [&index_keys, this]() -> int {
2191
4
        if (index_keys.empty()) return 0;
2192
4
        DORIS_CLOUD_DEFER {
2193
4
            index_keys.clear();
2194
4
        };
2195
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2196
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2197
0
            return -1;
2198
0
        }
2199
4
        return 0;
2200
4
    };
2201
2202
17
    if (config::enable_recycler_stats_metrics) {
2203
0
        scan_and_statistics_indexes();
2204
0
    }
2205
    // recycle_func and loop_done for scan and recycle
2206
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
2207
17
}
2208
2209
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
2210
8.24k
                             int64_t tablet_id) {
2211
8.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
2212
2213
8.24k
    std::unique_ptr<Transaction> txn;
2214
8.24k
    TxnErrorCode err = txn_kv->create_txn(&txn);
2215
8.24k
    if (err != TxnErrorCode::TXN_OK) {
2216
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
2217
0
                     << " tablet_id=" << tablet_id << " err=" << err;
2218
0
        return false;
2219
0
    }
2220
2221
8.24k
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
2222
8.24k
    std::string tablet_idx_val;
2223
8.24k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
2224
8.24k
    if (TxnErrorCode::TXN_OK != err) {
2225
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
2226
0
                     << " tablet_id=" << tablet_id << " err=" << err
2227
0
                     << " key=" << hex(tablet_idx_key);
2228
0
        return false;
2229
0
    }
2230
2231
8.24k
    TabletIndexPB tablet_idx_pb;
2232
8.24k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
2233
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
2234
0
                     << " tablet_id=" << tablet_id;
2235
0
        return false;
2236
0
    }
2237
2238
8.24k
    if (!tablet_idx_pb.has_db_id()) {
2239
        // In the previous version, the db_id was not set in the index_pb.
2240
        // If updating to the version which enable txn lazy commit, the db_id will be set.
2241
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
2242
0
                  << " instance_id=" << instance_id
2243
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
2244
0
        return true;
2245
0
    }
2246
2247
8.24k
    std::string ver_val;
2248
8.24k
    std::string ver_key =
2249
8.24k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
2250
8.24k
                                   tablet_idx_pb.partition_id()});
2251
8.24k
    err = txn->get(ver_key, &ver_val);
2252
2253
8.24k
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2254
204
        LOG(INFO) << ""
2255
204
                     "partition version not found, instance_id="
2256
204
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
2257
204
                  << " table_id=" << tablet_idx_pb.table_id()
2258
204
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
2259
204
                  << " key=" << hex(ver_key);
2260
204
        return true;
2261
204
    }
2262
2263
8.03k
    if (TxnErrorCode::TXN_OK != err) {
2264
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
2265
0
                     << " db_id=" << tablet_idx_pb.db_id()
2266
0
                     << " table_id=" << tablet_idx_pb.table_id()
2267
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2268
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
2269
0
        return false;
2270
0
    }
2271
2272
8.03k
    VersionPB version_pb;
2273
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
2274
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
2275
0
                     << " db_id=" << tablet_idx_pb.db_id()
2276
0
                     << " table_id=" << tablet_idx_pb.table_id()
2277
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2278
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
2279
0
        return false;
2280
0
    }
2281
2282
8.03k
    if (version_pb.pending_txn_ids_size() > 0) {
2283
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
2284
4.00k
        DCHECK(version_pb.pending_txn_ids_size() == 1);
2285
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
2286
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
2287
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
2288
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
2289
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
2290
4.00k
                     << " key=" << hex(ver_key);
2291
4.00k
        return false;
2292
4.00k
    }
2293
4.03k
    return true;
2294
8.03k
}
2295
2296
15
int InstanceRecycler::recycle_partitions() {
2297
15
    const std::string task_name = "recycle_partitions";
2298
15
    int64_t num_scanned = 0;
2299
15
    int64_t num_expired = 0;
2300
15
    int64_t num_recycled = 0;
2301
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2302
2303
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
2304
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
2305
15
    std::string part_key0;
2306
15
    std::string part_key1;
2307
15
    recycle_partition_key(part_key_info0, &part_key0);
2308
15
    recycle_partition_key(part_key_info1, &part_key1);
2309
2310
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
2311
2312
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2313
15
    register_recycle_task(task_name, start_time);
2314
2315
15
    DORIS_CLOUD_DEFER {
2316
15
        unregister_recycle_task(task_name);
2317
15
        int64_t cost =
2318
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2319
15
        metrics_context.finish_report();
2320
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2321
15
                .tag("instance_id", instance_id_)
2322
15
                .tag("num_scanned", num_scanned)
2323
15
                .tag("num_expired", num_expired)
2324
15
                .tag("num_recycled", num_recycled);
2325
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2315
2
    DORIS_CLOUD_DEFER {
2316
2
        unregister_recycle_task(task_name);
2317
2
        int64_t cost =
2318
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2319
2
        metrics_context.finish_report();
2320
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2321
2
                .tag("instance_id", instance_id_)
2322
2
                .tag("num_scanned", num_scanned)
2323
2
                .tag("num_expired", num_expired)
2324
2
                .tag("num_recycled", num_recycled);
2325
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2315
13
    DORIS_CLOUD_DEFER {
2316
13
        unregister_recycle_task(task_name);
2317
13
        int64_t cost =
2318
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2319
13
        metrics_context.finish_report();
2320
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2321
13
                .tag("instance_id", instance_id_)
2322
13
                .tag("num_scanned", num_scanned)
2323
13
                .tag("num_expired", num_expired)
2324
13
                .tag("num_recycled", num_recycled);
2325
13
    };
2326
2327
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2328
2329
    // Elements in `partition_keys` have the same lifetime as `it` in `scan_and_recycle`
2330
15
    std::vector<std::string_view> partition_keys;
2331
15
    std::vector<std::string> partition_version_keys;
2332
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2333
9
        ++num_scanned;
2334
9
        RecyclePartitionPB part_pb;
2335
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2336
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2337
0
            return -1;
2338
0
        }
2339
9
        int64_t current_time = ::time(nullptr);
2340
9
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2341
9
                                                            &earlest_ts)) { // not expired
2342
0
            return 0;
2343
0
        }
2344
9
        ++num_expired;
2345
        // decode partition_id
2346
9
        auto k1 = k;
2347
9
        k1.remove_prefix(1);
2348
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2349
9
        decode_key(&k1, &out);
2350
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2351
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2352
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2353
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2354
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2355
        // Change state to RECYCLING
2356
9
        std::unique_ptr<Transaction> txn;
2357
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2358
9
        if (err != TxnErrorCode::TXN_OK) {
2359
0
            LOG_WARNING("failed to create txn").tag("err", err);
2360
0
            return -1;
2361
0
        }
2362
9
        std::string val;
2363
9
        err = txn->get(k, &val);
2364
9
        if (err ==
2365
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2366
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2367
0
            return 0;
2368
0
        }
2369
9
        if (err != TxnErrorCode::TXN_OK) {
2370
0
            LOG_WARNING("failed to get kv");
2371
0
            return -1;
2372
0
        }
2373
9
        part_pb.Clear();
2374
9
        if (!part_pb.ParseFromString(val)) {
2375
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2376
0
            return -1;
2377
0
        }
2378
        // Partitions with PREPARED state MUST have no data
2379
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2380
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2381
8
            txn->put(k, part_pb.SerializeAsString());
2382
8
            err = txn->commit();
2383
8
            if (err != TxnErrorCode::TXN_OK) {
2384
0
                LOG_WARNING("failed to commit txn: {}", err);
2385
0
                return -1;
2386
0
            }
2387
8
        }
2388
2389
9
        int ret = 0;
2390
33
        for (int64_t index_id : part_pb.index_id()) {
2391
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2392
1
                LOG_WARNING("failed to recycle tablets under partition")
2393
1
                        .tag("table_id", part_pb.table_id())
2394
1
                        .tag("instance_id", instance_id_)
2395
1
                        .tag("index_id", index_id)
2396
1
                        .tag("partition_id", partition_id);
2397
1
                ret = -1;
2398
1
            }
2399
33
        }
2400
9
        if (ret == 0 && part_pb.has_db_id()) {
2401
            // Recycle the versioned keys
2402
8
            std::unique_ptr<Transaction> txn;
2403
8
            err = txn_kv_->create_txn(&txn);
2404
8
            if (err != TxnErrorCode::TXN_OK) {
2405
0
                LOG_WARNING("failed to create txn").tag("err", err);
2406
0
                return -1;
2407
0
            }
2408
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2409
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2410
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2411
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2412
8
            std::string partition_version_key =
2413
8
                    versioned::partition_version_key({instance_id_, partition_id});
2414
8
            versioned_remove_all(txn.get(), meta_key);
2415
8
            txn->remove(index_key);
2416
8
            txn->remove(inverted_index_key);
2417
8
            versioned_remove_all(txn.get(), partition_version_key);
2418
8
            err = txn->commit();
2419
8
            if (err != TxnErrorCode::TXN_OK) {
2420
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2421
0
                return -1;
2422
0
            }
2423
8
        }
2424
2425
9
        if (ret == 0) {
2426
8
            ++num_recycled;
2427
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2428
8
            partition_keys.push_back(k);
2429
8
            if (part_pb.db_id() > 0) {
2430
8
                partition_version_keys.push_back(partition_version_key(
2431
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2432
8
            }
2433
8
            metrics_context.total_recycled_num = num_recycled;
2434
8
            metrics_context.report();
2435
8
        }
2436
9
        return ret;
2437
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2332
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2333
2
        ++num_scanned;
2334
2
        RecyclePartitionPB part_pb;
2335
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2336
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2337
0
            return -1;
2338
0
        }
2339
2
        int64_t current_time = ::time(nullptr);
2340
2
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2341
2
                                                            &earlest_ts)) { // not expired
2342
0
            return 0;
2343
0
        }
2344
2
        ++num_expired;
2345
        // decode partition_id
2346
2
        auto k1 = k;
2347
2
        k1.remove_prefix(1);
2348
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2349
2
        decode_key(&k1, &out);
2350
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2351
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2352
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2353
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2354
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2355
        // Change state to RECYCLING
2356
2
        std::unique_ptr<Transaction> txn;
2357
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2358
2
        if (err != TxnErrorCode::TXN_OK) {
2359
0
            LOG_WARNING("failed to create txn").tag("err", err);
2360
0
            return -1;
2361
0
        }
2362
2
        std::string val;
2363
2
        err = txn->get(k, &val);
2364
2
        if (err ==
2365
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2366
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2367
0
            return 0;
2368
0
        }
2369
2
        if (err != TxnErrorCode::TXN_OK) {
2370
0
            LOG_WARNING("failed to get kv");
2371
0
            return -1;
2372
0
        }
2373
2
        part_pb.Clear();
2374
2
        if (!part_pb.ParseFromString(val)) {
2375
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2376
0
            return -1;
2377
0
        }
2378
        // Partitions with PREPARED state MUST have no data
2379
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2380
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2381
1
            txn->put(k, part_pb.SerializeAsString());
2382
1
            err = txn->commit();
2383
1
            if (err != TxnErrorCode::TXN_OK) {
2384
0
                LOG_WARNING("failed to commit txn: {}", err);
2385
0
                return -1;
2386
0
            }
2387
1
        }
2388
2389
2
        int ret = 0;
2390
2
        for (int64_t index_id : part_pb.index_id()) {
2391
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2392
1
                LOG_WARNING("failed to recycle tablets under partition")
2393
1
                        .tag("table_id", part_pb.table_id())
2394
1
                        .tag("instance_id", instance_id_)
2395
1
                        .tag("index_id", index_id)
2396
1
                        .tag("partition_id", partition_id);
2397
1
                ret = -1;
2398
1
            }
2399
2
        }
2400
2
        if (ret == 0 && part_pb.has_db_id()) {
2401
            // Recycle the versioned keys
2402
1
            std::unique_ptr<Transaction> txn;
2403
1
            err = txn_kv_->create_txn(&txn);
2404
1
            if (err != TxnErrorCode::TXN_OK) {
2405
0
                LOG_WARNING("failed to create txn").tag("err", err);
2406
0
                return -1;
2407
0
            }
2408
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2409
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2410
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2411
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2412
1
            std::string partition_version_key =
2413
1
                    versioned::partition_version_key({instance_id_, partition_id});
2414
1
            versioned_remove_all(txn.get(), meta_key);
2415
1
            txn->remove(index_key);
2416
1
            txn->remove(inverted_index_key);
2417
1
            versioned_remove_all(txn.get(), partition_version_key);
2418
1
            err = txn->commit();
2419
1
            if (err != TxnErrorCode::TXN_OK) {
2420
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2421
0
                return -1;
2422
0
            }
2423
1
        }
2424
2425
2
        if (ret == 0) {
2426
1
            ++num_recycled;
2427
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2428
1
            partition_keys.push_back(k);
2429
1
            if (part_pb.db_id() > 0) {
2430
1
                partition_version_keys.push_back(partition_version_key(
2431
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2432
1
            }
2433
1
            metrics_context.total_recycled_num = num_recycled;
2434
1
            metrics_context.report();
2435
1
        }
2436
2
        return ret;
2437
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2332
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2333
7
        ++num_scanned;
2334
7
        RecyclePartitionPB part_pb;
2335
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2336
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2337
0
            return -1;
2338
0
        }
2339
7
        int64_t current_time = ::time(nullptr);
2340
7
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2341
7
                                                            &earlest_ts)) { // not expired
2342
0
            return 0;
2343
0
        }
2344
7
        ++num_expired;
2345
        // decode partition_id
2346
7
        auto k1 = k;
2347
7
        k1.remove_prefix(1);
2348
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2349
7
        decode_key(&k1, &out);
2350
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2351
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2352
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2353
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2354
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2355
        // Change state to RECYCLING
2356
7
        std::unique_ptr<Transaction> txn;
2357
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2358
7
        if (err != TxnErrorCode::TXN_OK) {
2359
0
            LOG_WARNING("failed to create txn").tag("err", err);
2360
0
            return -1;
2361
0
        }
2362
7
        std::string val;
2363
7
        err = txn->get(k, &val);
2364
7
        if (err ==
2365
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2366
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2367
0
            return 0;
2368
0
        }
2369
7
        if (err != TxnErrorCode::TXN_OK) {
2370
0
            LOG_WARNING("failed to get kv");
2371
0
            return -1;
2372
0
        }
2373
7
        part_pb.Clear();
2374
7
        if (!part_pb.ParseFromString(val)) {
2375
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2376
0
            return -1;
2377
0
        }
2378
        // Partitions with PREPARED state MUST have no data
2379
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2380
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2381
7
            txn->put(k, part_pb.SerializeAsString());
2382
7
            err = txn->commit();
2383
7
            if (err != TxnErrorCode::TXN_OK) {
2384
0
                LOG_WARNING("failed to commit txn: {}", err);
2385
0
                return -1;
2386
0
            }
2387
7
        }
2388
2389
7
        int ret = 0;
2390
31
        for (int64_t index_id : part_pb.index_id()) {
2391
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2392
0
                LOG_WARNING("failed to recycle tablets under partition")
2393
0
                        .tag("table_id", part_pb.table_id())
2394
0
                        .tag("instance_id", instance_id_)
2395
0
                        .tag("index_id", index_id)
2396
0
                        .tag("partition_id", partition_id);
2397
0
                ret = -1;
2398
0
            }
2399
31
        }
2400
7
        if (ret == 0 && part_pb.has_db_id()) {
2401
            // Recycle the versioned keys
2402
7
            std::unique_ptr<Transaction> txn;
2403
7
            err = txn_kv_->create_txn(&txn);
2404
7
            if (err != TxnErrorCode::TXN_OK) {
2405
0
                LOG_WARNING("failed to create txn").tag("err", err);
2406
0
                return -1;
2407
0
            }
2408
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2409
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2410
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2411
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2412
7
            std::string partition_version_key =
2413
7
                    versioned::partition_version_key({instance_id_, partition_id});
2414
7
            versioned_remove_all(txn.get(), meta_key);
2415
7
            txn->remove(index_key);
2416
7
            txn->remove(inverted_index_key);
2417
7
            versioned_remove_all(txn.get(), partition_version_key);
2418
7
            err = txn->commit();
2419
7
            if (err != TxnErrorCode::TXN_OK) {
2420
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2421
0
                return -1;
2422
0
            }
2423
7
        }
2424
2425
7
        if (ret == 0) {
2426
7
            ++num_recycled;
2427
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2428
7
            partition_keys.push_back(k);
2429
7
            if (part_pb.db_id() > 0) {
2430
7
                partition_version_keys.push_back(partition_version_key(
2431
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2432
7
            }
2433
7
            metrics_context.total_recycled_num = num_recycled;
2434
7
            metrics_context.report();
2435
7
        }
2436
7
        return ret;
2437
7
    };
2438
2439
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2440
5
        if (partition_keys.empty()) return 0;
2441
4
        DORIS_CLOUD_DEFER {
2442
4
            partition_keys.clear();
2443
4
            partition_version_keys.clear();
2444
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2441
1
        DORIS_CLOUD_DEFER {
2442
1
            partition_keys.clear();
2443
1
            partition_version_keys.clear();
2444
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2441
3
        DORIS_CLOUD_DEFER {
2442
3
            partition_keys.clear();
2443
3
            partition_version_keys.clear();
2444
3
        };
2445
4
        std::unique_ptr<Transaction> txn;
2446
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2447
4
        if (err != TxnErrorCode::TXN_OK) {
2448
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2449
0
            return -1;
2450
0
        }
2451
8
        for (auto& k : partition_keys) {
2452
8
            txn->remove(k);
2453
8
        }
2454
8
        for (auto& k : partition_version_keys) {
2455
8
            txn->remove(k);
2456
8
        }
2457
4
        err = txn->commit();
2458
4
        if (err != TxnErrorCode::TXN_OK) {
2459
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2460
0
                         << " err=" << err;
2461
0
            return -1;
2462
0
        }
2463
4
        return 0;
2464
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2439
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2440
2
        if (partition_keys.empty()) return 0;
2441
1
        DORIS_CLOUD_DEFER {
2442
1
            partition_keys.clear();
2443
1
            partition_version_keys.clear();
2444
1
        };
2445
1
        std::unique_ptr<Transaction> txn;
2446
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2447
1
        if (err != TxnErrorCode::TXN_OK) {
2448
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2449
0
            return -1;
2450
0
        }
2451
1
        for (auto& k : partition_keys) {
2452
1
            txn->remove(k);
2453
1
        }
2454
1
        for (auto& k : partition_version_keys) {
2455
1
            txn->remove(k);
2456
1
        }
2457
1
        err = txn->commit();
2458
1
        if (err != TxnErrorCode::TXN_OK) {
2459
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2460
0
                         << " err=" << err;
2461
0
            return -1;
2462
0
        }
2463
1
        return 0;
2464
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2439
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2440
3
        if (partition_keys.empty()) return 0;
2441
3
        DORIS_CLOUD_DEFER {
2442
3
            partition_keys.clear();
2443
3
            partition_version_keys.clear();
2444
3
        };
2445
3
        std::unique_ptr<Transaction> txn;
2446
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2447
3
        if (err != TxnErrorCode::TXN_OK) {
2448
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2449
0
            return -1;
2450
0
        }
2451
7
        for (auto& k : partition_keys) {
2452
7
            txn->remove(k);
2453
7
        }
2454
7
        for (auto& k : partition_version_keys) {
2455
7
            txn->remove(k);
2456
7
        }
2457
3
        err = txn->commit();
2458
3
        if (err != TxnErrorCode::TXN_OK) {
2459
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2460
0
                         << " err=" << err;
2461
0
            return -1;
2462
0
        }
2463
3
        return 0;
2464
3
    };
2465
2466
15
    if (config::enable_recycler_stats_metrics) {
2467
0
        scan_and_statistics_partitions();
2468
0
    }
2469
    // recycle_func and loop_done for scan and recycle
2470
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
2471
15
}
2472
2473
14
int InstanceRecycler::recycle_versions() {
2474
14
    if (should_recycle_versioned_keys()) {
2475
2
        return recycle_orphan_partitions();
2476
2
    }
2477
2478
12
    int64_t num_scanned = 0;
2479
12
    int64_t num_recycled = 0;
2480
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
2481
2482
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
2483
2484
12
    auto start_time = steady_clock::now();
2485
2486
12
    DORIS_CLOUD_DEFER {
2487
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2488
12
        metrics_context.finish_report();
2489
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2490
12
                .tag("instance_id", instance_id_)
2491
12
                .tag("num_scanned", num_scanned)
2492
12
                .tag("num_recycled", num_recycled);
2493
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
2486
12
    DORIS_CLOUD_DEFER {
2487
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2488
12
        metrics_context.finish_report();
2489
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2490
12
                .tag("instance_id", instance_id_)
2491
12
                .tag("num_scanned", num_scanned)
2492
12
                .tag("num_recycled", num_recycled);
2493
12
    };
2494
2495
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
2496
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
2497
12
    int64_t last_scanned_table_id = 0;
2498
12
    bool is_recycled = false; // Is last scanned kv recycled
2499
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
2500
12
                         &metrics_context, this](std::string_view k, std::string_view) {
2501
2
        ++num_scanned;
2502
2
        auto k1 = k;
2503
2
        k1.remove_prefix(1);
2504
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2505
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2506
2
        decode_key(&k1, &out);
2507
2
        DCHECK_EQ(out.size(), 6) << k;
2508
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2509
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2510
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2511
0
            return 0;
2512
0
        }
2513
2
        last_scanned_table_id = table_id;
2514
2
        is_recycled = false;
2515
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2516
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2517
2
        std::unique_ptr<Transaction> txn;
2518
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2519
2
        if (err != TxnErrorCode::TXN_OK) {
2520
0
            return -1;
2521
0
        }
2522
2
        std::unique_ptr<RangeGetIterator> iter;
2523
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2524
2
        if (err != TxnErrorCode::TXN_OK) {
2525
0
            return -1;
2526
0
        }
2527
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2528
1
            return 0;
2529
1
        }
2530
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2531
        // 1. Remove all partition version kvs of this table
2532
1
        auto partition_version_key_begin =
2533
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2534
1
        auto partition_version_key_end =
2535
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2536
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2537
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2538
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2539
1
                     << " table_id=" << table_id;
2540
        // 2. Remove the table version kv of this table
2541
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2542
1
        txn->remove(tbl_version_key);
2543
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2544
        // 3. Remove mow delete bitmap update lock and tablet job lock
2545
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2546
1
        txn->remove(lock_key);
2547
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2548
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2549
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2550
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2551
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2552
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2553
1
                     << " table_id=" << table_id;
2554
1
        err = txn->commit();
2555
1
        if (err != TxnErrorCode::TXN_OK) {
2556
0
            return -1;
2557
0
        }
2558
1
        metrics_context.total_recycled_num = ++num_recycled;
2559
1
        metrics_context.report();
2560
1
        is_recycled = true;
2561
1
        return 0;
2562
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2500
2
                         &metrics_context, this](std::string_view k, std::string_view) {
2501
2
        ++num_scanned;
2502
2
        auto k1 = k;
2503
2
        k1.remove_prefix(1);
2504
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2505
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2506
2
        decode_key(&k1, &out);
2507
2
        DCHECK_EQ(out.size(), 6) << k;
2508
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2509
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2510
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2511
0
            return 0;
2512
0
        }
2513
2
        last_scanned_table_id = table_id;
2514
2
        is_recycled = false;
2515
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2516
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2517
2
        std::unique_ptr<Transaction> txn;
2518
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2519
2
        if (err != TxnErrorCode::TXN_OK) {
2520
0
            return -1;
2521
0
        }
2522
2
        std::unique_ptr<RangeGetIterator> iter;
2523
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2524
2
        if (err != TxnErrorCode::TXN_OK) {
2525
0
            return -1;
2526
0
        }
2527
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2528
1
            return 0;
2529
1
        }
2530
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2531
        // 1. Remove all partition version kvs of this table
2532
1
        auto partition_version_key_begin =
2533
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2534
1
        auto partition_version_key_end =
2535
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2536
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2537
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2538
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2539
1
                     << " table_id=" << table_id;
2540
        // 2. Remove the table version kv of this table
2541
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2542
1
        txn->remove(tbl_version_key);
2543
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2544
        // 3. Remove mow delete bitmap update lock and tablet job lock
2545
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2546
1
        txn->remove(lock_key);
2547
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2548
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2549
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2550
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2551
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2552
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2553
1
                     << " table_id=" << table_id;
2554
1
        err = txn->commit();
2555
1
        if (err != TxnErrorCode::TXN_OK) {
2556
0
            return -1;
2557
0
        }
2558
1
        metrics_context.total_recycled_num = ++num_recycled;
2559
1
        metrics_context.report();
2560
1
        is_recycled = true;
2561
1
        return 0;
2562
1
    };
2563
2564
12
    if (config::enable_recycler_stats_metrics) {
2565
0
        scan_and_statistics_versions();
2566
0
    }
2567
    // recycle_func and loop_done for scan and recycle
2568
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
2569
14
}
2570
2571
3
int InstanceRecycler::recycle_orphan_partitions() {
2572
3
    int64_t num_scanned = 0;
2573
3
    int64_t num_recycled = 0;
2574
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
2575
2576
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
2577
3
            .tag("instance_id", instance_id_);
2578
2579
3
    auto start_time = steady_clock::now();
2580
2581
3
    DORIS_CLOUD_DEFER {
2582
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2583
3
        metrics_context.finish_report();
2584
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2585
3
                .tag("instance_id", instance_id_)
2586
3
                .tag("num_scanned", num_scanned)
2587
3
                .tag("num_recycled", num_recycled);
2588
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
2581
3
    DORIS_CLOUD_DEFER {
2582
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2583
3
        metrics_context.finish_report();
2584
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2585
3
                .tag("instance_id", instance_id_)
2586
3
                .tag("num_scanned", num_scanned)
2587
3
                .tag("num_recycled", num_recycled);
2588
3
    };
2589
2590
3
    bool is_empty_table = false;        // whether the table has no indexes
2591
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
2592
3
    int64_t current_table_id = 0;       // current scanning table id
2593
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
2594
3
                         &current_table_id, &is_table_kvs_recycled,
2595
3
                         this](std::string_view k, std::string_view) {
2596
2
        ++num_scanned;
2597
2598
2
        std::string_view k1(k);
2599
2
        int64_t db_id, table_id, partition_id;
2600
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2601
2
                                                            &partition_id)) {
2602
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2603
0
            return -1;
2604
2
        } else if (table_id != current_table_id) {
2605
2
            current_table_id = table_id;
2606
2
            is_table_kvs_recycled = false;
2607
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2608
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2609
2
            if (err != TxnErrorCode::TXN_OK) {
2610
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2611
0
                             << " table_id=" << table_id << " err=" << err;
2612
0
                return -1;
2613
0
            }
2614
2
        }
2615
2616
2
        if (!is_empty_table) {
2617
            // table is not empty, skip recycle
2618
1
            return 0;
2619
1
        }
2620
2621
1
        std::unique_ptr<Transaction> txn;
2622
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2623
1
        if (err != TxnErrorCode::TXN_OK) {
2624
0
            return -1;
2625
0
        }
2626
2627
        // 1. Remove all partition related kvs
2628
1
        std::string partition_meta_key =
2629
1
                versioned::meta_partition_key({instance_id_, partition_id});
2630
1
        std::string partition_index_key =
2631
1
                versioned::partition_index_key({instance_id_, partition_id});
2632
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2633
1
                {instance_id_, db_id, table_id, partition_id});
2634
1
        std::string partition_version_key =
2635
1
                versioned::partition_version_key({instance_id_, partition_id});
2636
1
        txn->remove(partition_index_key);
2637
1
        txn->remove(partition_inverted_key);
2638
1
        versioned_remove_all(txn.get(), partition_meta_key);
2639
1
        versioned_remove_all(txn.get(), partition_version_key);
2640
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2641
1
                     << " table_id=" << table_id << " db_id=" << db_id
2642
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2643
1
                     << " partition_version_key=" << hex(partition_version_key);
2644
2645
1
        if (!is_table_kvs_recycled) {
2646
1
            is_table_kvs_recycled = true;
2647
2648
            // 2. Remove the table version kv of this table
2649
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2650
1
            versioned_remove_all(txn.get(), table_version_key);
2651
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2652
            // 3. Remove mow delete bitmap update lock and tablet job lock
2653
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2654
1
            txn->remove(lock_key);
2655
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2656
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2657
1
            std::string tablet_job_key_end =
2658
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2659
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2660
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2661
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2662
1
                         << " table_id=" << table_id;
2663
1
        }
2664
2665
1
        err = txn->commit();
2666
1
        if (err != TxnErrorCode::TXN_OK) {
2667
0
            return -1;
2668
0
        }
2669
1
        metrics_context.total_recycled_num = ++num_recycled;
2670
1
        metrics_context.report();
2671
1
        return 0;
2672
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2595
2
                         this](std::string_view k, std::string_view) {
2596
2
        ++num_scanned;
2597
2598
2
        std::string_view k1(k);
2599
2
        int64_t db_id, table_id, partition_id;
2600
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2601
2
                                                            &partition_id)) {
2602
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2603
0
            return -1;
2604
2
        } else if (table_id != current_table_id) {
2605
2
            current_table_id = table_id;
2606
2
            is_table_kvs_recycled = false;
2607
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2608
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2609
2
            if (err != TxnErrorCode::TXN_OK) {
2610
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2611
0
                             << " table_id=" << table_id << " err=" << err;
2612
0
                return -1;
2613
0
            }
2614
2
        }
2615
2616
2
        if (!is_empty_table) {
2617
            // table is not empty, skip recycle
2618
1
            return 0;
2619
1
        }
2620
2621
1
        std::unique_ptr<Transaction> txn;
2622
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2623
1
        if (err != TxnErrorCode::TXN_OK) {
2624
0
            return -1;
2625
0
        }
2626
2627
        // 1. Remove all partition related kvs
2628
1
        std::string partition_meta_key =
2629
1
                versioned::meta_partition_key({instance_id_, partition_id});
2630
1
        std::string partition_index_key =
2631
1
                versioned::partition_index_key({instance_id_, partition_id});
2632
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2633
1
                {instance_id_, db_id, table_id, partition_id});
2634
1
        std::string partition_version_key =
2635
1
                versioned::partition_version_key({instance_id_, partition_id});
2636
1
        txn->remove(partition_index_key);
2637
1
        txn->remove(partition_inverted_key);
2638
1
        versioned_remove_all(txn.get(), partition_meta_key);
2639
1
        versioned_remove_all(txn.get(), partition_version_key);
2640
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2641
1
                     << " table_id=" << table_id << " db_id=" << db_id
2642
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2643
1
                     << " partition_version_key=" << hex(partition_version_key);
2644
2645
1
        if (!is_table_kvs_recycled) {
2646
1
            is_table_kvs_recycled = true;
2647
2648
            // 2. Remove the table version kv of this table
2649
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2650
1
            versioned_remove_all(txn.get(), table_version_key);
2651
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2652
            // 3. Remove mow delete bitmap update lock and tablet job lock
2653
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2654
1
            txn->remove(lock_key);
2655
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2656
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2657
1
            std::string tablet_job_key_end =
2658
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2659
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2660
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2661
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2662
1
                         << " table_id=" << table_id;
2663
1
        }
2664
2665
1
        err = txn->commit();
2666
1
        if (err != TxnErrorCode::TXN_OK) {
2667
0
            return -1;
2668
0
        }
2669
1
        metrics_context.total_recycled_num = ++num_recycled;
2670
1
        metrics_context.report();
2671
1
        return 0;
2672
1
    };
2673
2674
    // recycle_func and loop_done for scan and recycle
2675
3
    return scan_and_recycle(
2676
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
2677
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
2678
3
            std::move(recycle_func));
2679
3
}
2680
2681
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
2682
                                      RecyclerMetricsContext& metrics_context,
2683
49
                                      int64_t partition_id) {
2684
49
    bool is_multi_version =
2685
49
            instance_info_.has_multi_version_status() &&
2686
49
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
2687
49
    int64_t num_scanned = 0;
2688
49
    std::atomic_long num_recycled = 0;
2689
2690
49
    std::string tablet_key_begin, tablet_key_end;
2691
49
    std::string stats_key_begin, stats_key_end;
2692
49
    std::string job_key_begin, job_key_end;
2693
2694
49
    std::string tablet_belongs;
2695
49
    if (partition_id > 0) {
2696
        // recycle tablets in a partition belonging to the index
2697
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2698
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2699
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
2700
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
2701
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
2702
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
2703
33
        tablet_belongs = "partition";
2704
33
    } else {
2705
        // recycle tablets in the index
2706
16
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2707
16
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2708
16
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
2709
16
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
2710
16
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
2711
16
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
2712
16
        tablet_belongs = "index";
2713
16
    }
2714
2715
49
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
2716
49
            .tag("table_id", table_id)
2717
49
            .tag("index_id", index_id)
2718
49
            .tag("partition_id", partition_id);
2719
2720
49
    auto start_time = steady_clock::now();
2721
2722
49
    DORIS_CLOUD_DEFER {
2723
49
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2724
49
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2725
49
                .tag("instance_id", instance_id_)
2726
49
                .tag("table_id", table_id)
2727
49
                .tag("index_id", index_id)
2728
49
                .tag("partition_id", partition_id)
2729
49
                .tag("num_scanned", num_scanned)
2730
49
                .tag("num_recycled", num_recycled);
2731
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2722
4
    DORIS_CLOUD_DEFER {
2723
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2724
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2725
4
                .tag("instance_id", instance_id_)
2726
4
                .tag("table_id", table_id)
2727
4
                .tag("index_id", index_id)
2728
4
                .tag("partition_id", partition_id)
2729
4
                .tag("num_scanned", num_scanned)
2730
4
                .tag("num_recycled", num_recycled);
2731
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2722
45
    DORIS_CLOUD_DEFER {
2723
45
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2724
45
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2725
45
                .tag("instance_id", instance_id_)
2726
45
                .tag("table_id", table_id)
2727
45
                .tag("index_id", index_id)
2728
45
                .tag("partition_id", partition_id)
2729
45
                .tag("num_scanned", num_scanned)
2730
45
                .tag("num_recycled", num_recycled);
2731
45
    };
2732
2733
    // The first string_view represents the tablet key which has been recycled
2734
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
2735
49
    using TabletKeyPair = std::pair<std::string_view, bool>;
2736
49
    SyncExecutor<TabletKeyPair> sync_executor(
2737
49
            _thread_pool_group.recycle_tablet_pool,
2738
49
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
2739
49
                        index_id, partition_id),
2740
4.23k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2740
4.00k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2740
237
            [](const TabletKeyPair& k) { return k.first.empty(); });
2741
2742
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
2743
49
    std::vector<std::string> tablet_idx_keys;
2744
49
    std::vector<std::string> restore_job_keys;
2745
49
    std::vector<std::string> init_rs_keys;
2746
49
    std::vector<std::string> tablet_compact_stats_keys;
2747
49
    std::vector<std::string> tablet_load_stats_keys;
2748
49
    std::vector<std::string> versioned_meta_tablet_keys;
2749
8.24k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2750
8.24k
        bool use_range_remove = true;
2751
8.24k
        ++num_scanned;
2752
8.24k
        doris::TabletMetaCloudPB tablet_meta_pb;
2753
8.24k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2754
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2755
0
            use_range_remove = false;
2756
0
            return -1;
2757
0
        }
2758
8.24k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2759
2760
8.24k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2761
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2762
4.00k
            return -1;
2763
4.00k
        }
2764
2765
4.24k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2766
4.24k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2767
4.24k
        if (is_multi_version) {
2768
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2769
6
            tablet_compact_stats_keys.push_back(
2770
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2771
6
            tablet_load_stats_keys.push_back(
2772
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2773
6
            versioned_meta_tablet_keys.push_back(
2774
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2775
6
        }
2776
4.24k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2777
4.23k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2778
4.23k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2779
4.23k
            if (recycle_tablet(tid, metrics_context) != 0) {
2780
1
                LOG_WARNING("failed to recycle tablet")
2781
1
                        .tag("instance_id", instance_id_)
2782
1
                        .tag("tablet_id", tid);
2783
1
                range_move = false;
2784
1
                return {std::string_view(), range_move};
2785
1
            }
2786
4.23k
            ++num_recycled;
2787
4.23k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2788
4.23k
            return {k, range_move};
2789
4.23k
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2778
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2779
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2780
0
                LOG_WARNING("failed to recycle tablet")
2781
0
                        .tag("instance_id", instance_id_)
2782
0
                        .tag("tablet_id", tid);
2783
0
                range_move = false;
2784
0
                return {std::string_view(), range_move};
2785
0
            }
2786
4.00k
            ++num_recycled;
2787
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2788
4.00k
            return {k, range_move};
2789
4.00k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2778
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2779
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2780
1
                LOG_WARNING("failed to recycle tablet")
2781
1
                        .tag("instance_id", instance_id_)
2782
1
                        .tag("tablet_id", tid);
2783
1
                range_move = false;
2784
1
                return {std::string_view(), range_move};
2785
1
            }
2786
236
            ++num_recycled;
2787
236
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2788
236
            return {k, range_move};
2789
237
        });
2790
4.23k
        return 0;
2791
4.24k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2749
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2750
8.00k
        bool use_range_remove = true;
2751
8.00k
        ++num_scanned;
2752
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
2753
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2754
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2755
0
            use_range_remove = false;
2756
0
            return -1;
2757
0
        }
2758
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2759
2760
8.00k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2761
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2762
4.00k
            return -1;
2763
4.00k
        }
2764
2765
4.00k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2766
4.00k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2767
4.00k
        if (is_multi_version) {
2768
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2769
0
            tablet_compact_stats_keys.push_back(
2770
0
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2771
0
            tablet_load_stats_keys.push_back(
2772
0
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2773
0
            versioned_meta_tablet_keys.push_back(
2774
0
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2775
0
        }
2776
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2777
4.00k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2778
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2779
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2780
4.00k
                LOG_WARNING("failed to recycle tablet")
2781
4.00k
                        .tag("instance_id", instance_id_)
2782
4.00k
                        .tag("tablet_id", tid);
2783
4.00k
                range_move = false;
2784
4.00k
                return {std::string_view(), range_move};
2785
4.00k
            }
2786
4.00k
            ++num_recycled;
2787
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2788
4.00k
            return {k, range_move};
2789
4.00k
        });
2790
4.00k
        return 0;
2791
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2749
240
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2750
240
        bool use_range_remove = true;
2751
240
        ++num_scanned;
2752
240
        doris::TabletMetaCloudPB tablet_meta_pb;
2753
240
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2754
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2755
0
            use_range_remove = false;
2756
0
            return -1;
2757
0
        }
2758
240
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2759
2760
240
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2761
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2762
0
            return -1;
2763
0
        }
2764
2765
240
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2766
240
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2767
240
        if (is_multi_version) {
2768
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2769
6
            tablet_compact_stats_keys.push_back(
2770
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2771
6
            tablet_load_stats_keys.push_back(
2772
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2773
6
            versioned_meta_tablet_keys.push_back(
2774
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2775
6
        }
2776
240
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2777
237
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2778
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2779
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2780
237
                LOG_WARNING("failed to recycle tablet")
2781
237
                        .tag("instance_id", instance_id_)
2782
237
                        .tag("tablet_id", tid);
2783
237
                range_move = false;
2784
237
                return {std::string_view(), range_move};
2785
237
            }
2786
237
            ++num_recycled;
2787
237
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2788
237
            return {k, range_move};
2789
237
        });
2790
237
        return 0;
2791
240
    };
2792
2793
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
2794
49
    auto loop_done = [&, this]() -> int {
2795
49
        bool finished = true;
2796
49
        auto tablet_keys = sync_executor.when_all(&finished);
2797
49
        if (!finished) {
2798
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2799
1
            return -1;
2800
1
        }
2801
48
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2802
46
        if (!tablet_keys.empty() &&
2803
46
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2803
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2803
42
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2804
0
            return -1;
2805
0
        }
2806
        // sort the vector using key's order
2807
46
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2808
49.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2808
48.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2808
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2809
46
        bool use_range_remove = true;
2810
4.23k
        for (auto& [_, remove] : tablet_keys) {
2811
4.23k
            if (!remove) {
2812
0
                use_range_remove = remove;
2813
0
                break;
2814
0
            }
2815
4.23k
        }
2816
46
        DORIS_CLOUD_DEFER {
2817
46
            tablet_idx_keys.clear();
2818
46
            restore_job_keys.clear();
2819
46
            init_rs_keys.clear();
2820
46
            tablet_compact_stats_keys.clear();
2821
46
            tablet_load_stats_keys.clear();
2822
46
            versioned_meta_tablet_keys.clear();
2823
46
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2816
2
        DORIS_CLOUD_DEFER {
2817
2
            tablet_idx_keys.clear();
2818
2
            restore_job_keys.clear();
2819
2
            init_rs_keys.clear();
2820
2
            tablet_compact_stats_keys.clear();
2821
2
            tablet_load_stats_keys.clear();
2822
2
            versioned_meta_tablet_keys.clear();
2823
2
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2816
44
        DORIS_CLOUD_DEFER {
2817
44
            tablet_idx_keys.clear();
2818
44
            restore_job_keys.clear();
2819
44
            init_rs_keys.clear();
2820
44
            tablet_compact_stats_keys.clear();
2821
44
            tablet_load_stats_keys.clear();
2822
44
            versioned_meta_tablet_keys.clear();
2823
44
        };
2824
46
        std::unique_ptr<Transaction> txn;
2825
46
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2826
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2827
0
            return -1;
2828
0
        }
2829
46
        std::string tablet_key_end;
2830
46
        if (!tablet_keys.empty()) {
2831
44
            if (use_range_remove) {
2832
44
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2833
44
                txn->remove(tablet_keys.front().first, tablet_key_end);
2834
44
            } else {
2835
0
                for (auto& [k, _] : tablet_keys) {
2836
0
                    txn->remove(k);
2837
0
                }
2838
0
            }
2839
44
        }
2840
46
        if (is_multi_version) {
2841
6
            for (auto& k : tablet_compact_stats_keys) {
2842
                // Remove all versions of tablet compact stats for recycled tablet
2843
6
                LOG_INFO("remove versioned tablet compact stats key")
2844
6
                        .tag("compact_stats_key", hex(k));
2845
6
                versioned_remove_all(txn.get(), k);
2846
6
            }
2847
6
            for (auto& k : tablet_load_stats_keys) {
2848
                // Remove all versions of tablet load stats for recycled tablet
2849
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2850
6
                versioned_remove_all(txn.get(), k);
2851
6
            }
2852
6
            for (auto& k : versioned_meta_tablet_keys) {
2853
                // Remove all versions of meta tablet for recycled tablet
2854
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2855
6
                versioned_remove_all(txn.get(), k);
2856
6
            }
2857
5
        }
2858
4.24k
        for (auto& k : tablet_idx_keys) {
2859
4.24k
            txn->remove(k);
2860
4.24k
        }
2861
4.24k
        for (auto& k : restore_job_keys) {
2862
4.24k
            txn->remove(k);
2863
4.24k
        }
2864
46
        for (auto& k : init_rs_keys) {
2865
0
            txn->remove(k);
2866
0
        }
2867
46
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2868
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2869
0
                         << ", err=" << err;
2870
0
            return -1;
2871
0
        }
2872
46
        return 0;
2873
46
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2794
4
    auto loop_done = [&, this]() -> int {
2795
4
        bool finished = true;
2796
4
        auto tablet_keys = sync_executor.when_all(&finished);
2797
4
        if (!finished) {
2798
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2799
0
            return -1;
2800
0
        }
2801
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2802
2
        if (!tablet_keys.empty() &&
2803
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2804
0
            return -1;
2805
0
        }
2806
        // sort the vector using key's order
2807
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2808
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2809
2
        bool use_range_remove = true;
2810
4.00k
        for (auto& [_, remove] : tablet_keys) {
2811
4.00k
            if (!remove) {
2812
0
                use_range_remove = remove;
2813
0
                break;
2814
0
            }
2815
4.00k
        }
2816
2
        DORIS_CLOUD_DEFER {
2817
2
            tablet_idx_keys.clear();
2818
2
            restore_job_keys.clear();
2819
2
            init_rs_keys.clear();
2820
2
            tablet_compact_stats_keys.clear();
2821
2
            tablet_load_stats_keys.clear();
2822
2
            versioned_meta_tablet_keys.clear();
2823
2
        };
2824
2
        std::unique_ptr<Transaction> txn;
2825
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2826
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2827
0
            return -1;
2828
0
        }
2829
2
        std::string tablet_key_end;
2830
2
        if (!tablet_keys.empty()) {
2831
2
            if (use_range_remove) {
2832
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2833
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
2834
2
            } else {
2835
0
                for (auto& [k, _] : tablet_keys) {
2836
0
                    txn->remove(k);
2837
0
                }
2838
0
            }
2839
2
        }
2840
2
        if (is_multi_version) {
2841
0
            for (auto& k : tablet_compact_stats_keys) {
2842
                // Remove all versions of tablet compact stats for recycled tablet
2843
0
                LOG_INFO("remove versioned tablet compact stats key")
2844
0
                        .tag("compact_stats_key", hex(k));
2845
0
                versioned_remove_all(txn.get(), k);
2846
0
            }
2847
0
            for (auto& k : tablet_load_stats_keys) {
2848
                // Remove all versions of tablet load stats for recycled tablet
2849
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2850
0
                versioned_remove_all(txn.get(), k);
2851
0
            }
2852
0
            for (auto& k : versioned_meta_tablet_keys) {
2853
                // Remove all versions of meta tablet for recycled tablet
2854
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2855
0
                versioned_remove_all(txn.get(), k);
2856
0
            }
2857
0
        }
2858
4.00k
        for (auto& k : tablet_idx_keys) {
2859
4.00k
            txn->remove(k);
2860
4.00k
        }
2861
4.00k
        for (auto& k : restore_job_keys) {
2862
4.00k
            txn->remove(k);
2863
4.00k
        }
2864
2
        for (auto& k : init_rs_keys) {
2865
0
            txn->remove(k);
2866
0
        }
2867
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2868
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2869
0
                         << ", err=" << err;
2870
0
            return -1;
2871
0
        }
2872
2
        return 0;
2873
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2794
45
    auto loop_done = [&, this]() -> int {
2795
45
        bool finished = true;
2796
45
        auto tablet_keys = sync_executor.when_all(&finished);
2797
45
        if (!finished) {
2798
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2799
1
            return -1;
2800
1
        }
2801
44
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2802
44
        if (!tablet_keys.empty() &&
2803
44
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2804
0
            return -1;
2805
0
        }
2806
        // sort the vector using key's order
2807
44
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2808
44
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2809
44
        bool use_range_remove = true;
2810
236
        for (auto& [_, remove] : tablet_keys) {
2811
236
            if (!remove) {
2812
0
                use_range_remove = remove;
2813
0
                break;
2814
0
            }
2815
236
        }
2816
44
        DORIS_CLOUD_DEFER {
2817
44
            tablet_idx_keys.clear();
2818
44
            restore_job_keys.clear();
2819
44
            init_rs_keys.clear();
2820
44
            tablet_compact_stats_keys.clear();
2821
44
            tablet_load_stats_keys.clear();
2822
44
            versioned_meta_tablet_keys.clear();
2823
44
        };
2824
44
        std::unique_ptr<Transaction> txn;
2825
44
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2826
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2827
0
            return -1;
2828
0
        }
2829
44
        std::string tablet_key_end;
2830
44
        if (!tablet_keys.empty()) {
2831
42
            if (use_range_remove) {
2832
42
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2833
42
                txn->remove(tablet_keys.front().first, tablet_key_end);
2834
42
            } else {
2835
0
                for (auto& [k, _] : tablet_keys) {
2836
0
                    txn->remove(k);
2837
0
                }
2838
0
            }
2839
42
        }
2840
44
        if (is_multi_version) {
2841
6
            for (auto& k : tablet_compact_stats_keys) {
2842
                // Remove all versions of tablet compact stats for recycled tablet
2843
6
                LOG_INFO("remove versioned tablet compact stats key")
2844
6
                        .tag("compact_stats_key", hex(k));
2845
6
                versioned_remove_all(txn.get(), k);
2846
6
            }
2847
6
            for (auto& k : tablet_load_stats_keys) {
2848
                // Remove all versions of tablet load stats for recycled tablet
2849
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2850
6
                versioned_remove_all(txn.get(), k);
2851
6
            }
2852
6
            for (auto& k : versioned_meta_tablet_keys) {
2853
                // Remove all versions of meta tablet for recycled tablet
2854
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2855
6
                versioned_remove_all(txn.get(), k);
2856
6
            }
2857
5
        }
2858
239
        for (auto& k : tablet_idx_keys) {
2859
239
            txn->remove(k);
2860
239
        }
2861
239
        for (auto& k : restore_job_keys) {
2862
239
            txn->remove(k);
2863
239
        }
2864
44
        for (auto& k : init_rs_keys) {
2865
0
            txn->remove(k);
2866
0
        }
2867
44
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2868
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2869
0
                         << ", err=" << err;
2870
0
            return -1;
2871
0
        }
2872
44
        return 0;
2873
44
    };
2874
2875
49
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
2876
49
                               std::move(loop_done));
2877
49
    if (ret != 0) {
2878
3
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
2879
3
        return ret;
2880
3
    }
2881
2882
    // directly remove tablet stats and tablet jobs of these dropped index or partition
2883
46
    std::unique_ptr<Transaction> txn;
2884
46
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2885
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
2886
0
        return -1;
2887
0
    }
2888
46
    txn->remove(stats_key_begin, stats_key_end);
2889
46
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
2890
46
                 << " end=" << hex(stats_key_end);
2891
46
    txn->remove(job_key_begin, job_key_end);
2892
46
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
2893
46
    std::string schema_key_begin, schema_key_end;
2894
46
    std::string schema_dict_key;
2895
46
    std::string versioned_schema_key_begin, versioned_schema_key_end;
2896
46
    if (partition_id <= 0) {
2897
        // Delete schema kv of this index
2898
14
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
2899
14
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
2900
14
        txn->remove(schema_key_begin, schema_key_end);
2901
14
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
2902
14
                     << " end=" << hex(schema_key_end);
2903
14
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
2904
14
        txn->remove(schema_dict_key);
2905
14
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
2906
14
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
2907
14
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
2908
14
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
2909
14
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
2910
14
                     << " end=" << hex(versioned_schema_key_end);
2911
14
    }
2912
2913
46
    TxnErrorCode err = txn->commit();
2914
46
    if (err != TxnErrorCode::TXN_OK) {
2915
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
2916
0
                     << " err=" << err;
2917
0
        return -1;
2918
0
    }
2919
2920
46
    return ret;
2921
46
}
2922
2923
4.81k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
2924
4.81k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
2925
4.81k
    int64_t num_segments = rs_meta_pb.num_segments();
2926
4.81k
    if (num_segments <= 0) return 0;
2927
2928
4.81k
    std::vector<std::string> file_paths;
2929
4.81k
    if (decrement_packed_file_ref_counts(rs_meta_pb) != 0) {
2930
0
        return -1;
2931
0
    }
2932
2933
    // Process inverted indexes
2934
4.81k
    std::vector<std::pair<int64_t, std::string>> index_ids;
2935
    // default format as v1.
2936
4.81k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
2937
4.81k
    bool delete_rowset_data_by_prefix = false;
2938
4.81k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
2939
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
2940
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
2941
0
        delete_rowset_data_by_prefix = true;
2942
4.81k
    } else if (rs_meta_pb.has_tablet_schema()) {
2943
9.00k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
2944
9.00k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
2945
9.00k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
2946
9.00k
            }
2947
9.00k
        }
2948
4.40k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
2949
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
2950
2.00k
        }
2951
4.40k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
2952
        // schema version and index id are not found, delete rowset data by prefix directly.
2953
0
        delete_rowset_data_by_prefix = true;
2954
409
    } else {
2955
        // otherwise, try to get schema kv
2956
409
        InvertedIndexInfo index_info;
2957
409
        int inverted_index_get_ret = inverted_index_id_cache_->get(
2958
409
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
2959
409
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
2960
409
                                 &inverted_index_get_ret);
2961
409
        if (inverted_index_get_ret == 0) {
2962
409
            index_format = index_info.first;
2963
409
            index_ids = index_info.second;
2964
409
        } else if (inverted_index_get_ret == 1) {
2965
            // 1. Schema kv not found means tablet has been recycled
2966
            // Maybe some tablet recycle failed by some bugs
2967
            // We need to delete again to double check
2968
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
2969
            // because we are uncertain about the inverted index information.
2970
            // If there are inverted indexes, some data might not be deleted,
2971
            // but this is acceptable as we have made our best effort to delete the data.
2972
0
            LOG_INFO(
2973
0
                    "delete rowset data schema kv not found, need to delete again to double "
2974
0
                    "check")
2975
0
                    .tag("instance_id", instance_id_)
2976
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
2977
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
2978
            // Currently index_ids is guaranteed to be empty,
2979
            // but we clear it again here as a safeguard against future code changes
2980
            // that might cause index_ids to no longer be empty
2981
0
            index_format = InvertedIndexStorageFormatPB::V2;
2982
0
            index_ids.clear();
2983
0
        } else {
2984
            // failed to get schema kv, delete rowset data by prefix directly.
2985
0
            delete_rowset_data_by_prefix = true;
2986
0
        }
2987
409
    }
2988
2989
4.81k
    if (delete_rowset_data_by_prefix) {
2990
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
2991
0
                                  rs_meta_pb.rowset_id_v2());
2992
0
    }
2993
2994
4.81k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
2995
4.81k
    if (it == accessor_map_.end()) {
2996
799
        LOG_WARNING("instance has no such resource id")
2997
799
                .tag("instance_id", instance_id_)
2998
799
                .tag("resource_id", rs_meta_pb.resource_id());
2999
799
        return -1;
3000
799
    }
3001
4.01k
    auto& accessor = it->second;
3002
3003
4.01k
    int64_t tablet_id = rs_meta_pb.tablet_id();
3004
4.01k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
3005
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
3006
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3007
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
3008
40.0k
            for (const auto& index_id : index_ids) {
3009
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
3010
40.0k
                                                            index_id.second));
3011
40.0k
            }
3012
20.0k
        } else if (!index_ids.empty()) {
3013
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3014
0
        }
3015
20.0k
    }
3016
3017
    // Process delete bitmap
3018
4.01k
    file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3019
    // TODO(AlexYue): seems could do do batch
3020
4.01k
    return accessor->delete_files(file_paths);
3021
4.81k
}
3022
3023
61.5k
// Marks the packed slices referenced by a rowset as deleted and refreshes the
// bookkeeping of every packed file that contains them.
//
// The rowset's packed slice locations are grouped by packed file path. For
// each packed file, inside its own transaction (retried on commit conflict up
// to config::decrement_packed_file_ref_counts_retry_times, at least once):
//   1. the packed file info kv is read and parsed,
//   2. every slice named by the rowset that is not yet deleted is flagged
//      deleted (and corrected),
//   3. ref_cnt and remaining_slice_bytes are recomputed from the slices that
//      are still not deleted,
//   4. the state becomes RECYCLING when no slice is left,
//   5. the kv is written back and committed; when no slice is left the packed
//      file and its kv are removed right away.
// A missing kv, or a kv in which nothing had to change, counts as success.
//
// Returns 0 if every packed file was handled, -1 if any of them failed. A
// failure on one packed file does not stop the remaining ones from being
// processed.
int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb) {
    LOG_INFO("begin process_packed_file_location_index")
            .tag("instance_id", instance_id_)
            .tag("tablet_id", rs_meta_pb.tablet_id())
            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
            .tag("index_map_size", rs_meta_pb.packed_slice_locations_size());
    const auto& slice_locations = rs_meta_pb.packed_slice_locations();
    if (slice_locations.empty()) {
        LOG_INFO("skip merge file update: empty merge_file_segment_index")
                .tag("instance_id", instance_id_)
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("rowset_id", rs_meta_pb.rowset_id_v2());
        return 0;
    }

    // packed file path -> paths of the small files of this rowset stored in it
    std::unordered_map<std::string, std::vector<std::string>> slices_by_packed_file;
    slices_by_packed_file.reserve(slice_locations.size());
    for (const auto& [small_path, location] : slice_locations) {
        // Entries without a usable packed file path are ignored.
        if (location.has_packed_file_path() && !location.packed_file_path().empty()) {
            slices_by_packed_file[location.packed_file_path()].emplace_back(small_path);
        }
    }
    if (slices_by_packed_file.empty()) {
        LOG_INFO("skip packed file update: no valid merge_file_path in merge_file_segment_index")
                .tag("instance_id", instance_id_)
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                .tag("index_map_size", slice_locations.size());
        return 0;
    }

    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
    int rc = 0;
    for (const auto& [packed_file_path, wanted_paths] : slices_by_packed_file) {
        if (wanted_paths.empty()) {
            continue;
        }

        // Set once this packed file needs no further work.
        bool handled = false;
        for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
            std::unique_ptr<Transaction> txn;
            TxnErrorCode err = txn_kv_->create_txn(&txn);
            if (err != TxnErrorCode::TXN_OK) {
                LOG_WARNING("failed to create txn when updating packed file ref count")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("err", err);
                rc = -1;
                break;
            }

            std::string packed_key = packed_file_key({instance_id_, packed_file_path});
            std::string serialized_info;
            err = txn->get(packed_key, &serialized_info);
            if (err != TxnErrorCode::TXN_OK) {
                if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
                    LOG_WARNING("packed file info not found when recycling rowset")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path)
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                            .tag("tablet_id", rs_meta_pb.tablet_id())
                            .tag("key", hex(packed_key))
                            .tag("tablet id", rs_meta_pb.tablet_id());
                    // Nothing to update for this packed file; move on to the others.
                    handled = true;
                } else {
                    LOG_WARNING("failed to get packed file info when recycling rowset")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path)
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                            .tag("tablet_id", rs_meta_pb.tablet_id())
                            .tag("err", err);
                    rc = -1;
                }
                break;
            }

            cloud::PackedFileInfoPB info;
            if (!info.ParseFromString(serialized_info)) {
                LOG_WARNING("failed to parse packed file info when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id());
                rc = -1;
                break;
            }

            LOG_INFO("packed file update check")
                    .tag("instance_id", instance_id_)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("merged_file_path", packed_file_path)
                    .tag("requested_small_files", wanted_paths.size())
                    .tag("merge_entries", info.slices_size());

            auto* slices = info.mutable_slices();
            int64_t changed_files = 0;
            int64_t missing_entries = 0;
            int64_t already_deleted = 0;
            for (const auto& wanted_path : wanted_paths) {
                // Only the first slice with a matching path is considered.
                auto slice_it =
                        std::find_if(slices->begin(), slices->end(), [&wanted_path](const auto& s) {
                            return s.path() == wanted_path;
                        });
                if (slice_it == slices->end()) {
                    ++missing_entries;
                    LOG_WARNING("packed file info missing small file entry")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path)
                            .tag("small_file_path", wanted_path)
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                            .tag("tablet_id", rs_meta_pb.tablet_id());
                    continue;
                }
                if (slice_it->deleted()) {
                    ++already_deleted;
                    continue;
                }
                slice_it->set_deleted(true);
                if (!slice_it->corrected()) {
                    slice_it->set_corrected(true);
                }
                ++changed_files;
            }

            if (changed_files == 0) {
                // The stored info is already up to date, so no write is needed.
                LOG_INFO("skip merge file update: no merge entries changed")
                        .tag("instance_id", instance_id_)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("merged_file_path", packed_file_path)
                        .tag("missing_entries", missing_entries)
                        .tag("already_deleted", already_deleted)
                        .tag("requested_small_files", wanted_paths.size())
                        .tag("merge_entries", info.slices_size());
                handled = true;
                break;
            }

            // Recompute the summary fields from the slices that are still live.
            int64_t live_slice_count = 0;
            int64_t live_slice_bytes = 0;
            for (const auto& slice : info.slices()) {
                if (slice.deleted()) {
                    continue;
                }
                ++live_slice_count;
                live_slice_bytes += slice.size();
            }
            info.set_remaining_slice_bytes(live_slice_bytes);
            info.set_ref_cnt(live_slice_count);
            LOG_INFO("updated packed file reference info")
                    .tag("instance_id", instance_id_)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("packed_file_path", packed_file_path)
                    .tag("ref_cnt", live_slice_count)
                    .tag("left_file_bytes", live_slice_bytes);

            const bool no_slice_left = (live_slice_count == 0);
            if (no_slice_left) {
                info.set_state(cloud::PackedFileInfoPB::RECYCLING);
            }

            std::string updated_val;
            if (!info.SerializeToString(&updated_val)) {
                LOG_WARNING("failed to serialize packed file info when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id());
                rc = -1;
                break;
            }

            txn->put(packed_key, updated_val);
            err = txn->commit();
            if (err == TxnErrorCode::TXN_OK) {
                handled = true;
                if (no_slice_left) {
                    LOG_INFO("packed file ready to delete, deleting immediately")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path);
                    if (delete_packed_file_and_kv(packed_file_path, packed_key, info) != 0) {
                        rc = -1;
                    }
                }
                break;
            }
            if (err != TxnErrorCode::TXN_CONFLICT) {
                // Any error other than a conflict is not retried.
                LOG_WARNING("failed to commit packed file info update")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("err", err)
                        .tag("changed_files", changed_files);
                rc = -1;
                break;
            }
            if (attempt >= max_retry_times) {
                LOG_WARNING("packed file info update conflict after max retry")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("changed_files", changed_files)
                        .tag("attempt", attempt);
                rc = -1;
                break;
            }
            LOG_WARNING("packed file info update conflict, retrying")
                    .tag("instance_id", instance_id_)
                    .tag("packed_file_path", packed_file_path)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("changed_files", changed_files)
                    .tag("attempt", attempt);
            sleep_for_packed_file_retry();
            // Conflict with retries left: start over with a fresh transaction.
        }

        if (!handled) {
            rc = -1;
        }
    }

    return rc;
}
3259
3260
int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path,
3261
                                                const std::string& packed_key,
3262
7
                                                const cloud::PackedFileInfoPB& packed_info) {
3263
7
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
3264
0
        LOG_WARNING("packed file missing resource id when recycling")
3265
0
                .tag("instance_id", instance_id_)
3266
0
                .tag("packed_file_path", packed_file_path);
3267
0
        return -1;
3268
0
    }
3269
3270
7
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
3271
7
    if (!accessor) {
3272
0
        LOG_WARNING("no accessor available to delete packed file")
3273
0
                .tag("instance_id", instance_id_)
3274
0
                .tag("packed_file_path", packed_file_path)
3275
0
                .tag("resource_id", packed_info.resource_id());
3276
0
        return -1;
3277
0
    }
3278
3279
7
    int del_ret = accessor->delete_file(packed_file_path);
3280
7
    if (del_ret != 0 && del_ret != 1) {
3281
0
        LOG_WARNING("failed to delete packed file")
3282
0
                .tag("instance_id", instance_id_)
3283
0
                .tag("packed_file_path", packed_file_path)
3284
0
                .tag("resource_id", resource_id)
3285
0
                .tag("ret", del_ret);
3286
0
        return -1;
3287
0
    }
3288
7
    if (del_ret == 1) {
3289
0
        LOG_INFO("packed file already removed")
3290
0
                .tag("instance_id", instance_id_)
3291
0
                .tag("packed_file_path", packed_file_path)
3292
0
                .tag("resource_id", resource_id);
3293
7
    } else {
3294
7
        LOG_INFO("deleted packed file")
3295
7
                .tag("instance_id", instance_id_)
3296
7
                .tag("packed_file_path", packed_file_path)
3297
7
                .tag("resource_id", resource_id);
3298
7
    }
3299
3300
7
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
3301
7
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3302
7
        std::unique_ptr<Transaction> del_txn;
3303
7
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
3304
7
        if (err != TxnErrorCode::TXN_OK) {
3305
0
            LOG_WARNING("failed to create txn when removing packed file kv")
3306
0
                    .tag("instance_id", instance_id_)
3307
0
                    .tag("packed_file_path", packed_file_path)
3308
0
                    .tag("attempt", attempt)
3309
0
                    .tag("err", err);
3310
0
            return -1;
3311
0
        }
3312
3313
7
        std::string latest_val;
3314
7
        err = del_txn->get(packed_key, &latest_val);
3315
7
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3316
0
            return 0;
3317
0
        }
3318
7
        if (err != TxnErrorCode::TXN_OK) {
3319
0
            LOG_WARNING("failed to re-read packed file kv before removal")
3320
0
                    .tag("instance_id", instance_id_)
3321
0
                    .tag("packed_file_path", packed_file_path)
3322
0
                    .tag("attempt", attempt)
3323
0
                    .tag("err", err);
3324
0
            return -1;
3325
0
        }
3326
3327
7
        cloud::PackedFileInfoPB latest_info;
3328
7
        if (!latest_info.ParseFromString(latest_val)) {
3329
0
            LOG_WARNING("failed to parse packed file info before removal")
3330
0
                    .tag("instance_id", instance_id_)
3331
0
                    .tag("packed_file_path", packed_file_path)
3332
0
                    .tag("attempt", attempt);
3333
0
            return -1;
3334
0
        }
3335
3336
7
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
3337
7
              latest_info.ref_cnt() == 0)) {
3338
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
3339
0
                    .tag("instance_id", instance_id_)
3340
0
                    .tag("packed_file_path", packed_file_path)
3341
0
                    .tag("attempt", attempt);
3342
0
            return 0;
3343
0
        }
3344
3345
7
        del_txn->remove(packed_key);
3346
7
        err = del_txn->commit();
3347
7
        if (err == TxnErrorCode::TXN_OK) {
3348
7
            LOG_INFO("removed packed file metadata")
3349
7
                    .tag("instance_id", instance_id_)
3350
7
                    .tag("packed_file_path", packed_file_path);
3351
7
            return 0;
3352
7
        }
3353
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3354
0
            if (attempt >= max_retry_times) {
3355
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
3356
0
                        .tag("instance_id", instance_id_)
3357
0
                        .tag("packed_file_path", packed_file_path)
3358
0
                        .tag("attempt", attempt);
3359
0
                return -1;
3360
0
            }
3361
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
3362
0
                    .tag("instance_id", instance_id_)
3363
0
                    .tag("packed_file_path", packed_file_path)
3364
0
                    .tag("attempt", attempt);
3365
0
            sleep_for_packed_file_retry();
3366
0
            continue;
3367
0
        }
3368
0
        LOG_WARNING("failed to remove packed file kv")
3369
0
                .tag("instance_id", instance_id_)
3370
0
                .tag("packed_file_path", packed_file_path)
3371
0
                .tag("attempt", attempt)
3372
0
                .tag("err", err);
3373
0
        return -1;
3374
0
    }
3375
0
    return -1;
3376
7
}
3377
3378
int InstanceRecycler::delete_rowset_data(
3379
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
3380
92
        RecyclerMetricsContext& metrics_context) {
3381
92
    int ret = 0;
3382
    // resource_id -> file_paths
3383
92
    std::map<std::string, std::vector<std::string>> resource_file_paths;
3384
    // (resource_id, tablet_id, rowset_id)
3385
92
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
3386
92
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
3387
3388
54.1k
    for (const auto& [_, rs] : rowsets) {
3389
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
3390
        // due to aborted schema change.
3391
54.1k
        if (is_formal_rowset) {
3392
3.16k
            std::lock_guard lock(recycled_tablets_mtx_);
3393
3.16k
            if (recycled_tablets_.count(rs.tablet_id()) && rs.packed_slice_locations_size() == 0) {
3394
                // Tablet has been recycled and this rowset has no packed slices, so file data
3395
                // should already be gone; skip to avoid redundant deletes. Rowsets with packed
3396
                // slice info must still run to decrement packed file ref counts.
3397
0
                continue;
3398
0
            }
3399
3.16k
        }
3400
3401
54.1k
        auto it = accessor_map_.find(rs.resource_id());
3402
        // possible if the accessor is not initilized correctly
3403
54.1k
        if (it == accessor_map_.end()) [[unlikely]] {
3404
1
            LOG_WARNING("instance has no such resource id")
3405
1
                    .tag("instance_id", instance_id_)
3406
1
                    .tag("resource_id", rs.resource_id());
3407
1
            ret = -1;
3408
1
            continue;
3409
1
        }
3410
3411
54.1k
        auto& file_paths = resource_file_paths[rs.resource_id()];
3412
54.1k
        const auto& rowset_id = rs.rowset_id_v2();
3413
54.1k
        int64_t tablet_id = rs.tablet_id();
3414
54.1k
        LOG_INFO("recycle rowset merge index size")
3415
54.1k
                .tag("instance_id", instance_id_)
3416
54.1k
                .tag("tablet_id", tablet_id)
3417
54.1k
                .tag("rowset_id", rowset_id)
3418
54.1k
                .tag("merge_index_size", rs.packed_slice_locations_size());
3419
54.1k
        if (decrement_packed_file_ref_counts(rs) != 0) {
3420
0
            ret = -1;
3421
0
            continue;
3422
0
        }
3423
54.1k
        int64_t num_segments = rs.num_segments();
3424
54.1k
        if (num_segments <= 0) {
3425
0
            metrics_context.total_recycled_num++;
3426
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
3427
0
            continue;
3428
0
        }
3429
3430
        // Process delete bitmap
3431
54.1k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3432
3433
        // Process inverted indexes
3434
54.1k
        std::vector<std::pair<int64_t, std::string>> index_ids;
3435
        // default format as v1.
3436
54.1k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3437
54.1k
        int inverted_index_get_ret = 0;
3438
54.1k
        if (rs.has_tablet_schema()) {
3439
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
3440
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3441
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3442
53.5k
                }
3443
53.5k
            }
3444
26.5k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
3445
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
3446
26.5k
            }
3447
27.5k
        } else {
3448
27.5k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
3449
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
3450
0
                                "instance_id="
3451
0
                             << instance_id_ << " tablet_id=" << tablet_id
3452
0
                             << " rowset_id=" << rowset_id;
3453
0
                ret = -1;
3454
0
                continue;
3455
0
            }
3456
27.5k
            InvertedIndexInfo index_info;
3457
27.5k
            inverted_index_get_ret =
3458
27.5k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
3459
27.5k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3460
27.5k
                                     &inverted_index_get_ret);
3461
27.5k
            if (inverted_index_get_ret == 0) {
3462
27.0k
                index_format = index_info.first;
3463
27.0k
                index_ids = index_info.second;
3464
27.0k
            } else if (inverted_index_get_ret == 1) {
3465
                // 1. Schema kv not found means tablet has been recycled
3466
                // Maybe some tablet recycle failed by some bugs
3467
                // We need to delete again to double check
3468
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3469
                // because we are uncertain about the inverted index information.
3470
                // If there are inverted indexes, some data might not be deleted,
3471
                // but this is acceptable as we have made our best effort to delete the data.
3472
507
                LOG_INFO(
3473
507
                        "delete rowset data schema kv not found, need to delete again to "
3474
507
                        "double "
3475
507
                        "check")
3476
507
                        .tag("instance_id", instance_id_)
3477
507
                        .tag("tablet_id", tablet_id)
3478
507
                        .tag("rowset", rs.ShortDebugString());
3479
                // Currently index_ids is guaranteed to be empty,
3480
                // but we clear it again here as a safeguard against future code changes
3481
                // that might cause index_ids to no longer be empty
3482
507
                index_format = InvertedIndexStorageFormatPB::V2;
3483
507
                index_ids.clear();
3484
18.4E
            } else {
3485
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
3486
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
3487
18.4E
                ret = -1;
3488
18.4E
                continue;
3489
18.4E
            }
3490
27.5k
        }
3491
54.1k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3492
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3493
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3494
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
3495
5
            continue;
3496
5
        }
3497
324k
        for (int64_t i = 0; i < num_segments; ++i) {
3498
270k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3499
270k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
3500
536k
                for (const auto& index_id : index_ids) {
3501
536k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
3502
536k
                                                                index_id.first, index_id.second));
3503
536k
                }
3504
267k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
3505
                // try to recycle inverted index v2 when get_ret == 1
3506
                // we treat schema not found as if it has a v2 format inverted index
3507
                // to reduce chance of data leakage
3508
2.50k
                if (inverted_index_get_ret == 1) {
3509
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
3510
2.50k
                            .tag("instance_id", instance_id_)
3511
2.50k
                            .tag("inverted index v2 path",
3512
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
3513
2.50k
                }
3514
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3515
2.50k
            }
3516
270k
        }
3517
54.1k
    }
3518
3519
92
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
3520
92
                                                 "delete_rowset_data",
3521
92
                                                 [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3521
5
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3521
50
                                                 [](const int& ret) { return ret != 0; });
3522
92
    for (auto& [resource_id, file_paths] : resource_file_paths) {
3523
50
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3524
50
            DCHECK(accessor_map_.count(*rid))
3525
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3526
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3527
50
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3528
50
                                     &accessor_map_);
3529
50
            if (!accessor_map_.contains(*rid)) {
3530
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3531
0
                        .tag("resource_id", resource_id)
3532
0
                        .tag("instance_id", instance_id_);
3533
0
                return -1;
3534
0
            }
3535
50
            auto& accessor = accessor_map_[*rid];
3536
50
            int ret = accessor->delete_files(*paths);
3537
50
            if (!ret) {
3538
                // deduplication of different files with the same rowset id
3539
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3540
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3541
50
                std::set<std::string> deleted_rowset_id;
3542
3543
50
                std::for_each(paths->begin(), paths->end(),
3544
50
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3545
861k
                               this](const std::string& path) {
3546
861k
                                  std::vector<std::string> str;
3547
861k
                                  butil::SplitString(path, '/', &str);
3548
861k
                                  std::string rowset_id;
3549
861k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3550
857k
                                      rowset_id = str.back().substr(0, pos);
3551
857k
                                  } else {
3552
4.36k
                                      if (path.find("packed_file/") != std::string::npos) {
3553
0
                                          return; // packed files do not have rowset_id encoded
3554
0
                                      }
3555
4.36k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3556
4.36k
                                      return;
3557
4.36k
                                  }
3558
857k
                                  auto rs_meta = rowsets.find(rowset_id);
3559
857k
                                  if (rs_meta != rowsets.end() &&
3560
861k
                                      !deleted_rowset_id.contains(rowset_id)) {
3561
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3562
54.1k
                                      metrics_context.total_recycled_data_size +=
3563
54.1k
                                              rs_meta->second.total_disk_size();
3564
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3565
54.1k
                                              rs_meta->second.num_segments();
3566
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3567
54.1k
                                              rs_meta->second.total_disk_size();
3568
54.1k
                                      metrics_context.total_recycled_num++;
3569
54.1k
                                  }
3570
857k
                              });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3545
14
                               this](const std::string& path) {
3546
14
                                  std::vector<std::string> str;
3547
14
                                  butil::SplitString(path, '/', &str);
3548
14
                                  std::string rowset_id;
3549
14
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3550
14
                                      rowset_id = str.back().substr(0, pos);
3551
14
                                  } else {
3552
0
                                      if (path.find("packed_file/") != std::string::npos) {
3553
0
                                          return; // packed files do not have rowset_id encoded
3554
0
                                      }
3555
0
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3556
0
                                      return;
3557
0
                                  }
3558
14
                                  auto rs_meta = rowsets.find(rowset_id);
3559
14
                                  if (rs_meta != rowsets.end() &&
3560
14
                                      !deleted_rowset_id.contains(rowset_id)) {
3561
7
                                      deleted_rowset_id.emplace(rowset_id);
3562
7
                                      metrics_context.total_recycled_data_size +=
3563
7
                                              rs_meta->second.total_disk_size();
3564
7
                                      segment_metrics_context_.total_recycled_num +=
3565
7
                                              rs_meta->second.num_segments();
3566
7
                                      segment_metrics_context_.total_recycled_data_size +=
3567
7
                                              rs_meta->second.total_disk_size();
3568
7
                                      metrics_context.total_recycled_num++;
3569
7
                                  }
3570
14
                              });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3545
861k
                               this](const std::string& path) {
3546
861k
                                  std::vector<std::string> str;
3547
861k
                                  butil::SplitString(path, '/', &str);
3548
861k
                                  std::string rowset_id;
3549
861k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3550
857k
                                      rowset_id = str.back().substr(0, pos);
3551
857k
                                  } else {
3552
4.36k
                                      if (path.find("packed_file/") != std::string::npos) {
3553
0
                                          return; // packed files do not have rowset_id encoded
3554
0
                                      }
3555
4.36k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3556
4.36k
                                      return;
3557
4.36k
                                  }
3558
857k
                                  auto rs_meta = rowsets.find(rowset_id);
3559
857k
                                  if (rs_meta != rowsets.end() &&
3560
861k
                                      !deleted_rowset_id.contains(rowset_id)) {
3561
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3562
54.1k
                                      metrics_context.total_recycled_data_size +=
3563
54.1k
                                              rs_meta->second.total_disk_size();
3564
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3565
54.1k
                                              rs_meta->second.num_segments();
3566
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3567
54.1k
                                              rs_meta->second.total_disk_size();
3568
54.1k
                                      metrics_context.total_recycled_num++;
3569
54.1k
                                  }
3570
857k
                              });
3571
50
                segment_metrics_context_.report();
3572
50
                metrics_context.report();
3573
50
            }
3574
50
            return ret;
3575
50
        });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3523
5
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3524
5
            DCHECK(accessor_map_.count(*rid))
3525
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3526
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3527
5
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3528
5
                                     &accessor_map_);
3529
5
            if (!accessor_map_.contains(*rid)) {
3530
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3531
0
                        .tag("resource_id", resource_id)
3532
0
                        .tag("instance_id", instance_id_);
3533
0
                return -1;
3534
0
            }
3535
5
            auto& accessor = accessor_map_[*rid];
3536
5
            int ret = accessor->delete_files(*paths);
3537
5
            if (!ret) {
3538
                // deduplication of different files with the same rowset id
3539
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3540
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3541
5
                std::set<std::string> deleted_rowset_id;
3542
3543
5
                std::for_each(paths->begin(), paths->end(),
3544
5
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3545
5
                               this](const std::string& path) {
3546
5
                                  std::vector<std::string> str;
3547
5
                                  butil::SplitString(path, '/', &str);
3548
5
                                  std::string rowset_id;
3549
5
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3550
5
                                      rowset_id = str.back().substr(0, pos);
3551
5
                                  } else {
3552
5
                                      if (path.find("packed_file/") != std::string::npos) {
3553
5
                                          return; // packed files do not have rowset_id encoded
3554
5
                                      }
3555
5
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3556
5
                                      return;
3557
5
                                  }
3558
5
                                  auto rs_meta = rowsets.find(rowset_id);
3559
5
                                  if (rs_meta != rowsets.end() &&
3560
5
                                      !deleted_rowset_id.contains(rowset_id)) {
3561
5
                                      deleted_rowset_id.emplace(rowset_id);
3562
5
                                      metrics_context.total_recycled_data_size +=
3563
5
                                              rs_meta->second.total_disk_size();
3564
5
                                      segment_metrics_context_.total_recycled_num +=
3565
5
                                              rs_meta->second.num_segments();
3566
5
                                      segment_metrics_context_.total_recycled_data_size +=
3567
5
                                              rs_meta->second.total_disk_size();
3568
5
                                      metrics_context.total_recycled_num++;
3569
5
                                  }
3570
5
                              });
3571
5
                segment_metrics_context_.report();
3572
5
                metrics_context.report();
3573
5
            }
3574
5
            return ret;
3575
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3523
45
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3524
45
            DCHECK(accessor_map_.count(*rid))
3525
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3526
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3527
45
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3528
45
                                     &accessor_map_);
3529
45
            if (!accessor_map_.contains(*rid)) {
3530
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3531
0
                        .tag("resource_id", resource_id)
3532
0
                        .tag("instance_id", instance_id_);
3533
0
                return -1;
3534
0
            }
3535
45
            auto& accessor = accessor_map_[*rid];
3536
45
            int ret = accessor->delete_files(*paths);
3537
45
            if (!ret) {
3538
                // deduplication of different files with the same rowset id
3539
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3540
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3541
45
                std::set<std::string> deleted_rowset_id;
3542
3543
45
                std::for_each(paths->begin(), paths->end(),
3544
45
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3545
45
                               this](const std::string& path) {
3546
45
                                  std::vector<std::string> str;
3547
45
                                  butil::SplitString(path, '/', &str);
3548
45
                                  std::string rowset_id;
3549
45
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3550
45
                                      rowset_id = str.back().substr(0, pos);
3551
45
                                  } else {
3552
45
                                      if (path.find("packed_file/") != std::string::npos) {
3553
45
                                          return; // packed files do not have rowset_id encoded
3554
45
                                      }
3555
45
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3556
45
                                      return;
3557
45
                                  }
3558
45
                                  auto rs_meta = rowsets.find(rowset_id);
3559
45
                                  if (rs_meta != rowsets.end() &&
3560
45
                                      !deleted_rowset_id.contains(rowset_id)) {
3561
45
                                      deleted_rowset_id.emplace(rowset_id);
3562
45
                                      metrics_context.total_recycled_data_size +=
3563
45
                                              rs_meta->second.total_disk_size();
3564
45
                                      segment_metrics_context_.total_recycled_num +=
3565
45
                                              rs_meta->second.num_segments();
3566
45
                                      segment_metrics_context_.total_recycled_data_size +=
3567
45
                                              rs_meta->second.total_disk_size();
3568
45
                                      metrics_context.total_recycled_num++;
3569
45
                                  }
3570
45
                              });
3571
45
                segment_metrics_context_.report();
3572
45
                metrics_context.report();
3573
45
            }
3574
45
            return ret;
3575
45
        });
3576
50
    }
3577
92
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
3578
5
        LOG_INFO(
3579
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
3580
5
                "resource_id={}, tablet_id={}, instance_id={}, task_type={}",
3581
5
                rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type);
3582
5
        concurrent_delete_executor.add([&]() -> int {
3583
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3584
5
            if (!ret) {
3585
5
                auto rs = rowsets.at(rowset_id);
3586
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3587
5
                metrics_context.total_recycled_num++;
3588
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3589
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3590
5
                metrics_context.report();
3591
5
                segment_metrics_context_.report();
3592
5
            }
3593
5
            return ret;
3594
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
3582
5
        concurrent_delete_executor.add([&]() -> int {
3583
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3584
5
            if (!ret) {
3585
5
                auto rs = rowsets.at(rowset_id);
3586
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3587
5
                metrics_context.total_recycled_num++;
3588
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3589
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3590
5
                metrics_context.report();
3591
5
                segment_metrics_context_.report();
3592
5
            }
3593
5
            return ret;
3594
5
        });
3595
5
    }
3596
3597
92
    bool finished = true;
3598
92
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3599
92
    for (int r : rets) {
3600
55
        if (r != 0) {
3601
0
            ret = -1;
3602
0
            break;
3603
0
        }
3604
55
    }
3605
92
    ret = finished ? ret : -1;
3606
92
    return ret;
3607
92
}
3608
3609
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
3610
3.10k
                                         const std::string& rowset_id) {
3611
3.10k
    auto it = accessor_map_.find(resource_id);
3612
3.10k
    if (it == accessor_map_.end()) {
3613
200
        LOG_WARNING("instance has no such resource id")
3614
200
                .tag("instance_id", instance_id_)
3615
200
                .tag("resource_id", resource_id)
3616
200
                .tag("tablet_id", tablet_id)
3617
200
                .tag("rowset_id", rowset_id);
3618
200
        return -1;
3619
200
    }
3620
2.90k
    auto& accessor = it->second;
3621
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
3622
3.10k
}
3623
3624
4
bool InstanceRecycler::decode_packed_file_key(std::string_view key, std::string* packed_path) {
3625
4
    if (key.empty()) {
3626
0
        return false;
3627
0
    }
3628
4
    std::string_view key_view = key;
3629
4
    key_view.remove_prefix(1); // remove keyspace prefix
3630
4
    std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> decoded;
3631
4
    if (decode_key(&key_view, &decoded) != 0) {
3632
0
        return false;
3633
0
    }
3634
4
    if (decoded.size() < 4) {
3635
0
        return false;
3636
0
    }
3637
4
    try {
3638
4
        *packed_path = std::get<std::string>(std::get<0>(decoded.back()));
3639
4
    } catch (const std::bad_variant_access&) {
3640
0
        return false;
3641
0
    }
3642
4
    return true;
3643
4
}
3644
3645
14
int InstanceRecycler::recycle_packed_files() {
3646
14
    const std::string task_name = "recycle_packed_files";
3647
14
    auto start_tp = steady_clock::now();
3648
14
    int64_t start_time = duration_cast<seconds>(start_tp.time_since_epoch()).count();
3649
14
    int ret = 0;
3650
14
    PackedFileRecycleStats stats;
3651
3652
14
    register_recycle_task(task_name, start_time);
3653
14
    DORIS_CLOUD_DEFER {
3654
14
        unregister_recycle_task(task_name);
3655
14
        int64_t cost =
3656
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3657
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3658
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3659
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3660
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3661
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3662
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3663
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3664
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3665
14
                                                             stats.bytes_object_deleted);
3666
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3667
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3668
14
                .tag("instance_id", instance_id_)
3669
14
                .tag("num_scanned", stats.num_scanned)
3670
14
                .tag("num_corrected", stats.num_corrected)
3671
14
                .tag("num_deleted", stats.num_deleted)
3672
14
                .tag("num_failed", stats.num_failed)
3673
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3674
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3675
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3676
14
                .tag("bytes_deleted", stats.bytes_deleted)
3677
14
                .tag("ret", ret);
3678
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
Line
Count
Source
3653
14
    DORIS_CLOUD_DEFER {
3654
14
        unregister_recycle_task(task_name);
3655
14
        int64_t cost =
3656
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3657
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3658
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3659
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3660
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3661
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3662
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3663
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3664
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3665
14
                                                             stats.bytes_object_deleted);
3666
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3667
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3668
14
                .tag("instance_id", instance_id_)
3669
14
                .tag("num_scanned", stats.num_scanned)
3670
14
                .tag("num_corrected", stats.num_corrected)
3671
14
                .tag("num_deleted", stats.num_deleted)
3672
14
                .tag("num_failed", stats.num_failed)
3673
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3674
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3675
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3676
14
                .tag("bytes_deleted", stats.bytes_deleted)
3677
14
                .tag("ret", ret);
3678
14
    };
3679
3680
14
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3681
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3682
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3683
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
Line
Count
Source
3680
4
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3681
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3682
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3683
4
    };
3684
3685
14
    LOG_INFO("begin to recycle packed file").tag("instance_id", instance_id_);
3686
3687
14
    std::string begin = packed_file_key({instance_id_, ""});
3688
14
    std::string end = packed_file_key({instance_id_, "\xff"});
3689
14
    if (scan_and_recycle(begin, end, recycle_func) != 0) {
3690
0
        ret = -1;
3691
0
    }
3692
3693
14
    return ret;
3694
14
}
3695
3696
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
3697
                                                  RecyclerMetricsContext& metrics_context,
3698
0
                                                  int64_t partition_id, bool is_empty_tablet) {
3699
0
    std::string tablet_key_begin, tablet_key_end;
3700
3701
0
    if (partition_id > 0) {
3702
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
3703
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
3704
0
    } else {
3705
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
3706
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
3707
0
    }
3708
    // for calculate the total num or bytes of recyled objects
3709
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
3710
0
                                                          std::string_view v) -> int {
3711
0
        doris::TabletMetaCloudPB tablet_meta_pb;
3712
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
3713
0
            return 0;
3714
0
        }
3715
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
3716
3717
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
3718
0
            return 0;
3719
0
        }
3720
3721
0
        if (!is_empty_tablet) {
3722
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
3723
0
                return 0;
3724
0
            }
3725
0
            tablet_metrics_context_.total_need_recycle_num++;
3726
0
        }
3727
0
        return 0;
3728
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
3729
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
3730
0
    metrics_context.report(true);
3731
0
    tablet_metrics_context_.report(true);
3732
0
    segment_metrics_context_.report(true);
3733
0
    return ret;
3734
0
}
3735
3736
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
3737
0
                                                 RecyclerMetricsContext& metrics_context) {
3738
0
    int ret = 0;
3739
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
3740
0
    std::unique_ptr<Transaction> txn;
3741
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3742
0
        LOG_WARNING("failed to recycle tablet ")
3743
0
                .tag("tablet id", tablet_id)
3744
0
                .tag("instance_id", instance_id_)
3745
0
                .tag("reason", "failed to create txn");
3746
0
        ret = -1;
3747
0
    }
3748
0
    GetRowsetResponse resp;
3749
0
    std::string msg;
3750
0
    MetaServiceCode code = MetaServiceCode::OK;
3751
    // get rowsets in tablet
3752
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
3753
0
                        tablet_id, code, msg, &resp);
3754
0
    if (code != MetaServiceCode::OK) {
3755
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3756
0
                .tag("tablet id", tablet_id)
3757
0
                .tag("msg", msg)
3758
0
                .tag("code", code)
3759
0
                .tag("instance id", instance_id_);
3760
0
        ret = -1;
3761
0
    }
3762
0
    for (const auto& rs_meta : resp.rowset_meta()) {
3763
        /*
3764
        * For compatibility, we skip the loop for [0-1] here.
3765
        * The purpose of this loop is to delete object files,
3766
        * and since [0-1] only has meta and doesn't have object files,
3767
        * skipping it doesn't affect system correctness.
3768
        *
3769
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
3770
        * would return error -1 directly, causing the recycle operation to fail.
3771
        *
3772
        * [0-1] doesn't have resource id is a bug.
3773
        * In the future, we will fix this problem, after that,
3774
        * we can remove this if statement.
3775
        *
3776
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
3777
        */
3778
3779
0
        if (rs_meta.end_version() == 1) {
3780
            // Assert that [0-1] has no resource_id to make sure
3781
            // this if statement will not be forgetted to remove
3782
            // when the resource id bug is fixed
3783
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3784
0
            continue;
3785
0
        }
3786
0
        if (!rs_meta.has_resource_id()) {
3787
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3788
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3789
0
                    .tag("instance_id", instance_id_)
3790
0
                    .tag("tablet_id", tablet_id);
3791
0
            continue;
3792
0
        }
3793
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3794
0
        auto it = accessor_map_.find(rs_meta.resource_id());
3795
        // possible if the accessor is not initilized correctly
3796
0
        if (it == accessor_map_.end()) [[unlikely]] {
3797
0
            LOG_WARNING(
3798
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3799
0
                    "recycle process")
3800
0
                    .tag("tablet id", tablet_id)
3801
0
                    .tag("instance_id", instance_id_)
3802
0
                    .tag("resource_id", rs_meta.resource_id())
3803
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
3804
0
            continue;
3805
0
        }
3806
3807
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
3808
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
3809
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
3810
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
3811
0
    }
3812
0
    return ret;
3813
0
}
3814
3815
4.25k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
3816
4.25k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
3817
4.25k
            .tag("instance_id", instance_id_)
3818
4.25k
            .tag("tablet_id", tablet_id);
3819
3820
4.25k
    if (should_recycle_versioned_keys()) {
3821
11
        int ret = recycle_versioned_tablet(tablet_id, metrics_context);
3822
11
        if (ret != 0) {
3823
0
            return ret;
3824
0
        }
3825
        // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED
3826
        // during the recycle_versioned_tablet process.
3827
        //
3828
        // .. And remove restore job rowsets of this tablet too
3829
11
    }
3830
3831
4.25k
    int ret = 0;
3832
4.25k
    auto start_time = steady_clock::now();
3833
3834
4.25k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
3835
3836
    // collect resource ids
3837
248
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
3838
248
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
3839
248
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
3840
248
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
3841
248
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
3842
248
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
3843
3844
248
    std::set<std::string> resource_ids;
3845
248
    int64_t recycle_rowsets_number = 0;
3846
248
    int64_t recycle_segments_number = 0;
3847
248
    int64_t recycle_rowsets_data_size = 0;
3848
248
    int64_t recycle_rowsets_index_size = 0;
3849
248
    int64_t recycle_restore_job_rowsets_number = 0;
3850
248
    int64_t recycle_restore_job_segments_number = 0;
3851
248
    int64_t recycle_restore_job_rowsets_data_size = 0;
3852
248
    int64_t recycle_restore_job_rowsets_index_size = 0;
3853
248
    int64_t max_rowset_version = 0;
3854
248
    int64_t min_rowset_creation_time = INT64_MAX;
3855
248
    int64_t max_rowset_creation_time = 0;
3856
248
    int64_t min_rowset_expiration_time = INT64_MAX;
3857
248
    int64_t max_rowset_expiration_time = 0;
3858
3859
248
    DORIS_CLOUD_DEFER {
3860
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3861
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3862
248
                .tag("instance_id", instance_id_)
3863
248
                .tag("tablet_id", tablet_id)
3864
248
                .tag("recycle rowsets number", recycle_rowsets_number)
3865
248
                .tag("recycle segments number", recycle_segments_number)
3866
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3867
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3868
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
3869
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
3870
248
                .tag("all restore job rowsets recycle data size",
3871
248
                     recycle_restore_job_rowsets_data_size)
3872
248
                .tag("all restore job rowsets recycle index size",
3873
248
                     recycle_restore_job_rowsets_index_size)
3874
248
                .tag("max rowset version", max_rowset_version)
3875
248
                .tag("min rowset creation time", min_rowset_creation_time)
3876
248
                .tag("max rowset creation time", max_rowset_creation_time)
3877
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
3878
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
3879
248
                .tag("task type", metrics_context.operation_type)
3880
248
                .tag("ret", ret);
3881
248
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3859
248
    DORIS_CLOUD_DEFER {
3860
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
3861
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
3862
248
                .tag("instance_id", instance_id_)
3863
248
                .tag("tablet_id", tablet_id)
3864
248
                .tag("recycle rowsets number", recycle_rowsets_number)
3865
248
                .tag("recycle segments number", recycle_segments_number)
3866
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
3867
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
3868
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
3869
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
3870
248
                .tag("all restore job rowsets recycle data size",
3871
248
                     recycle_restore_job_rowsets_data_size)
3872
248
                .tag("all restore job rowsets recycle index size",
3873
248
                     recycle_restore_job_rowsets_index_size)
3874
248
                .tag("max rowset version", max_rowset_version)
3875
248
                .tag("min rowset creation time", min_rowset_creation_time)
3876
248
                .tag("max rowset creation time", max_rowset_creation_time)
3877
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
3878
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
3879
248
                .tag("task type", metrics_context.operation_type)
3880
248
                .tag("ret", ret);
3881
248
    };
3882
3883
248
    std::unique_ptr<Transaction> txn;
3884
248
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3885
0
        LOG_WARNING("failed to recycle tablet ")
3886
0
                .tag("tablet id", tablet_id)
3887
0
                .tag("instance_id", instance_id_)
3888
0
                .tag("reason", "failed to create txn");
3889
0
        ret = -1;
3890
0
    }
3891
248
    GetRowsetResponse resp;
3892
248
    std::string msg;
3893
248
    MetaServiceCode code = MetaServiceCode::OK;
3894
    // get rowsets in tablet
3895
248
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
3896
248
                        tablet_id, code, msg, &resp);
3897
248
    if (code != MetaServiceCode::OK) {
3898
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3899
0
                .tag("tablet id", tablet_id)
3900
0
                .tag("msg", msg)
3901
0
                .tag("code", code)
3902
0
                .tag("instance id", instance_id_);
3903
0
        ret = -1;
3904
0
    }
3905
248
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
3906
3907
2.51k
    for (const auto& rs_meta : resp.rowset_meta()) {
3908
        // The rowset has no resource id and segments when it was generated by compaction
3909
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
3910
2.51k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
3911
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
3912
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3913
0
                    .tag("instance_id", instance_id_)
3914
0
                    .tag("tablet_id", tablet_id);
3915
0
            recycle_rowsets_number += 1;
3916
0
            continue;
3917
0
        }
3918
2.51k
        if (!rs_meta.has_resource_id()) {
3919
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3920
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
3921
1
                    .tag("instance_id", instance_id_)
3922
1
                    .tag("tablet_id", tablet_id);
3923
1
            return -1;
3924
1
        }
3925
2.51k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
3926
2.51k
        auto it = accessor_map_.find(rs_meta.resource_id());
3927
        // possible if the accessor is not initilized correctly
3928
2.51k
        if (it == accessor_map_.end()) [[unlikely]] {
3929
1
            LOG_WARNING(
3930
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3931
1
                    "recycle process")
3932
1
                    .tag("tablet id", tablet_id)
3933
1
                    .tag("instance_id", instance_id_)
3934
1
                    .tag("resource_id", rs_meta.resource_id())
3935
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
3936
1
            return -1;
3937
1
        }
3938
2.51k
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
3939
0
            LOG_WARNING("failed to update packed file info when recycling tablet")
3940
0
                    .tag("instance_id", instance_id_)
3941
0
                    .tag("tablet_id", tablet_id)
3942
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
3943
0
            return -1;
3944
0
        }
3945
2.51k
        recycle_rowsets_number += 1;
3946
2.51k
        recycle_segments_number += rs_meta.num_segments();
3947
2.51k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
3948
2.51k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
3949
2.51k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
3950
2.51k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
3951
2.51k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
3952
2.51k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
3953
2.51k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
3954
2.51k
        resource_ids.emplace(rs_meta.resource_id());
3955
2.51k
    }
3956
3957
    // get restore job rowset in tablet
3958
246
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
3959
246
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
3960
246
    if (code != MetaServiceCode::OK) {
3961
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
3962
0
                .tag("tablet id", tablet_id)
3963
0
                .tag("msg", msg)
3964
0
                .tag("code", code)
3965
0
                .tag("instance id", instance_id_);
3966
0
        return -1;
3967
0
    }
3968
3969
246
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
3970
0
        if (!rs_meta.has_resource_id()) {
3971
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
3972
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
3973
0
                    .tag("instance_id", instance_id_)
3974
0
                    .tag("tablet_id", tablet_id);
3975
0
            return -1;
3976
0
        }
3977
3978
0
        auto it = accessor_map_.find(rs_meta.resource_id());
3979
        // possible if the accessor is not initilized correctly
3980
0
        if (it == accessor_map_.end()) [[unlikely]] {
3981
0
            LOG_WARNING(
3982
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
3983
0
                    "recycle process")
3984
0
                    .tag("tablet id", tablet_id)
3985
0
                    .tag("instance_id", instance_id_)
3986
0
                    .tag("resource_id", rs_meta.resource_id())
3987
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
3988
0
            return -1;
3989
0
        }
3990
0
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
3991
0
            LOG_WARNING("failed to update packed file info when recycling restore job rowset")
3992
0
                    .tag("instance_id", instance_id_)
3993
0
                    .tag("tablet_id", tablet_id)
3994
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
3995
0
            return -1;
3996
0
        }
3997
0
        recycle_restore_job_rowsets_number += 1;
3998
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
3999
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
4000
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
4001
0
        resource_ids.emplace(rs_meta.resource_id());
4002
0
    }
4003
4004
246
    LOG_INFO("recycle tablet start to delete object")
4005
246
            .tag("instance id", instance_id_)
4006
246
            .tag("tablet id", tablet_id)
4007
246
            .tag("recycle tablet resource ids are",
4008
246
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
4009
246
                                 [](std::string rs_id, const auto& it) {
4010
206
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4011
206
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
4009
206
                                 [](std::string rs_id, const auto& it) {
4010
206
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4011
206
                                 }));
4012
4013
246
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
4014
246
            _thread_pool_group.s3_producer_pool,
4015
246
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4016
246
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
4016
206
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
4017
4018
    // delete all rowset data in this tablet
4019
    // ATTN: there may be data leak if not all accessor initilized successfully
4020
    //       partial data deleted if the tablet is stored cross-storage vault
4021
    //       vault id is not attached to TabletMeta...
4022
246
    for (const auto& resource_id : resource_ids) {
4023
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
4024
206
        concurrent_delete_executor.add(
4025
206
                [&, rs_id = resource_id,
4026
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4027
206
                    std::unique_ptr<int, std::function<void(int*)>> defer(
4028
206
                            (int*)0x01, [&](int*) { metrics_context.report(); });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11EvENKUlPiE_clES5_
Line
Count
Source
4028
206
                            (int*)0x01, [&](int*) { metrics_context.report(); });
4029
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4030
206
                    if (res != 0) {
4031
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4032
2
                                     << " path=" << accessor_ptr->uri()
4033
2
                                     << " task type=" << metrics_context.operation_type;
4034
2
                        return std::make_pair(-1, rs_id);
4035
2
                    }
4036
204
                    return std::make_pair(0, rs_id);
4037
206
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
4026
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4027
206
                    std::unique_ptr<int, std::function<void(int*)>> defer(
4028
206
                            (int*)0x01, [&](int*) { metrics_context.report(); });
4029
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4030
206
                    if (res != 0) {
4031
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4032
2
                                     << " path=" << accessor_ptr->uri()
4033
2
                                     << " task type=" << metrics_context.operation_type;
4034
2
                        return std::make_pair(-1, rs_id);
4035
2
                    }
4036
204
                    return std::make_pair(0, rs_id);
4037
206
                });
4038
206
    }
4039
4040
246
    bool finished = true;
4041
246
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
4042
246
    for (auto& r : rets) {
4043
206
        if (r.first != 0) {
4044
2
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
4045
2
            ret = -1;
4046
2
        }
4047
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
4048
206
    }
4049
246
    ret = finished ? ret : -1;
4050
4051
246
    if (ret != 0) { // failed recycle tablet data
4052
2
        LOG_WARNING("ret!=0")
4053
2
                .tag("finished", finished)
4054
2
                .tag("ret", ret)
4055
2
                .tag("instance_id", instance_id_)
4056
2
                .tag("tablet_id", tablet_id);
4057
2
        return ret;
4058
2
    }
4059
4060
244
    tablet_metrics_context_.total_recycled_data_size +=
4061
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4062
244
    tablet_metrics_context_.total_recycled_num += 1;
4063
244
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4064
244
    segment_metrics_context_.total_recycled_data_size +=
4065
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4066
244
    metrics_context.total_recycled_data_size +=
4067
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4068
244
    tablet_metrics_context_.report();
4069
244
    segment_metrics_context_.report();
4070
244
    metrics_context.report();
4071
4072
244
    txn.reset();
4073
244
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4074
0
        LOG_WARNING("failed to recycle tablet ")
4075
0
                .tag("tablet id", tablet_id)
4076
0
                .tag("instance_id", instance_id_)
4077
0
                .tag("reason", "failed to create txn");
4078
0
        ret = -1;
4079
0
    }
4080
    // delete all rowset kv in this tablet
4081
244
    txn->remove(rs_key0, rs_key1);
4082
244
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4083
244
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4084
4085
    // remove delete bitmap for MoW table
4086
244
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4087
244
    txn->remove(pending_key);
4088
244
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4089
244
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4090
244
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4091
4092
244
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4093
244
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4094
244
    txn->remove(dbm_start_key, dbm_end_key);
4095
244
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4096
244
              << " end=" << hex(dbm_end_key);
4097
4098
244
    TxnErrorCode err = txn->commit();
4099
244
    if (err != TxnErrorCode::TXN_OK) {
4100
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4101
0
        ret = -1;
4102
0
    }
4103
4104
244
    if (ret == 0) {
4105
        // All object files under tablet have been deleted
4106
244
        std::lock_guard lock(recycled_tablets_mtx_);
4107
244
        recycled_tablets_.insert(tablet_id);
4108
244
    }
4109
4110
244
    return ret;
4111
246
}
4112
4113
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
4114
11
                                               RecyclerMetricsContext& metrics_context) {
4115
11
    int ret = 0;
4116
11
    auto start_time = steady_clock::now();
4117
4118
11
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4119
4120
    // collect resource ids
4121
11
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4122
11
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4123
11
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4124
11
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4125
4126
11
    int64_t recycle_rowsets_number = 0;
4127
11
    int64_t recycle_segments_number = 0;
4128
11
    int64_t recycle_rowsets_data_size = 0;
4129
11
    int64_t recycle_rowsets_index_size = 0;
4130
11
    int64_t max_rowset_version = 0;
4131
11
    int64_t min_rowset_creation_time = INT64_MAX;
4132
11
    int64_t max_rowset_creation_time = 0;
4133
11
    int64_t min_rowset_expiration_time = INT64_MAX;
4134
11
    int64_t max_rowset_expiration_time = 0;
4135
4136
11
    DORIS_CLOUD_DEFER {
4137
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4138
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4139
11
                .tag("instance_id", instance_id_)
4140
11
                .tag("tablet_id", tablet_id)
4141
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4142
11
                .tag("recycle segments number", recycle_segments_number)
4143
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4144
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4145
11
                .tag("max rowset version", max_rowset_version)
4146
11
                .tag("min rowset creation time", min_rowset_creation_time)
4147
11
                .tag("max rowset creation time", max_rowset_creation_time)
4148
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4149
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4150
11
                .tag("ret", ret);
4151
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4136
11
    DORIS_CLOUD_DEFER {
4137
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4138
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4139
11
                .tag("instance_id", instance_id_)
4140
11
                .tag("tablet_id", tablet_id)
4141
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4142
11
                .tag("recycle segments number", recycle_segments_number)
4143
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4144
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4145
11
                .tag("max rowset version", max_rowset_version)
4146
11
                .tag("min rowset creation time", min_rowset_creation_time)
4147
11
                .tag("max rowset creation time", max_rowset_creation_time)
4148
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4149
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4150
11
                .tag("ret", ret);
4151
11
    };
4152
4153
11
    std::unique_ptr<Transaction> txn;
4154
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4155
0
        LOG_WARNING("failed to recycle tablet ")
4156
0
                .tag("tablet id", tablet_id)
4157
0
                .tag("instance_id", instance_id_)
4158
0
                .tag("reason", "failed to create txn");
4159
0
        ret = -1;
4160
0
    }
4161
4162
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
4163
    // by the related operation logs.
4164
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
4165
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
4166
11
    MetaReader meta_reader(instance_id_);
4167
11
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
4168
11
    if (err == TxnErrorCode::TXN_OK) {
4169
11
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
4170
11
    }
4171
11
    if (err != TxnErrorCode::TXN_OK) {
4172
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4173
0
                .tag("tablet id", tablet_id)
4174
0
                .tag("err", err)
4175
0
                .tag("instance id", instance_id_);
4176
0
        ret = -1;
4177
0
    }
4178
4179
11
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
4180
11
             load_rowset_metas.size(), compact_rowset_metas.size())
4181
11
            .tag("instance_id", instance_id_)
4182
11
            .tag("tablet_id", tablet_id);
4183
4184
11
    SyncExecutor<int> concurrent_delete_executor(
4185
11
            _thread_pool_group.s3_producer_pool,
4186
11
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4187
11
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
4188
4189
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4190
60
        recycle_rowsets_number += 1;
4191
60
        recycle_segments_number += rs_meta.num_segments();
4192
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4193
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4194
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4195
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4196
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4197
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4198
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4199
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
4189
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4190
60
        recycle_rowsets_number += 1;
4191
60
        recycle_segments_number += rs_meta.num_segments();
4192
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4193
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4194
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4195
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4196
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4197
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4198
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4199
60
    };
4200
4201
11
    std::vector<RowsetDeleteTask> all_tasks;
4202
4203
11
    auto create_delete_task = [this](const RowsetMetaCloudPB& rs_meta, std::string_view recycle_key,
4204
11
                                     std::string_view non_versioned_rowset_key =
4205
60
                                             "") -> RowsetDeleteTask {
4206
60
        RowsetDeleteTask task;
4207
60
        task.rowset_meta = rs_meta;
4208
60
        task.recycle_rowset_key = std::string(recycle_key);
4209
60
        task.non_versioned_rowset_key = std::string(non_versioned_rowset_key);
4210
60
        task.versioned_rowset_key = versioned::meta_rowset_key(
4211
60
                {instance_id_, rs_meta.tablet_id(), rs_meta.rowset_id_v2()});
4212
60
        return task;
4213
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clERKNS_17RowsetMetaCloudPBESt17basic_string_viewIcSt11char_traitsIcEESB_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clERKNS_17RowsetMetaCloudPBESt17basic_string_viewIcSt11char_traitsIcEESB_
Line
Count
Source
4205
60
                                             "") -> RowsetDeleteTask {
4206
60
        RowsetDeleteTask task;
4207
60
        task.rowset_meta = rs_meta;
4208
60
        task.recycle_rowset_key = std::string(recycle_key);
4209
60
        task.non_versioned_rowset_key = std::string(non_versioned_rowset_key);
4210
60
        task.versioned_rowset_key = versioned::meta_rowset_key(
4211
60
                {instance_id_, rs_meta.tablet_id(), rs_meta.rowset_id_v2()});
4212
60
        return task;
4213
60
    };
4214
4215
60
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
4216
60
        update_rowset_stats(rs_meta);
4217
        // Version 0-1 rowset has no resource_id and no actual data files,
4218
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4219
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4220
60
        std::string rowset_load_key =
4221
60
                versioned::meta_rowset_load_key({instance_id_, tablet_id, rs_meta.end_version()});
4222
60
        std::string rowset_key = meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4223
60
        RowsetDeleteTask task = create_delete_task(
4224
60
                rs_meta, encode_versioned_key(rowset_load_key, versionstamp), rowset_key);
4225
60
        all_tasks.push_back(std::move(task));
4226
60
    }
4227
4228
11
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
4229
0
        update_rowset_stats(rs_meta);
4230
        // Version 0-1 rowset has no resource_id and no actual data files,
4231
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4232
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4233
0
        std::string rowset_compact_key = versioned::meta_rowset_compact_key(
4234
0
                {instance_id_, tablet_id, rs_meta.end_version()});
4235
0
        std::string rowset_key = meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4236
0
        RowsetDeleteTask task = create_delete_task(
4237
0
                rs_meta, encode_versioned_key(rowset_compact_key, versionstamp), rowset_key);
4238
0
        all_tasks.push_back(std::move(task));
4239
0
    }
4240
4241
11
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
4242
0
        RecycleRowsetPB recycle_rowset;
4243
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
4244
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4245
0
            return -1;
4246
0
        }
4247
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
4248
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
4249
                // in old version, keep this key-value pair and it needs to be checked manually
4250
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4251
0
                return -1;
4252
0
            }
4253
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
4254
                // old version `RecycleRowsetPB` may have empty resource_id, just remove the kv.
4255
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4256
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
4257
0
                return -1;
4258
0
            }
4259
            // decode rowset_id
4260
0
            auto k1 = k;
4261
0
            k1.remove_prefix(1);
4262
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4263
0
            decode_key(&k1, &out);
4264
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4265
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4266
0
            LOG_INFO("delete old-version rowset data")
4267
0
                    .tag("instance_id", instance_id_)
4268
0
                    .tag("tablet_id", tablet_id)
4269
0
                    .tag("rowset_id", rowset_id);
4270
4271
            // Old version RecycleRowsetPB lacks full rowset_meta info (num_segments, schema, etc.),
4272
            // so we must use prefix deletion directly instead of batch delete.
4273
0
            concurrent_delete_executor.add(
4274
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
4275
                        // delete by prefix, the recycle rowset key will be deleted by range later.
4276
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
4277
0
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
4278
0
        } else {
4279
0
            const auto& rowset_meta = recycle_rowset.rowset_meta();
4280
            // Version 0-1 rowset has no resource_id and no actual data files,
4281
            // but still needs ref_count key cleanup, so we add it to all_tasks.
4282
            // It will be filtered out in Phase 2 when building rowsets_to_delete.
4283
0
            RowsetDeleteTask task = create_delete_task(rowset_meta, k);
4284
0
            all_tasks.push_back(std::move(task));
4285
0
        }
4286
0
        return 0;
4287
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_4clESt17basic_string_viewIcSt11char_traitsIcEES8_
4288
4289
11
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
4290
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
4291
0
                .tag("tablet id", tablet_id)
4292
0
                .tag("instance_id", instance_id_)
4293
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
4294
0
        ret = -1;
4295
0
    }
4296
4297
    // Phase 1: Classify tasks by ref_count
4298
11
    std::vector<RowsetDeleteTask> batch_delete_tasks;
4299
60
    for (auto& task : all_tasks) {
4300
60
        int classify_ret = classify_rowset_task_by_ref_count(task, batch_delete_tasks);
4301
60
        if (classify_ret < 0) {
4302
0
            LOG_WARNING("failed to classify rowset task, fallback to old logic")
4303
0
                    .tag("instance_id", instance_id_)
4304
0
                    .tag("tablet_id", tablet_id)
4305
0
                    .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4306
0
            concurrent_delete_executor.add([this, t = std::move(task)]() mutable {
4307
0
                return recycle_rowset_meta_and_data(t.recycle_rowset_key, t.rowset_meta,
4308
0
                                                    t.non_versioned_rowset_key);
4309
0
            });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_5clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_5clEv
4310
0
        }
4311
60
    }
4312
4313
11
    g_bvar_recycler_batch_delete_rowset_plan_count.put(instance_id_, batch_delete_tasks.size());
4314
4315
11
    LOG_INFO("batch delete plan created")
4316
11
            .tag("instance_id", instance_id_)
4317
11
            .tag("tablet_id", tablet_id)
4318
11
            .tag("plan_count", batch_delete_tasks.size());
4319
4320
    // Phase 2: Execute batch delete using existing delete_rowset_data
4321
11
    if (!batch_delete_tasks.empty()) {
4322
10
        std::map<std::string, RowsetMetaCloudPB> rowsets_to_delete;
4323
49
        for (const auto& task : batch_delete_tasks) {
4324
            // Version 0-1 rowset has no resource_id and no actual data files, skip it
4325
49
            if (task.rowset_meta.resource_id().empty()) {
4326
10
                LOG_INFO("skip rowset with empty resource_id in batch delete")
4327
10
                        .tag("instance_id", instance_id_)
4328
10
                        .tag("tablet_id", tablet_id)
4329
10
                        .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4330
10
                continue;
4331
10
            }
4332
39
            rowsets_to_delete[task.rowset_meta.rowset_id_v2()] = task.rowset_meta;
4333
39
        }
4334
4335
        // Only call delete_rowset_data if there are rowsets with actual data to delete
4336
10
        bool delete_success = true;
4337
10
        if (!rowsets_to_delete.empty()) {
4338
9
            RecyclerMetricsContext batch_metrics_context(instance_id_,
4339
9
                                                         "batch_delete_versioned_tablet");
4340
9
            int delete_ret = delete_rowset_data(
4341
9
                    rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, batch_metrics_context);
4342
9
            if (delete_ret != 0) {
4343
0
                LOG_WARNING("batch delete execution failed")
4344
0
                        .tag("instance_id", instance_id_)
4345
0
                        .tag("tablet_id", tablet_id);
4346
0
                g_bvar_recycler_batch_delete_failures.put(instance_id_, 1);
4347
0
                ret = -1;
4348
0
                delete_success = false;
4349
0
            }
4350
9
        }
4351
4352
        // Phase 3: Only cleanup metadata if data deletion succeeded.
4353
        // If deletion failed, keep recycle_rowset_key so next round will retry.
4354
10
        if (delete_success) {
4355
10
            int cleanup_ret = cleanup_rowset_metadata(batch_delete_tasks);
4356
10
            if (cleanup_ret != 0) {
4357
0
                LOG_WARNING("batch delete cleanup failed")
4358
0
                        .tag("instance_id", instance_id_)
4359
0
                        .tag("tablet_id", tablet_id);
4360
0
                ret = -1;
4361
0
            }
4362
10
        }
4363
10
    }
4364
4365
    // Always wait for fallback tasks to complete before returning
4366
11
    bool finished = true;
4367
11
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4368
11
    for (int r : rets) {
4369
0
        if (r != 0) {
4370
0
            ret = -1;
4371
0
        }
4372
0
    }
4373
4374
11
    ret = finished ? ret : -1;
4375
4376
11
    if (ret != 0) { // failed recycle tablet data
4377
0
        LOG_WARNING("recycle versioned tablet failed")
4378
0
                .tag("finished", finished)
4379
0
                .tag("ret", ret)
4380
0
                .tag("instance_id", instance_id_)
4381
0
                .tag("tablet_id", tablet_id);
4382
0
        return ret;
4383
0
    }
4384
4385
11
    tablet_metrics_context_.total_recycled_data_size +=
4386
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4387
11
    tablet_metrics_context_.total_recycled_num += 1;
4388
11
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4389
11
    segment_metrics_context_.total_recycled_data_size +=
4390
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4391
11
    metrics_context.total_recycled_data_size +=
4392
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4393
11
    tablet_metrics_context_.report();
4394
11
    segment_metrics_context_.report();
4395
11
    metrics_context.report();
4396
4397
11
    txn.reset();
4398
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4399
0
        LOG_WARNING("failed to recycle tablet ")
4400
0
                .tag("tablet id", tablet_id)
4401
0
                .tag("instance_id", instance_id_)
4402
0
                .tag("reason", "failed to create txn");
4403
0
        ret = -1;
4404
0
    }
4405
    // delete all rowset kv in this tablet
4406
11
    txn->remove(rs_key0, rs_key1);
4407
11
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4408
4409
    // remove delete bitmap for MoW table
4410
11
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4411
11
    txn->remove(pending_key);
4412
11
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4413
11
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4414
11
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4415
4416
11
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4417
11
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4418
11
    txn->remove(dbm_start_key, dbm_end_key);
4419
11
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4420
11
              << " end=" << hex(dbm_end_key);
4421
4422
11
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
4423
11
    std::string tablet_index_val;
4424
11
    err = txn->get(versioned_idx_key, &tablet_index_val);
4425
11
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
4426
0
        LOG_WARNING("failed to get tablet index kv")
4427
0
                .tag("instance_id", instance_id_)
4428
0
                .tag("tablet_id", tablet_id)
4429
0
                .tag("err", err);
4430
0
        ret = -1;
4431
11
    } else if (err == TxnErrorCode::TXN_OK) {
4432
        // If the tablet index kv exists, we need to delete it
4433
10
        TabletIndexPB tablet_index_pb;
4434
10
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
4435
0
            LOG_WARNING("failed to parse tablet index pb")
4436
0
                    .tag("instance_id", instance_id_)
4437
0
                    .tag("tablet_id", tablet_id);
4438
0
            ret = -1;
4439
10
        } else {
4440
10
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
4441
10
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
4442
10
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
4443
10
            txn->remove(versioned_inverted_idx_key);
4444
10
            txn->remove(versioned_idx_key);
4445
10
        }
4446
10
    }
4447
4448
11
    err = txn->commit();
4449
11
    if (err != TxnErrorCode::TXN_OK) {
4450
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4451
0
        ret = -1;
4452
0
    }
4453
4454
11
    if (ret == 0) {
4455
        // All object files under tablet have been deleted
4456
11
        std::lock_guard lock(recycled_tablets_mtx_);
4457
11
        recycled_tablets_.insert(tablet_id);
4458
11
    }
4459
4460
11
    return ret;
4461
11
}
4462
4463
27
int InstanceRecycler::recycle_rowsets() {
4464
27
    if (should_recycle_versioned_keys()) {
4465
5
        return recycle_versioned_rowsets();
4466
5
    }
4467
4468
22
    const std::string task_name = "recycle_rowsets";
4469
22
    int64_t num_scanned = 0;
4470
22
    int64_t num_expired = 0;
4471
22
    int64_t num_prepare = 0;
4472
22
    int64_t num_compacted = 0;
4473
22
    int64_t num_empty_rowset = 0;
4474
22
    size_t total_rowset_key_size = 0;
4475
22
    size_t total_rowset_value_size = 0;
4476
22
    size_t expired_rowset_size = 0;
4477
22
    std::atomic_long num_recycled = 0;
4478
22
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4479
4480
22
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4481
22
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4482
22
    std::string recyc_rs_key0;
4483
22
    std::string recyc_rs_key1;
4484
22
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4485
22
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4486
4487
22
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4488
4489
22
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4490
22
    register_recycle_task(task_name, start_time);
4491
4492
22
    DORIS_CLOUD_DEFER {
4493
22
        unregister_recycle_task(task_name);
4494
22
        int64_t cost =
4495
22
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4496
22
        metrics_context.finish_report();
4497
22
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4498
22
                .tag("instance_id", instance_id_)
4499
22
                .tag("num_scanned", num_scanned)
4500
22
                .tag("num_expired", num_expired)
4501
22
                .tag("num_recycled", num_recycled)
4502
22
                .tag("num_recycled.prepare", num_prepare)
4503
22
                .tag("num_recycled.compacted", num_compacted)
4504
22
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4505
22
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4506
22
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4507
22
                .tag("expired_rowset_meta_size", expired_rowset_size);
4508
22
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4492
7
    DORIS_CLOUD_DEFER {
4493
7
        unregister_recycle_task(task_name);
4494
7
        int64_t cost =
4495
7
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4496
7
        metrics_context.finish_report();
4497
7
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4498
7
                .tag("instance_id", instance_id_)
4499
7
                .tag("num_scanned", num_scanned)
4500
7
                .tag("num_expired", num_expired)
4501
7
                .tag("num_recycled", num_recycled)
4502
7
                .tag("num_recycled.prepare", num_prepare)
4503
7
                .tag("num_recycled.compacted", num_compacted)
4504
7
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4505
7
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4506
7
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4507
7
                .tag("expired_rowset_meta_size", expired_rowset_size);
4508
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4492
15
    DORIS_CLOUD_DEFER {
4493
15
        unregister_recycle_task(task_name);
4494
15
        int64_t cost =
4495
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4496
15
        metrics_context.finish_report();
4497
15
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4498
15
                .tag("instance_id", instance_id_)
4499
15
                .tag("num_scanned", num_scanned)
4500
15
                .tag("num_expired", num_expired)
4501
15
                .tag("num_recycled", num_recycled)
4502
15
                .tag("num_recycled.prepare", num_prepare)
4503
15
                .tag("num_recycled.compacted", num_compacted)
4504
15
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4505
15
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4506
15
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4507
15
                .tag("expired_rowset_meta_size", expired_rowset_size);
4508
15
    };
4509
4510
22
    std::vector<std::string> rowset_keys;
4511
    // rowset_id -> rowset_meta
4512
    // store rowset id and meta for statistics rs size when delete
4513
22
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
4514
4515
    // Store keys of rowset recycled by background workers
4516
22
    std::mutex async_recycled_rowset_keys_mutex;
4517
22
    std::vector<std::string> async_recycled_rowset_keys;
4518
22
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4519
22
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
4520
22
    worker_pool->start();
4521
    // TODO: batch delete
4522
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4523
4.00k
        std::string dbm_start_key =
4524
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4525
4.00k
        std::string dbm_end_key = dbm_start_key;
4526
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4527
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4528
4.00k
        if (ret != 0) {
4529
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4530
0
                         << instance_id_;
4531
0
        }
4532
4.00k
        return ret;
4533
4.00k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4522
2
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4523
2
        std::string dbm_start_key =
4524
2
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4525
2
        std::string dbm_end_key = dbm_start_key;
4526
2
        encode_int64(INT64_MAX, &dbm_end_key);
4527
2
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4528
2
        if (ret != 0) {
4529
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4530
0
                         << instance_id_;
4531
0
        }
4532
2
        return ret;
4533
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4522
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4523
4.00k
        std::string dbm_start_key =
4524
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4525
4.00k
        std::string dbm_end_key = dbm_start_key;
4526
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4527
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4528
4.00k
        if (ret != 0) {
4529
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4530
0
                         << instance_id_;
4531
0
        }
4532
4.00k
        return ret;
4533
4.00k
    };
4534
22
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
4535
902
                                            int64_t tablet_id, const std::string& rowset_id) {
4536
        // Try to delete rowset data in background thread
4537
902
        int ret = worker_pool->submit_with_timeout(
4538
902
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4539
812
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4540
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4541
0
                        return;
4542
0
                    }
4543
812
                    std::vector<std::string> keys;
4544
812
                    {
4545
812
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4546
812
                        async_recycled_rowset_keys.push_back(std::move(key));
4547
812
                        if (async_recycled_rowset_keys.size() > 100) {
4548
7
                            keys.swap(async_recycled_rowset_keys);
4549
7
                        }
4550
812
                    }
4551
812
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4552
812
                    if (keys.empty()) return;
4553
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4554
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4555
0
                                     << instance_id_;
4556
7
                    } else {
4557
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4558
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4559
7
                                           num_recycled, start_time);
4560
7
                    }
4561
7
                },
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4538
2
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4539
2
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4540
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4541
0
                        return;
4542
0
                    }
4543
2
                    std::vector<std::string> keys;
4544
2
                    {
4545
2
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4546
2
                        async_recycled_rowset_keys.push_back(std::move(key));
4547
2
                        if (async_recycled_rowset_keys.size() > 100) {
4548
0
                            keys.swap(async_recycled_rowset_keys);
4549
0
                        }
4550
2
                    }
4551
2
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4552
2
                    if (keys.empty()) return;
4553
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4554
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4555
0
                                     << instance_id_;
4556
0
                    } else {
4557
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4558
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4559
0
                                           num_recycled, start_time);
4560
0
                    }
4561
0
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4538
810
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4539
810
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4540
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4541
0
                        return;
4542
0
                    }
4543
810
                    std::vector<std::string> keys;
4544
810
                    {
4545
810
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4546
810
                        async_recycled_rowset_keys.push_back(std::move(key));
4547
810
                        if (async_recycled_rowset_keys.size() > 100) {
4548
7
                            keys.swap(async_recycled_rowset_keys);
4549
7
                        }
4550
810
                    }
4551
810
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4552
810
                    if (keys.empty()) return;
4553
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4554
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4555
0
                                     << instance_id_;
4556
7
                    } else {
4557
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4558
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4559
7
                                           num_recycled, start_time);
4560
7
                    }
4561
7
                },
4562
902
                0);
4563
902
        if (ret == 0) return 0;
4564
        // Submit task failed, delete rowset data in current thread
4565
90
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4566
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4567
0
            return -1;
4568
0
        }
4569
90
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4570
0
            return -1;
4571
0
        }
4572
90
        rowset_keys.push_back(std::move(key));
4573
90
        return 0;
4574
90
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4535
2
                                            int64_t tablet_id, const std::string& rowset_id) {
4536
        // Try to delete rowset data in background thread
4537
2
        int ret = worker_pool->submit_with_timeout(
4538
2
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4539
2
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4540
2
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4541
2
                        return;
4542
2
                    }
4543
2
                    std::vector<std::string> keys;
4544
2
                    {
4545
2
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4546
2
                        async_recycled_rowset_keys.push_back(std::move(key));
4547
2
                        if (async_recycled_rowset_keys.size() > 100) {
4548
2
                            keys.swap(async_recycled_rowset_keys);
4549
2
                        }
4550
2
                    }
4551
2
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4552
2
                    if (keys.empty()) return;
4553
2
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4554
2
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4555
2
                                     << instance_id_;
4556
2
                    } else {
4557
2
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4558
2
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4559
2
                                           num_recycled, start_time);
4560
2
                    }
4561
2
                },
4562
2
                0);
4563
2
        if (ret == 0) return 0;
4564
        // Submit task failed, delete rowset data in current thread
4565
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4566
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4567
0
            return -1;
4568
0
        }
4569
0
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4570
0
            return -1;
4571
0
        }
4572
0
        rowset_keys.push_back(std::move(key));
4573
0
        return 0;
4574
0
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4535
900
                                            int64_t tablet_id, const std::string& rowset_id) {
4536
        // Try to delete rowset data in background thread
4537
900
        int ret = worker_pool->submit_with_timeout(
4538
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4539
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4540
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4541
900
                        return;
4542
900
                    }
4543
900
                    std::vector<std::string> keys;
4544
900
                    {
4545
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4546
900
                        async_recycled_rowset_keys.push_back(std::move(key));
4547
900
                        if (async_recycled_rowset_keys.size() > 100) {
4548
900
                            keys.swap(async_recycled_rowset_keys);
4549
900
                        }
4550
900
                    }
4551
900
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4552
900
                    if (keys.empty()) return;
4553
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4554
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4555
900
                                     << instance_id_;
4556
900
                    } else {
4557
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4558
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4559
900
                                           num_recycled, start_time);
4560
900
                    }
4561
900
                },
4562
900
                0);
4563
900
        if (ret == 0) return 0;
4564
        // Submit task failed, delete rowset data in current thread
4565
90
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4566
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4567
0
            return -1;
4568
0
        }
4569
90
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4570
0
            return -1;
4571
0
        }
4572
90
        rowset_keys.push_back(std::move(key));
4573
90
        return 0;
4574
90
    };
4575
4576
22
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4577
4578
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4579
7.75k
        ++num_scanned;
4580
7.75k
        total_rowset_key_size += k.size();
4581
7.75k
        total_rowset_value_size += v.size();
4582
7.75k
        RecycleRowsetPB rowset;
4583
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4584
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4585
0
            return -1;
4586
0
        }
4587
4588
7.75k
        int64_t current_time = ::time(nullptr);
4589
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4590
4591
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4592
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4593
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4594
7.75k
        if (current_time < expiration) { // not expired
4595
0
            return 0;
4596
0
        }
4597
7.75k
        ++num_expired;
4598
7.75k
        expired_rowset_size += v.size();
4599
4600
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4601
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4602
                // in old version, keep this key-value pair and it needs to be checked manually
4603
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4604
0
                return -1;
4605
0
            }
4606
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4607
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4608
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4609
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4610
0
                rowset_keys.emplace_back(k);
4611
0
                return -1;
4612
0
            }
4613
            // decode rowset_id
4614
250
            auto k1 = k;
4615
250
            k1.remove_prefix(1);
4616
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4617
250
            decode_key(&k1, &out);
4618
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4619
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4620
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4621
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4622
250
                      << " task_type=" << metrics_context.operation_type;
4623
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4624
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4625
0
                return -1;
4626
0
            }
4627
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4628
250
            metrics_context.total_recycled_num++;
4629
250
            segment_metrics_context_.total_recycled_data_size +=
4630
250
                    rowset.rowset_meta().total_disk_size();
4631
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4632
250
            segment_metrics_context_.report();
4633
250
            metrics_context.report();
4634
250
            return 0;
4635
250
        }
4636
4637
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
4638
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
4639
7.50k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
4640
7.50k
            if (mark_ret == -1) {
4641
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
4642
0
                             << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4643
0
                             << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4644
0
                             << "]";
4645
0
                return -1;
4646
7.50k
            } else if (mark_ret == 1) {
4647
3.75k
                LOG(INFO)
4648
3.75k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
4649
3.75k
                           "next turn, instance_id="
4650
3.75k
                        << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4651
3.75k
                        << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4652
3.75k
                return 0;
4653
3.75k
            }
4654
7.50k
        }
4655
4656
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
4657
3.75k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
4658
3.75k
                      << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4659
3.75k
                      << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4660
4661
3.75k
            if (rowset_meta->end_version() != 1) {
4662
3.75k
                int ret = abort_txn_or_job_for_recycle(rowset);
4663
4664
3.75k
                if (ret != 0) {
4665
0
                    LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
4666
0
                                 << instance_id_ << " tablet_id=" << rowset.tablet_id()
4667
0
                                 << " version=[" << rowset_meta->start_version() << '-'
4668
0
                                 << rowset_meta->end_version() << "]";
4669
0
                    return ret;
4670
0
                }
4671
3.75k
            }
4672
3.75k
        }
4673
4674
        // TODO(plat1ko): check rowset not referenced
4675
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4676
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4677
0
                LOG_INFO("recycle rowset that has empty resource id");
4678
0
            } else {
4679
                // other situations, keep this key-value pair and it needs to be checked manually
4680
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4681
0
                return -1;
4682
0
            }
4683
0
        }
4684
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4685
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4686
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4687
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4688
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4689
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4690
3.75k
                  << " rowset_meta_size=" << v.size()
4691
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4692
3.75k
                  << " task_type=" << metrics_context.operation_type;
4693
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4694
            // unable to calculate file path, can only be deleted by rowset id prefix
4695
652
            num_prepare += 1;
4696
652
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4697
652
                                             rowset_meta->tablet_id(),
4698
652
                                             rowset_meta->rowset_id_v2()) != 0) {
4699
0
                return -1;
4700
0
            }
4701
3.10k
        } else {
4702
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4703
3.10k
            rowset_keys.emplace_back(k);
4704
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4705
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4706
3.10k
                ++num_empty_rowset;
4707
3.10k
            }
4708
3.10k
        }
4709
3.75k
        return 0;
4710
3.75k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4578
7
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4579
7
        ++num_scanned;
4580
7
        total_rowset_key_size += k.size();
4581
7
        total_rowset_value_size += v.size();
4582
7
        RecycleRowsetPB rowset;
4583
7
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4584
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4585
0
            return -1;
4586
0
        }
4587
4588
7
        int64_t current_time = ::time(nullptr);
4589
7
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4590
4591
7
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4592
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4593
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4594
7
        if (current_time < expiration) { // not expired
4595
0
            return 0;
4596
0
        }
4597
7
        ++num_expired;
4598
7
        expired_rowset_size += v.size();
4599
4600
7
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4601
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4602
                // in old version, keep this key-value pair and it needs to be checked manually
4603
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4604
0
                return -1;
4605
0
            }
4606
0
            if (rowset.resource_id().empty()) [[unlikely]] {
4607
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4608
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4609
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4610
0
                rowset_keys.emplace_back(k);
4611
0
                return -1;
4612
0
            }
4613
            // decode rowset_id
4614
0
            auto k1 = k;
4615
0
            k1.remove_prefix(1);
4616
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4617
0
            decode_key(&k1, &out);
4618
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4619
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4620
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4621
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4622
0
                      << " task_type=" << metrics_context.operation_type;
4623
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4624
0
                                             rowset.tablet_id(), rowset_id) != 0) {
4625
0
                return -1;
4626
0
            }
4627
0
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4628
0
            metrics_context.total_recycled_num++;
4629
0
            segment_metrics_context_.total_recycled_data_size +=
4630
0
                    rowset.rowset_meta().total_disk_size();
4631
0
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4632
0
            segment_metrics_context_.report();
4633
0
            metrics_context.report();
4634
0
            return 0;
4635
0
        }
4636
4637
7
        auto* rowset_meta = rowset.mutable_rowset_meta();
4638
7
        if (config::enable_mark_delete_rowset_before_recycle) {
4639
7
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
4640
7
            if (mark_ret == -1) {
4641
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
4642
0
                             << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4643
0
                             << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4644
0
                             << "]";
4645
0
                return -1;
4646
7
            } else if (mark_ret == 1) {
4647
5
                LOG(INFO)
4648
5
                        << "rowset already marked as recycled, recycler will delete data and kv at "
4649
5
                           "next turn, instance_id="
4650
5
                        << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4651
5
                        << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4652
5
                return 0;
4653
5
            }
4654
7
        }
4655
4656
2
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
4657
2
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
4658
2
                      << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4659
2
                      << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4660
4661
2
            if (rowset_meta->end_version() != 1) {
4662
2
                int ret = abort_txn_or_job_for_recycle(rowset);
4663
4664
2
                if (ret != 0) {
4665
0
                    LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
4666
0
                                 << instance_id_ << " tablet_id=" << rowset.tablet_id()
4667
0
                                 << " version=[" << rowset_meta->start_version() << '-'
4668
0
                                 << rowset_meta->end_version() << "]";
4669
0
                    return ret;
4670
0
                }
4671
2
            }
4672
2
        }
4673
4674
        // TODO(plat1ko): check rowset not referenced
4675
2
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4676
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4677
0
                LOG_INFO("recycle rowset that has empty resource id");
4678
0
            } else {
4679
                // other situations, keep this key-value pair and it needs to be checked manually
4680
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4681
0
                return -1;
4682
0
            }
4683
0
        }
4684
2
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4685
2
                  << " tablet_id=" << rowset_meta->tablet_id()
4686
2
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4687
2
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4688
2
                  << "] txn_id=" << rowset_meta->txn_id()
4689
2
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4690
2
                  << " rowset_meta_size=" << v.size()
4691
2
                  << " creation_time=" << rowset_meta->creation_time()
4692
2
                  << " task_type=" << metrics_context.operation_type;
4693
2
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4694
            // unable to calculate file path, can only be deleted by rowset id prefix
4695
2
            num_prepare += 1;
4696
2
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4697
2
                                             rowset_meta->tablet_id(),
4698
2
                                             rowset_meta->rowset_id_v2()) != 0) {
4699
0
                return -1;
4700
0
            }
4701
2
        } else {
4702
0
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4703
0
            rowset_keys.emplace_back(k);
4704
0
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4705
0
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4706
0
                ++num_empty_rowset;
4707
0
            }
4708
0
        }
4709
2
        return 0;
4710
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4578
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4579
7.75k
        ++num_scanned;
4580
7.75k
        total_rowset_key_size += k.size();
4581
7.75k
        total_rowset_value_size += v.size();
4582
7.75k
        RecycleRowsetPB rowset;
4583
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4584
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4585
0
            return -1;
4586
0
        }
4587
4588
7.75k
        int64_t current_time = ::time(nullptr);
4589
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4590
4591
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4592
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4593
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4594
7.75k
        if (current_time < expiration) { // not expired
4595
0
            return 0;
4596
0
        }
4597
7.75k
        ++num_expired;
4598
7.75k
        expired_rowset_size += v.size();
4599
4600
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4601
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4602
                // in old version, keep this key-value pair and it needs to be checked manually
4603
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4604
0
                return -1;
4605
0
            }
4606
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4607
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4608
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4609
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4610
0
                rowset_keys.emplace_back(k);
4611
0
                return -1;
4612
0
            }
4613
            // decode rowset_id
4614
250
            auto k1 = k;
4615
250
            k1.remove_prefix(1);
4616
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4617
250
            decode_key(&k1, &out);
4618
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4619
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4620
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4621
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4622
250
                      << " task_type=" << metrics_context.operation_type;
4623
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4624
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4625
0
                return -1;
4626
0
            }
4627
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4628
250
            metrics_context.total_recycled_num++;
4629
250
            segment_metrics_context_.total_recycled_data_size +=
4630
250
                    rowset.rowset_meta().total_disk_size();
4631
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4632
250
            segment_metrics_context_.report();
4633
250
            metrics_context.report();
4634
250
            return 0;
4635
250
        }
4636
4637
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
4638
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
4639
7.50k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
4640
7.50k
            if (mark_ret == -1) {
4641
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
4642
0
                             << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4643
0
                             << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4644
0
                             << "]";
4645
0
                return -1;
4646
7.50k
            } else if (mark_ret == 1) {
4647
3.75k
                LOG(INFO)
4648
3.75k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
4649
3.75k
                           "next turn, instance_id="
4650
3.75k
                        << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4651
3.75k
                        << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4652
3.75k
                return 0;
4653
3.75k
            }
4654
7.50k
        }
4655
4656
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
4657
3.75k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
4658
3.75k
                      << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4659
3.75k
                      << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4660
4661
3.75k
            if (rowset_meta->end_version() != 1) {
4662
3.75k
                int ret = abort_txn_or_job_for_recycle(rowset);
4663
4664
3.75k
                if (ret != 0) {
4665
0
                    LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
4666
0
                                 << instance_id_ << " tablet_id=" << rowset.tablet_id()
4667
0
                                 << " version=[" << rowset_meta->start_version() << '-'
4668
0
                                 << rowset_meta->end_version() << "]";
4669
0
                    return ret;
4670
0
                }
4671
3.75k
            }
4672
3.75k
        }
4673
4674
        // TODO(plat1ko): check rowset not referenced
4675
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4676
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4677
0
                LOG_INFO("recycle rowset that has empty resource id");
4678
0
            } else {
4679
                // other situations, keep this key-value pair and it needs to be checked manually
4680
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4681
0
                return -1;
4682
0
            }
4683
0
        }
4684
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4685
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4686
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4687
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4688
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4689
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4690
3.75k
                  << " rowset_meta_size=" << v.size()
4691
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4692
3.75k
                  << " task_type=" << metrics_context.operation_type;
4693
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4694
            // unable to calculate file path, can only be deleted by rowset id prefix
4695
650
            num_prepare += 1;
4696
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4697
650
                                             rowset_meta->tablet_id(),
4698
650
                                             rowset_meta->rowset_id_v2()) != 0) {
4699
0
                return -1;
4700
0
            }
4701
3.10k
        } else {
4702
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4703
3.10k
            rowset_keys.emplace_back(k);
4704
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4705
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4706
3.10k
                ++num_empty_rowset;
4707
3.10k
            }
4708
3.10k
        }
4709
3.75k
        return 0;
4710
3.75k
    };
4711
4712
49
    auto loop_done = [&]() -> int {
4713
49
        std::vector<std::string> rowset_keys_to_delete;
4714
        // rowset_id -> rowset_meta
4715
        // store rowset id and meta for statistics rs size when delete
4716
49
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4717
49
        rowset_keys_to_delete.swap(rowset_keys);
4718
49
        rowsets_to_delete.swap(rowsets);
4719
49
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4720
49
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4721
49
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4722
49
                                   metrics_context) != 0) {
4723
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4724
0
                return;
4725
0
            }
4726
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4727
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4728
0
                    return;
4729
0
                }
4730
3.10k
            }
4731
49
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4732
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4733
0
                return;
4734
0
            }
4735
49
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4736
49
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
4720
7
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4721
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4722
7
                                   metrics_context) != 0) {
4723
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4724
0
                return;
4725
0
            }
4726
7
            for (const auto& [_, rs] : rowsets_to_delete) {
4727
0
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4728
0
                    return;
4729
0
                }
4730
0
            }
4731
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4732
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4733
0
                return;
4734
0
            }
4735
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4736
7
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
4720
42
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4721
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4722
42
                                   metrics_context) != 0) {
4723
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4724
0
                return;
4725
0
            }
4726
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4727
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4728
0
                    return;
4729
0
                }
4730
3.10k
            }
4731
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4732
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4733
0
                return;
4734
0
            }
4735
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4736
42
        });
4737
49
        return 0;
4738
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
4712
7
    auto loop_done = [&]() -> int {
4713
7
        std::vector<std::string> rowset_keys_to_delete;
4714
        // rowset_id -> rowset_meta
4715
        // store rowset id and meta for statistics rs size when delete
4716
7
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4717
7
        rowset_keys_to_delete.swap(rowset_keys);
4718
7
        rowsets_to_delete.swap(rowsets);
4719
7
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4720
7
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4721
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4722
7
                                   metrics_context) != 0) {
4723
7
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4724
7
                return;
4725
7
            }
4726
7
            for (const auto& [_, rs] : rowsets_to_delete) {
4727
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4728
7
                    return;
4729
7
                }
4730
7
            }
4731
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4732
7
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4733
7
                return;
4734
7
            }
4735
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4736
7
        });
4737
7
        return 0;
4738
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
4712
42
    auto loop_done = [&]() -> int {
4713
42
        std::vector<std::string> rowset_keys_to_delete;
4714
        // rowset_id -> rowset_meta
4715
        // store rowset id and meta for statistics rs size when delete
4716
42
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4717
42
        rowset_keys_to_delete.swap(rowset_keys);
4718
42
        rowsets_to_delete.swap(rowsets);
4719
42
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4720
42
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4721
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4722
42
                                   metrics_context) != 0) {
4723
42
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4724
42
                return;
4725
42
            }
4726
42
            for (const auto& [_, rs] : rowsets_to_delete) {
4727
42
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4728
42
                    return;
4729
42
                }
4730
42
            }
4731
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4732
42
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4733
42
                return;
4734
42
            }
4735
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4736
42
        });
4737
42
        return 0;
4738
42
    };
4739
4740
22
    if (config::enable_recycler_stats_metrics) {
4741
0
        scan_and_statistics_rowsets();
4742
0
    }
4743
    // recycle_func and loop_done for scan and recycle
4744
22
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
4745
22
                               std::move(loop_done));
4746
4747
22
    worker_pool->stop();
4748
4749
22
    if (!async_recycled_rowset_keys.empty()) {
4750
5
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
4751
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4752
0
            return -1;
4753
5
        } else {
4754
5
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
4755
5
        }
4756
5
    }
4757
22
    return ret;
4758
22
}
4759
4760
13
int InstanceRecycler::recycle_restore_jobs() {
4761
13
    const std::string task_name = "recycle_restore_jobs";
4762
13
    int64_t num_scanned = 0;
4763
13
    int64_t num_expired = 0;
4764
13
    int64_t num_recycled = 0;
4765
13
    int64_t num_aborted = 0;
4766
4767
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4768
4769
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
4770
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
4771
13
    std::string restore_job_key0;
4772
13
    std::string restore_job_key1;
4773
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
4774
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
4775
4776
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
4777
4778
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4779
13
    register_recycle_task(task_name, start_time);
4780
4781
13
    DORIS_CLOUD_DEFER {
4782
13
        unregister_recycle_task(task_name);
4783
13
        int64_t cost =
4784
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4785
13
        metrics_context.finish_report();
4786
4787
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
4788
13
                .tag("instance_id", instance_id_)
4789
13
                .tag("num_scanned", num_scanned)
4790
13
                .tag("num_expired", num_expired)
4791
13
                .tag("num_recycled", num_recycled)
4792
13
                .tag("num_aborted", num_aborted);
4793
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
4781
13
    DORIS_CLOUD_DEFER {
4782
13
        unregister_recycle_task(task_name);
4783
13
        int64_t cost =
4784
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4785
13
        metrics_context.finish_report();
4786
4787
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
4788
13
                .tag("instance_id", instance_id_)
4789
13
                .tag("num_scanned", num_scanned)
4790
13
                .tag("num_expired", num_expired)
4791
13
                .tag("num_recycled", num_recycled)
4792
13
                .tag("num_aborted", num_aborted);
4793
13
    };
4794
4795
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4796
4797
13
    std::vector<std::string_view> restore_job_keys;
4798
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
4799
41
        ++num_scanned;
4800
41
        RestoreJobCloudPB restore_job_pb;
4801
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
4802
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
4803
0
            return -1;
4804
0
        }
4805
41
        int64_t expiration =
4806
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
4807
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
4808
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
4809
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
4810
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
4811
0
                   << " state=" << restore_job_pb.state();
4812
41
        int64_t current_time = ::time(nullptr);
4813
41
        if (current_time < expiration) { // not expired
4814
0
            return 0;
4815
0
        }
4816
41
        ++num_expired;
4817
4818
41
        int64_t tablet_id = restore_job_pb.tablet_id();
4819
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
4820
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
4821
4822
41
        std::unique_ptr<Transaction> txn;
4823
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4824
41
        if (err != TxnErrorCode::TXN_OK) {
4825
0
            LOG_WARNING("failed to recycle restore job")
4826
0
                    .tag("err", err)
4827
0
                    .tag("tablet id", tablet_id)
4828
0
                    .tag("instance_id", instance_id_)
4829
0
                    .tag("reason", "failed to create txn");
4830
0
            return -1;
4831
0
        }
4832
4833
41
        std::string val;
4834
41
        err = txn->get(k, &val);
4835
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
4836
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
4837
0
            return 0;
4838
0
        }
4839
41
        if (err != TxnErrorCode::TXN_OK) {
4840
0
            LOG_WARNING("failed to get kv");
4841
0
            return -1;
4842
0
        }
4843
41
        restore_job_pb.Clear();
4844
41
        if (!restore_job_pb.ParseFromString(val)) {
4845
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
4846
0
            return -1;
4847
0
        }
4848
4849
        // PREPARED or COMMITTED, change state to DROPPED and return
4850
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
4851
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
4852
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
4853
0
            restore_job_pb.set_need_recycle_data(true);
4854
0
            txn->put(k, restore_job_pb.SerializeAsString());
4855
0
            err = txn->commit();
4856
0
            if (err != TxnErrorCode::TXN_OK) {
4857
0
                LOG_WARNING("failed to commit txn: {}", err);
4858
0
                return -1;
4859
0
            }
4860
0
            num_aborted++;
4861
0
            return 0;
4862
0
        }
4863
4864
        // Change state to RECYCLING
4865
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
4866
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
4867
21
            txn->put(k, restore_job_pb.SerializeAsString());
4868
21
            err = txn->commit();
4869
21
            if (err != TxnErrorCode::TXN_OK) {
4870
0
                LOG_WARNING("failed to commit txn: {}", err);
4871
0
                return -1;
4872
0
            }
4873
21
            return 0;
4874
21
        }
4875
4876
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4877
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4878
4879
        // Recycle all data associated with the restore job.
4880
        // This includes rowsets, segments, and related resources.
4881
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
4882
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
4883
0
            LOG_WARNING("failed to recycle tablet")
4884
0
                    .tag("tablet_id", tablet_id)
4885
0
                    .tag("instance_id", instance_id_);
4886
0
            return -1;
4887
0
        }
4888
4889
        // delete all restore job rowset kv
4890
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4891
4892
20
        err = txn->commit();
4893
20
        if (err != TxnErrorCode::TXN_OK) {
4894
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
4895
0
                    .tag("err", err)
4896
0
                    .tag("tablet id", tablet_id)
4897
0
                    .tag("instance_id", instance_id_)
4898
0
                    .tag("reason", "failed to commit txn");
4899
0
            return -1;
4900
0
        }
4901
4902
20
        metrics_context.total_recycled_num = ++num_recycled;
4903
20
        metrics_context.report();
4904
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4905
20
        restore_job_keys.push_back(k);
4906
4907
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
4908
20
                  << " tablet_id=" << tablet_id;
4909
20
        return 0;
4910
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4798
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
4799
41
        ++num_scanned;
4800
41
        RestoreJobCloudPB restore_job_pb;
4801
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
4802
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
4803
0
            return -1;
4804
0
        }
4805
41
        int64_t expiration =
4806
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
4807
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
4808
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
4809
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
4810
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
4811
0
                   << " state=" << restore_job_pb.state();
4812
41
        int64_t current_time = ::time(nullptr);
4813
41
        if (current_time < expiration) { // not expired
4814
0
            return 0;
4815
0
        }
4816
41
        ++num_expired;
4817
4818
41
        int64_t tablet_id = restore_job_pb.tablet_id();
4819
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
4820
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
4821
4822
41
        std::unique_ptr<Transaction> txn;
4823
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4824
41
        if (err != TxnErrorCode::TXN_OK) {
4825
0
            LOG_WARNING("failed to recycle restore job")
4826
0
                    .tag("err", err)
4827
0
                    .tag("tablet id", tablet_id)
4828
0
                    .tag("instance_id", instance_id_)
4829
0
                    .tag("reason", "failed to create txn");
4830
0
            return -1;
4831
0
        }
4832
4833
41
        std::string val;
4834
41
        err = txn->get(k, &val);
4835
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
4836
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
4837
0
            return 0;
4838
0
        }
4839
41
        if (err != TxnErrorCode::TXN_OK) {
4840
0
            LOG_WARNING("failed to get kv");
4841
0
            return -1;
4842
0
        }
4843
41
        restore_job_pb.Clear();
4844
41
        if (!restore_job_pb.ParseFromString(val)) {
4845
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
4846
0
            return -1;
4847
0
        }
4848
4849
        // PREPARED or COMMITTED, change state to DROPPED and return
4850
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
4851
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
4852
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
4853
0
            restore_job_pb.set_need_recycle_data(true);
4854
0
            txn->put(k, restore_job_pb.SerializeAsString());
4855
0
            err = txn->commit();
4856
0
            if (err != TxnErrorCode::TXN_OK) {
4857
0
                LOG_WARNING("failed to commit txn: {}", err);
4858
0
                return -1;
4859
0
            }
4860
0
            num_aborted++;
4861
0
            return 0;
4862
0
        }
4863
4864
        // Change state to RECYCLING
4865
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
4866
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
4867
21
            txn->put(k, restore_job_pb.SerializeAsString());
4868
21
            err = txn->commit();
4869
21
            if (err != TxnErrorCode::TXN_OK) {
4870
0
                LOG_WARNING("failed to commit txn: {}", err);
4871
0
                return -1;
4872
0
            }
4873
21
            return 0;
4874
21
        }
4875
4876
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4877
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4878
4879
        // Recycle all data associated with the restore job.
4880
        // This includes rowsets, segments, and related resources.
4881
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
4882
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
4883
0
            LOG_WARNING("failed to recycle tablet")
4884
0
                    .tag("tablet_id", tablet_id)
4885
0
                    .tag("instance_id", instance_id_);
4886
0
            return -1;
4887
0
        }
4888
4889
        // delete all restore job rowset kv
4890
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4891
4892
20
        err = txn->commit();
4893
20
        if (err != TxnErrorCode::TXN_OK) {
4894
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
4895
0
                    .tag("err", err)
4896
0
                    .tag("tablet id", tablet_id)
4897
0
                    .tag("instance_id", instance_id_)
4898
0
                    .tag("reason", "failed to commit txn");
4899
0
            return -1;
4900
0
        }
4901
4902
20
        metrics_context.total_recycled_num = ++num_recycled;
4903
20
        metrics_context.report();
4904
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
4905
20
        restore_job_keys.push_back(k);
4906
4907
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
4908
20
                  << " tablet_id=" << tablet_id;
4909
20
        return 0;
4910
20
    };
4911
4912
13
    auto loop_done = [&restore_job_keys, this]() -> int {
4913
3
        if (restore_job_keys.empty()) return 0;
4914
1
        DORIS_CLOUD_DEFER {
4915
1
            restore_job_keys.clear();
4916
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
4914
1
        DORIS_CLOUD_DEFER {
4915
1
            restore_job_keys.clear();
4916
1
        };
4917
4918
1
        std::unique_ptr<Transaction> txn;
4919
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4920
1
        if (err != TxnErrorCode::TXN_OK) {
4921
0
            LOG_WARNING("failed to recycle restore job")
4922
0
                    .tag("err", err)
4923
0
                    .tag("instance_id", instance_id_)
4924
0
                    .tag("reason", "failed to create txn");
4925
0
            return -1;
4926
0
        }
4927
20
        for (auto& k : restore_job_keys) {
4928
20
            txn->remove(k);
4929
20
        }
4930
1
        err = txn->commit();
4931
1
        if (err != TxnErrorCode::TXN_OK) {
4932
0
            LOG_WARNING("failed to recycle restore job")
4933
0
                    .tag("err", err)
4934
0
                    .tag("instance_id", instance_id_)
4935
0
                    .tag("reason", "failed to commit txn");
4936
0
            return -1;
4937
0
        }
4938
1
        return 0;
4939
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
4912
3
    auto loop_done = [&restore_job_keys, this]() -> int {
4913
3
        if (restore_job_keys.empty()) return 0;
4914
1
        DORIS_CLOUD_DEFER {
4915
1
            restore_job_keys.clear();
4916
1
        };
4917
4918
1
        std::unique_ptr<Transaction> txn;
4919
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
4920
1
        if (err != TxnErrorCode::TXN_OK) {
4921
0
            LOG_WARNING("failed to recycle restore job")
4922
0
                    .tag("err", err)
4923
0
                    .tag("instance_id", instance_id_)
4924
0
                    .tag("reason", "failed to create txn");
4925
0
            return -1;
4926
0
        }
4927
20
        for (auto& k : restore_job_keys) {
4928
20
            txn->remove(k);
4929
20
        }
4930
1
        err = txn->commit();
4931
1
        if (err != TxnErrorCode::TXN_OK) {
4932
0
            LOG_WARNING("failed to recycle restore job")
4933
0
                    .tag("err", err)
4934
0
                    .tag("instance_id", instance_id_)
4935
0
                    .tag("reason", "failed to commit txn");
4936
0
            return -1;
4937
0
        }
4938
1
        return 0;
4939
1
    };
4940
4941
13
    if (config::enable_recycler_stats_metrics) {
4942
0
        scan_and_statistics_restore_jobs();
4943
0
    }
4944
4945
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
4946
13
                            std::move(loop_done));
4947
13
}
4948
4949
8
int InstanceRecycler::recycle_versioned_rowsets() {
4950
8
    const std::string task_name = "recycle_rowsets";
4951
8
    int64_t num_scanned = 0;
4952
8
    int64_t num_expired = 0;
4953
8
    int64_t num_prepare = 0;
4954
8
    int64_t num_compacted = 0;
4955
8
    int64_t num_empty_rowset = 0;
4956
8
    size_t total_rowset_key_size = 0;
4957
8
    size_t total_rowset_value_size = 0;
4958
8
    size_t expired_rowset_size = 0;
4959
8
    std::atomic_long num_recycled = 0;
4960
8
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4961
4962
8
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4963
8
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4964
8
    std::string recyc_rs_key0;
4965
8
    std::string recyc_rs_key1;
4966
8
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4967
8
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4968
4969
8
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4970
4971
8
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4972
8
    register_recycle_task(task_name, start_time);
4973
4974
8
    DORIS_CLOUD_DEFER {
4975
8
        unregister_recycle_task(task_name);
4976
8
        int64_t cost =
4977
8
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4978
8
        metrics_context.finish_report();
4979
8
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4980
8
                .tag("instance_id", instance_id_)
4981
8
                .tag("num_scanned", num_scanned)
4982
8
                .tag("num_expired", num_expired)
4983
8
                .tag("num_recycled", num_recycled)
4984
8
                .tag("num_recycled.prepare", num_prepare)
4985
8
                .tag("num_recycled.compacted", num_compacted)
4986
8
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4987
8
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4988
8
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4989
8
                .tag("expired_rowset_meta_size", expired_rowset_size);
4990
8
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
4974
8
    DORIS_CLOUD_DEFER {
4975
8
        unregister_recycle_task(task_name);
4976
8
        int64_t cost =
4977
8
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4978
8
        metrics_context.finish_report();
4979
8
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4980
8
                .tag("instance_id", instance_id_)
4981
8
                .tag("num_scanned", num_scanned)
4982
8
                .tag("num_expired", num_expired)
4983
8
                .tag("num_recycled", num_recycled)
4984
8
                .tag("num_recycled.prepare", num_prepare)
4985
8
                .tag("num_recycled.compacted", num_compacted)
4986
8
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4987
8
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4988
8
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4989
8
                .tag("expired_rowset_meta_size", expired_rowset_size);
4990
8
    };
4991
4992
8
    std::vector<std::string> orphan_rowset_keys;
4993
4994
    // Store keys of rowset recycled by background workers
4995
8
    std::mutex async_recycled_rowset_keys_mutex;
4996
8
    std::vector<std::string> async_recycled_rowset_keys;
4997
8
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4998
8
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
4999
8
    worker_pool->start();
5000
8
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
5001
200
                                            int64_t tablet_id, const std::string& rowset_id) {
5002
        // Try to delete rowset data in background thread
5003
200
        int ret = worker_pool->submit_with_timeout(
5004
200
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5005
200
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5006
200
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5007
200
                        return;
5008
200
                    }
5009
                    // The async recycled rowsets are staled format or has not been used,
5010
                    // so we don't need to check the rowset ref count key.
5011
0
                    std::vector<std::string> keys;
5012
0
                    {
5013
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5014
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5015
0
                        if (async_recycled_rowset_keys.size() > 100) {
5016
0
                            keys.swap(async_recycled_rowset_keys);
5017
0
                        }
5018
0
                    }
5019
0
                    if (keys.empty()) return;
5020
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5021
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5022
0
                                     << instance_id_;
5023
0
                    } else {
5024
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5025
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5026
0
                                           num_recycled, start_time);
5027
0
                    }
5028
0
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
5004
200
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5005
200
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5006
200
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5007
200
                        return;
5008
200
                    }
5009
                    // The async recycled rowsets are staled format or has not been used,
5010
                    // so we don't need to check the rowset ref count key.
5011
0
                    std::vector<std::string> keys;
5012
0
                    {
5013
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5014
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5015
0
                        if (async_recycled_rowset_keys.size() > 100) {
5016
0
                            keys.swap(async_recycled_rowset_keys);
5017
0
                        }
5018
0
                    }
5019
0
                    if (keys.empty()) return;
5020
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5021
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5022
0
                                     << instance_id_;
5023
0
                    } else {
5024
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5025
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5026
0
                                           num_recycled, start_time);
5027
0
                    }
5028
0
                },
5029
200
                0);
5030
200
        if (ret == 0) return 0;
5031
        // Submit task failed, delete rowset data in current thread
5032
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5033
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5034
0
            return -1;
5035
0
        }
5036
0
        orphan_rowset_keys.push_back(std::move(key));
5037
0
        return 0;
5038
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
5001
200
                                            int64_t tablet_id, const std::string& rowset_id) {
5002
        // Try to delete rowset data in background thread
5003
200
        int ret = worker_pool->submit_with_timeout(
5004
200
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5005
200
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5006
200
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5007
200
                        return;
5008
200
                    }
5009
                    // The async recycled rowsets are staled format or has not been used,
5010
                    // so we don't need to check the rowset ref count key.
5011
200
                    std::vector<std::string> keys;
5012
200
                    {
5013
200
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5014
200
                        async_recycled_rowset_keys.push_back(std::move(key));
5015
200
                        if (async_recycled_rowset_keys.size() > 100) {
5016
200
                            keys.swap(async_recycled_rowset_keys);
5017
200
                        }
5018
200
                    }
5019
200
                    if (keys.empty()) return;
5020
200
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5021
200
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5022
200
                                     << instance_id_;
5023
200
                    } else {
5024
200
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5025
200
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5026
200
                                           num_recycled, start_time);
5027
200
                    }
5028
200
                },
5029
200
                0);
5030
200
        if (ret == 0) return 0;
5031
        // Submit task failed, delete rowset data in current thread
5032
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5033
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5034
0
            return -1;
5035
0
        }
5036
0
        orphan_rowset_keys.push_back(std::move(key));
5037
0
        return 0;
5038
0
    };
5039
5040
8
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5041
5042
1.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5043
1.01k
        ++num_scanned;
5044
1.01k
        total_rowset_key_size += k.size();
5045
1.01k
        total_rowset_value_size += v.size();
5046
1.01k
        RecycleRowsetPB rowset;
5047
1.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5048
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5049
0
            return -1;
5050
0
        }
5051
5052
1.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5053
5054
1.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5055
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5056
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5057
1.01k
        int64_t current_time = ::time(nullptr);
5058
1.01k
        if (current_time < final_expiration) { // not expired
5059
0
            return 0;
5060
0
        }
5061
1.01k
        ++num_expired;
5062
1.01k
        expired_rowset_size += v.size();
5063
1.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5064
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5065
                // in old version, keep this key-value pair and it needs to be checked manually
5066
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5067
0
                return -1;
5068
0
            }
5069
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5070
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5071
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5072
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5073
0
                orphan_rowset_keys.emplace_back(k);
5074
0
                return -1;
5075
0
            }
5076
            // decode rowset_id
5077
0
            auto k1 = k;
5078
0
            k1.remove_prefix(1);
5079
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5080
0
            decode_key(&k1, &out);
5081
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5082
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5083
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5084
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5085
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5086
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5087
0
                return -1;
5088
0
            }
5089
0
            return 0;
5090
0
        }
5091
        // TODO(plat1ko): check rowset not referenced
5092
1.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5093
1.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5094
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5095
0
                LOG_INFO("recycle rowset that has empty resource id");
5096
0
            } else {
5097
                // other situations, keep this key-value pair and it needs to be checked manually
5098
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5099
0
                return -1;
5100
0
            }
5101
0
        }
5102
1.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5103
1.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5104
1.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5105
1.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5106
1.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5107
1.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5108
1.01k
                  << " rowset_meta_size=" << v.size()
5109
1.01k
                  << " creation_time=" << rowset_meta->creation_time();
5110
1.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5111
            // unable to calculate file path, can only be deleted by rowset id prefix
5112
200
            num_prepare += 1;
5113
200
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5114
200
                                             rowset_meta->tablet_id(),
5115
200
                                             rowset_meta->rowset_id_v2()) != 0) {
5116
0
                return -1;
5117
0
            }
5118
813
        } else {
5119
813
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5120
813
            worker_pool->submit(
5121
813
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5122
813
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
5123
800
                            return;
5124
800
                        }
5125
13
                        num_compacted += is_compacted;
5126
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5127
13
                        if (rowset_meta.num_segments() == 0) {
5128
0
                            ++num_empty_rowset;
5129
0
                        }
5130
13
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
5121
813
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5122
813
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
5123
800
                            return;
5124
800
                        }
5125
13
                        num_compacted += is_compacted;
5126
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5127
13
                        if (rowset_meta.num_segments() == 0) {
5128
0
                            ++num_empty_rowset;
5129
0
                        }
5130
13
                    });
5131
813
        }
5132
1.01k
        return 0;
5133
1.01k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5042
1.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5043
1.01k
        ++num_scanned;
5044
1.01k
        total_rowset_key_size += k.size();
5045
1.01k
        total_rowset_value_size += v.size();
5046
1.01k
        RecycleRowsetPB rowset;
5047
1.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5048
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5049
0
            return -1;
5050
0
        }
5051
5052
1.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5053
5054
1.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5055
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5056
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5057
1.01k
        int64_t current_time = ::time(nullptr);
5058
1.01k
        if (current_time < final_expiration) { // not expired
5059
0
            return 0;
5060
0
        }
5061
1.01k
        ++num_expired;
5062
1.01k
        expired_rowset_size += v.size();
5063
1.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5064
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5065
                // in old version, keep this key-value pair and it needs to be checked manually
5066
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5067
0
                return -1;
5068
0
            }
5069
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5070
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
5071
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5072
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5073
0
                orphan_rowset_keys.emplace_back(k);
5074
0
                return -1;
5075
0
            }
5076
            // decode rowset_id
5077
0
            auto k1 = k;
5078
0
            k1.remove_prefix(1);
5079
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5080
0
            decode_key(&k1, &out);
5081
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5082
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5083
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5084
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5085
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5086
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5087
0
                return -1;
5088
0
            }
5089
0
            return 0;
5090
0
        }
5091
        // TODO(plat1ko): check rowset not referenced
5092
1.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5093
1.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5094
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5095
0
                LOG_INFO("recycle rowset that has empty resource id");
5096
0
            } else {
5097
                // other situations, keep this key-value pair and it needs to be checked manually
5098
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5099
0
                return -1;
5100
0
            }
5101
0
        }
5102
1.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5103
1.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5104
1.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5105
1.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5106
1.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5107
1.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5108
1.01k
                  << " rowset_meta_size=" << v.size()
5109
1.01k
                  << " creation_time=" << rowset_meta->creation_time();
5110
1.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5111
            // unable to calculate file path, can only be deleted by rowset id prefix
5112
200
            num_prepare += 1;
5113
200
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5114
200
                                             rowset_meta->tablet_id(),
5115
200
                                             rowset_meta->rowset_id_v2()) != 0) {
5116
0
                return -1;
5117
0
            }
5118
813
        } else {
5119
813
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5120
813
            worker_pool->submit(
5121
813
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5122
813
                        if (recycle_rowset_meta_and_data(k, rowset_meta) != 0) {
5123
813
                            return;
5124
813
                        }
5125
813
                        num_compacted += is_compacted;
5126
813
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5127
813
                        if (rowset_meta.num_segments() == 0) {
5128
813
                            ++num_empty_rowset;
5129
813
                        }
5130
813
                    });
5131
813
        }
5132
1.01k
        return 0;
5133
1.01k
    };
5134
5135
8
    if (config::enable_recycler_stats_metrics) {
5136
0
        scan_and_statistics_rowsets();
5137
0
    }
5138
5139
8
    auto loop_done = [&]() -> int {
5140
5
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5141
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5142
0
        }
5143
5
        orphan_rowset_keys.clear();
5144
5
        return 0;
5145
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
5139
5
    auto loop_done = [&]() -> int {
5140
5
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5141
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5142
0
        }
5143
5
        orphan_rowset_keys.clear();
5144
5
        return 0;
5145
5
    };
5146
5147
    // recycle_func and loop_done for scan and recycle
5148
8
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
5149
8
                               std::move(loop_done));
5150
5151
8
    worker_pool->stop();
5152
5153
8
    if (!async_recycled_rowset_keys.empty()) {
5154
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
5155
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5156
0
            return -1;
5157
0
        } else {
5158
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
5159
0
        }
5160
0
    }
5161
8
    return ret;
5162
8
}
5163
5164
int InstanceRecycler::recycle_rowset_meta_and_data(std::string_view recycle_rowset_key,
5165
                                                   const RowsetMetaCloudPB& rowset_meta,
5166
813
                                                   std::string_view non_versioned_rowset_key) {
5167
813
    constexpr int MAX_RETRY = 10;
5168
813
    int64_t tablet_id = rowset_meta.tablet_id();
5169
813
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
5170
813
    std::string_view reference_instance_id = instance_id_;
5171
813
    if (rowset_meta.has_reference_instance_id()) {
5172
8
        reference_instance_id = rowset_meta.reference_instance_id();
5173
8
    }
5174
5175
813
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
5176
813
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
5177
813
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(recycle_rowset_key));
5178
813
    AnnotateTag instance_id_tag("instance_id", instance_id_);
5179
813
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
5180
813
    for (int i = 0; i < MAX_RETRY; ++i) {
5181
813
        std::unique_ptr<Transaction> txn;
5182
813
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5183
813
        if (err != TxnErrorCode::TXN_OK) {
5184
0
            LOG_WARNING("failed to create txn").tag("err", err);
5185
0
            return -1;
5186
0
        }
5187
5188
813
        std::string rowset_ref_count_key =
5189
813
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
5190
813
        int64_t ref_count = 0;
5191
813
        {
5192
813
            std::string value;
5193
813
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
5194
813
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5195
                // This is the old version rowset, we could recycle it directly.
5196
802
                ref_count = 1;
5197
802
            } else if (err != TxnErrorCode::TXN_OK) {
5198
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
5199
0
                return -1;
5200
11
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
5201
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
5202
0
                return -1;
5203
0
            }
5204
813
        }
5205
5206
813
        if (ref_count == 1) {
5207
            // It would not be added since it is recycling.
5208
810
            if (delete_rowset_data(rowset_meta) != 0) {
5209
800
                LOG_WARNING("failed to delete rowset data");
5210
800
                return -1;
5211
800
            }
5212
5213
            // Reset the transaction to avoid timeout.
5214
10
            err = txn_kv_->create_txn(&txn);
5215
10
            if (err != TxnErrorCode::TXN_OK) {
5216
0
                LOG_WARNING("failed to create txn").tag("err", err);
5217
0
                return -1;
5218
0
            }
5219
10
            txn->remove(rowset_ref_count_key);
5220
10
            LOG_INFO("delete rowset data ref count key")
5221
10
                    .tag("txn_id", rowset_meta.txn_id())
5222
10
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5223
5224
10
            std::string dbm_start_key =
5225
10
                    meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
5226
10
            std::string dbm_end_key = meta_delete_bitmap_key(
5227
10
                    {reference_instance_id, tablet_id, rowset_id,
5228
10
                     std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
5229
10
            txn->remove(dbm_start_key, dbm_end_key);
5230
10
            LOG_INFO("remove delete bitmap kv")
5231
10
                    .tag("begin", hex(dbm_start_key))
5232
10
                    .tag("end", hex(dbm_end_key));
5233
5234
10
            std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key(
5235
10
                    {reference_instance_id, tablet_id, rowset_id});
5236
10
            std::string versioned_dbm_end_key = versioned_dbm_start_key;
5237
10
            encode_int64(INT64_MAX, &versioned_dbm_end_key);
5238
10
            txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
5239
10
            LOG_INFO("remove versioned delete bitmap kv")
5240
10
                    .tag("begin", hex(versioned_dbm_start_key))
5241
10
                    .tag("end", hex(versioned_dbm_end_key));
5242
5243
10
            std::string meta_rowset_key_begin =
5244
10
                    versioned::meta_rowset_key({reference_instance_id, tablet_id, rowset_id});
5245
10
            std::string meta_rowset_key_end = meta_rowset_key_begin;
5246
10
            encode_int64(INT64_MAX, &meta_rowset_key_end);
5247
10
            txn->remove(meta_rowset_key_begin, meta_rowset_key_end);
5248
10
            LOG_INFO("remove meta rowset key").tag("key", hex(meta_rowset_key_begin));
5249
10
        } else {
5250
            // Decrease the rowset ref count.
5251
            //
5252
            // The read conflict range will protect the rowset ref count key, if any conflict happens,
5253
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
5254
3
            txn->atomic_add(rowset_ref_count_key, -1);
5255
3
            LOG_INFO("decrease rowset data ref count")
5256
3
                    .tag("txn_id", rowset_meta.txn_id())
5257
3
                    .tag("ref_count", ref_count - 1)
5258
3
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5259
3
        }
5260
5261
13
        if (!recycle_rowset_key.empty()) { // empty when recycle ref rowsets for deleted instance
5262
13
            txn->remove(recycle_rowset_key);
5263
13
            LOG_INFO("remove recycle rowset key").tag("key", hex(recycle_rowset_key));
5264
13
        }
5265
13
        if (!non_versioned_rowset_key.empty()) {
5266
0
            txn->remove(non_versioned_rowset_key);
5267
0
            LOG_INFO("remove non versioned rowset key").tag("key", hex(non_versioned_rowset_key));
5268
0
        }
5269
5270
13
        err = txn->commit();
5271
13
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
5272
            // The rowset ref count key has been changed, we need to retry.
5273
0
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
5274
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
5275
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
5276
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5277
0
            continue;
5278
13
        } else if (err != TxnErrorCode::TXN_OK) {
5279
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
5280
0
            return -1;
5281
0
        }
5282
13
        LOG_INFO("recycle rowset meta and data success");
5283
13
        return 0;
5284
13
    }
5285
0
    LOG_WARNING("failed to recycle rowset meta and data after retry")
5286
0
            .tag("tablet_id", tablet_id)
5287
0
            .tag("rowset_id", rowset_id)
5288
0
            .tag("retry", MAX_RETRY);
5289
0
    return -1;
5290
813
}
5291
5292
29
int InstanceRecycler::recycle_tmp_rowsets() {
5293
29
    const std::string task_name = "recycle_tmp_rowsets";
5294
29
    int64_t num_scanned = 0;
5295
29
    int64_t num_expired = 0;
5296
29
    std::atomic_long num_recycled = 0;
5297
29
    size_t expired_rowset_size = 0;
5298
29
    size_t total_rowset_key_size = 0;
5299
29
    size_t total_rowset_value_size = 0;
5300
29
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5301
5302
29
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
5303
29
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
5304
29
    std::string tmp_rs_key0;
5305
29
    std::string tmp_rs_key1;
5306
29
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
5307
29
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
5308
5309
29
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
5310
5311
29
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5312
29
    register_recycle_task(task_name, start_time);
5313
5314
29
    DORIS_CLOUD_DEFER {
5315
29
        unregister_recycle_task(task_name);
5316
29
        int64_t cost =
5317
29
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5318
29
        metrics_context.finish_report();
5319
29
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5320
29
                .tag("instance_id", instance_id_)
5321
29
                .tag("num_scanned", num_scanned)
5322
29
                .tag("num_expired", num_expired)
5323
29
                .tag("num_recycled", num_recycled)
5324
29
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5325
29
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5326
29
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5327
29
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5314
12
    DORIS_CLOUD_DEFER {
5315
12
        unregister_recycle_task(task_name);
5316
12
        int64_t cost =
5317
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5318
12
        metrics_context.finish_report();
5319
12
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5320
12
                .tag("instance_id", instance_id_)
5321
12
                .tag("num_scanned", num_scanned)
5322
12
                .tag("num_expired", num_expired)
5323
12
                .tag("num_recycled", num_recycled)
5324
12
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5325
12
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5326
12
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5327
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5314
17
    DORIS_CLOUD_DEFER {
5315
17
        unregister_recycle_task(task_name);
5316
17
        int64_t cost =
5317
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5318
17
        metrics_context.finish_report();
5319
17
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5320
17
                .tag("instance_id", instance_id_)
5321
17
                .tag("num_scanned", num_scanned)
5322
17
                .tag("num_expired", num_expired)
5323
17
                .tag("num_recycled", num_recycled)
5324
17
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5325
17
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5326
17
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5327
17
    };
5328
5329
    // Elements in `tmp_rowset_keys` have the same lifetime as `it`
5330
5331
29
    std::vector<std::string> tmp_rowset_keys;
5332
29
    std::vector<std::string> tmp_rowset_ref_count_keys;
5333
5334
    // rowset_id -> rowset_meta
5335
    // Stores the id and meta of each tmp rowset so that rowset sizes can be counted in statistics on deletion
5336
29
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
5337
29
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5338
29
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
5339
29
    worker_pool->start();
5340
5341
29
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5342
5343
29
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
5344
29
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
5345
29
                             &earlest_ts, &tmp_rowset_ref_count_keys, this,
5346
102k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5347
102k
        ++num_scanned;
5348
102k
        total_rowset_key_size += k.size();
5349
102k
        total_rowset_value_size += v.size();
5350
102k
        doris::RowsetMetaCloudPB rowset;
5351
102k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5352
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5353
0
            return -1;
5354
0
        }
5355
102k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5356
102k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5357
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5358
0
                   << " txn_expiration=" << rowset.txn_expiration()
5359
0
                   << " rowset_creation_time=" << rowset.creation_time();
5360
102k
        int64_t current_time = ::time(nullptr);
5361
102k
        if (current_time < expiration) { // not expired
5362
0
            return 0;
5363
0
        }
5364
5365
102k
        if (config::enable_mark_delete_rowset_before_recycle) {
5366
102k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
5367
102k
            if (mark_ret == -1) {
5368
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
5369
0
                             << " tablet_id=" << rowset.tablet_id() << " version=["
5370
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5371
0
                return -1;
5372
102k
            } else if (mark_ret == 1) {
5373
51.0k
                LOG(INFO)
5374
51.0k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
5375
51.0k
                           "next turn, instance_id="
5376
51.0k
                        << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5377
51.0k
                        << rowset.start_version() << '-' << rowset.end_version() << "]";
5378
51.0k
                return 0;
5379
51.0k
            }
5380
102k
        }
5381
5382
51.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5383
51.0k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
5384
51.0k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5385
51.0k
                      << rowset.start_version() << '-' << rowset.end_version() << "]";
5386
5387
51.0k
            int ret = abort_txn_or_job_for_recycle(rowset);
5388
51.0k
            if (ret != 0) {
5389
0
                LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
5390
0
                             << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5391
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5392
0
                return ret;
5393
0
            }
5394
51.0k
        }
5395
5396
51.0k
        ++num_expired;
5397
51.0k
        expired_rowset_size += v.size();
5398
51.0k
        if (!rowset.has_resource_id()) {
5399
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5400
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5401
0
                return -1;
5402
0
            }
5403
            // might be a delete pred rowset
5404
0
            tmp_rowset_keys.emplace_back(k);
5405
0
            return 0;
5406
0
        }
5407
        // TODO(plat1ko): check rowset not referenced
5408
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5409
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5410
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5411
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5412
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5413
51.0k
                  << " num_expired=" << num_expired
5414
51.0k
                  << " task_type=" << metrics_context.operation_type;
5415
5416
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5417
        // Remove the rowset ref count key directly since it has not been used.
5418
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5419
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5420
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5421
51.0k
                  << "key=" << hex(rowset_ref_count_key);
5422
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5423
5424
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5425
51.0k
        return 0;
5426
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5346
16
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5347
16
        ++num_scanned;
5348
16
        total_rowset_key_size += k.size();
5349
16
        total_rowset_value_size += v.size();
5350
16
        doris::RowsetMetaCloudPB rowset;
5351
16
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5352
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5353
0
            return -1;
5354
0
        }
5355
16
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5356
16
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5357
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5358
0
                   << " txn_expiration=" << rowset.txn_expiration()
5359
0
                   << " rowset_creation_time=" << rowset.creation_time();
5360
16
        int64_t current_time = ::time(nullptr);
5361
16
        if (current_time < expiration) { // not expired
5362
0
            return 0;
5363
0
        }
5364
5365
16
        if (config::enable_mark_delete_rowset_before_recycle) {
5366
16
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
5367
16
            if (mark_ret == -1) {
5368
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
5369
0
                             << " tablet_id=" << rowset.tablet_id() << " version=["
5370
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5371
0
                return -1;
5372
16
            } else if (mark_ret == 1) {
5373
9
                LOG(INFO)
5374
9
                        << "rowset already marked as recycled, recycler will delete data and kv at "
5375
9
                           "next turn, instance_id="
5376
9
                        << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5377
9
                        << rowset.start_version() << '-' << rowset.end_version() << "]";
5378
9
                return 0;
5379
9
            }
5380
16
        }
5381
5382
7
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5383
7
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
5384
7
                      << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5385
7
                      << rowset.start_version() << '-' << rowset.end_version() << "]";
5386
5387
7
            int ret = abort_txn_or_job_for_recycle(rowset);
5388
7
            if (ret != 0) {
5389
0
                LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
5390
0
                             << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5391
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5392
0
                return ret;
5393
0
            }
5394
7
        }
5395
5396
7
        ++num_expired;
5397
7
        expired_rowset_size += v.size();
5398
7
        if (!rowset.has_resource_id()) {
5399
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5400
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5401
0
                return -1;
5402
0
            }
5403
            // might be a delete pred rowset
5404
0
            tmp_rowset_keys.emplace_back(k);
5405
0
            return 0;
5406
0
        }
5407
        // TODO(plat1ko): check rowset not referenced
5408
7
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5409
7
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5410
7
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5411
7
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5412
7
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5413
7
                  << " num_expired=" << num_expired
5414
7
                  << " task_type=" << metrics_context.operation_type;
5415
5416
7
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5417
        // Remove the rowset ref count key directly since it has not been used.
5418
7
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5419
7
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5420
7
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5421
7
                  << "key=" << hex(rowset_ref_count_key);
5422
7
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5423
5424
7
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5425
7
        return 0;
5426
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5346
102k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5347
102k
        ++num_scanned;
5348
102k
        total_rowset_key_size += k.size();
5349
102k
        total_rowset_value_size += v.size();
5350
102k
        doris::RowsetMetaCloudPB rowset;
5351
102k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5352
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5353
0
            return -1;
5354
0
        }
5355
102k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5356
102k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5357
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5358
0
                   << " txn_expiration=" << rowset.txn_expiration()
5359
0
                   << " rowset_creation_time=" << rowset.creation_time();
5360
102k
        int64_t current_time = ::time(nullptr);
5361
102k
        if (current_time < expiration) { // not expired
5362
0
            return 0;
5363
0
        }
5364
5365
102k
        if (config::enable_mark_delete_rowset_before_recycle) {
5366
102k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
5367
102k
            if (mark_ret == -1) {
5368
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
5369
0
                             << " tablet_id=" << rowset.tablet_id() << " version=["
5370
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5371
0
                return -1;
5372
102k
            } else if (mark_ret == 1) {
5373
51.0k
                LOG(INFO)
5374
51.0k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
5375
51.0k
                           "next turn, instance_id="
5376
51.0k
                        << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5377
51.0k
                        << rowset.start_version() << '-' << rowset.end_version() << "]";
5378
51.0k
                return 0;
5379
51.0k
            }
5380
102k
        }
5381
5382
51.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5383
51.0k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
5384
51.0k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5385
51.0k
                      << rowset.start_version() << '-' << rowset.end_version() << "]";
5386
5387
51.0k
            int ret = abort_txn_or_job_for_recycle(rowset);
5388
51.0k
            if (ret != 0) {
5389
0
                LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
5390
0
                             << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5391
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5392
0
                return ret;
5393
0
            }
5394
51.0k
        }
5395
5396
51.0k
        ++num_expired;
5397
51.0k
        expired_rowset_size += v.size();
5398
51.0k
        if (!rowset.has_resource_id()) {
5399
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5400
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5401
0
                return -1;
5402
0
            }
5403
            // might be a delete pred rowset
5404
0
            tmp_rowset_keys.emplace_back(k);
5405
0
            return 0;
5406
0
        }
5407
        // TODO(plat1ko): check rowset not referenced
5408
51.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5409
51.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5410
51.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5411
51.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5412
51.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5413
51.0k
                  << " num_expired=" << num_expired
5414
51.0k
                  << " task_type=" << metrics_context.operation_type;
5415
5416
51.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5417
        // Remove the rowset ref count key directly since it has not been used.
5418
51.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5419
51.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5420
51.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5421
51.0k
                  << "key=" << hex(rowset_ref_count_key);
5422
51.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5423
5424
51.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5425
51.0k
        return 0;
5426
51.0k
    };
5427
5428
    // TODO: batch delete
5429
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5430
51.0k
        std::string dbm_start_key =
5431
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5432
51.0k
        std::string dbm_end_key = dbm_start_key;
5433
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5434
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5435
51.0k
        if (ret != 0) {
5436
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5437
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5438
0
                         << ", rowset_id=" << rowset_id;
5439
0
        }
5440
51.0k
        return ret;
5441
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5429
7
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5430
7
        std::string dbm_start_key =
5431
7
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5432
7
        std::string dbm_end_key = dbm_start_key;
5433
7
        encode_int64(INT64_MAX, &dbm_end_key);
5434
7
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5435
7
        if (ret != 0) {
5436
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5437
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5438
0
                         << ", rowset_id=" << rowset_id;
5439
0
        }
5440
7
        return ret;
5441
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5429
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5430
51.0k
        std::string dbm_start_key =
5431
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5432
51.0k
        std::string dbm_end_key = dbm_start_key;
5433
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5434
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5435
51.0k
        if (ret != 0) {
5436
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5437
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5438
0
                         << ", rowset_id=" << rowset_id;
5439
0
        }
5440
51.0k
        return ret;
5441
51.0k
    };
5442
5443
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5444
51.0k
        auto delete_bitmap_start =
5445
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5446
51.0k
        auto delete_bitmap_end =
5447
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5448
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5449
51.0k
        if (ret != 0) {
5450
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5451
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5452
0
        }
5453
51.0k
        return ret;
5454
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5443
7
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5444
7
        auto delete_bitmap_start =
5445
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5446
7
        auto delete_bitmap_end =
5447
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5448
7
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5449
7
        if (ret != 0) {
5450
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5451
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5452
0
        }
5453
7
        return ret;
5454
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5443
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5444
51.0k
        auto delete_bitmap_start =
5445
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5446
51.0k
        auto delete_bitmap_end =
5447
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5448
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5449
51.0k
        if (ret != 0) {
5450
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5451
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5452
0
        }
5453
51.0k
        return ret;
5454
51.0k
    };
5455
5456
29
    auto loop_done = [&]() -> int {
5457
26
        DORIS_CLOUD_DEFER {
5458
26
            tmp_rowset_keys.clear();
5459
26
            tmp_rowsets.clear();
5460
26
            tmp_rowset_ref_count_keys.clear();
5461
26
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5457
12
        DORIS_CLOUD_DEFER {
5458
12
            tmp_rowset_keys.clear();
5459
12
            tmp_rowsets.clear();
5460
12
            tmp_rowset_ref_count_keys.clear();
5461
12
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5457
14
        DORIS_CLOUD_DEFER {
5458
14
            tmp_rowset_keys.clear();
5459
14
            tmp_rowsets.clear();
5460
14
            tmp_rowset_ref_count_keys.clear();
5461
14
        };
5462
26
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5463
26
                             tmp_rowsets_to_delete = tmp_rowsets,
5464
26
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5465
26
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5466
26
                                   metrics_context) != 0) {
5467
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5468
0
                return;
5469
0
            }
5470
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5471
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5472
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5473
0
                                 << rs.ShortDebugString();
5474
0
                    return;
5475
0
                }
5476
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5477
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5478
0
                                 << rs.ShortDebugString();
5479
0
                    return;
5480
0
                }
5481
51.0k
            }
5482
26
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5483
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5484
0
                return;
5485
0
            }
5486
26
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5487
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5488
0
                return;
5489
0
            }
5490
26
            num_recycled += tmp_rowset_keys.size();
5491
26
            return;
5492
26
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5464
12
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5465
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5466
12
                                   metrics_context) != 0) {
5467
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5468
0
                return;
5469
0
            }
5470
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5471
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5472
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5473
0
                                 << rs.ShortDebugString();
5474
0
                    return;
5475
0
                }
5476
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5477
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5478
0
                                 << rs.ShortDebugString();
5479
0
                    return;
5480
0
                }
5481
7
            }
5482
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5483
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5484
0
                return;
5485
0
            }
5486
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5487
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5488
0
                return;
5489
0
            }
5490
12
            num_recycled += tmp_rowset_keys.size();
5491
12
            return;
5492
12
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5464
14
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5465
14
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5466
14
                                   metrics_context) != 0) {
5467
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5468
0
                return;
5469
0
            }
5470
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5471
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5472
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5473
0
                                 << rs.ShortDebugString();
5474
0
                    return;
5475
0
                }
5476
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5477
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5478
0
                                 << rs.ShortDebugString();
5479
0
                    return;
5480
0
                }
5481
51.0k
            }
5482
14
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5483
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5484
0
                return;
5485
0
            }
5486
14
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5487
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5488
0
                return;
5489
0
            }
5490
14
            num_recycled += tmp_rowset_keys.size();
5491
14
            return;
5492
14
        });
5493
26
        return 0;
5494
26
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
5456
12
    auto loop_done = [&]() -> int {
5457
12
        DORIS_CLOUD_DEFER {
5458
12
            tmp_rowset_keys.clear();
5459
12
            tmp_rowsets.clear();
5460
12
            tmp_rowset_ref_count_keys.clear();
5461
12
        };
5462
12
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5463
12
                             tmp_rowsets_to_delete = tmp_rowsets,
5464
12
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5465
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5466
12
                                   metrics_context) != 0) {
5467
12
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5468
12
                return;
5469
12
            }
5470
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5471
12
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5472
12
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5473
12
                                 << rs.ShortDebugString();
5474
12
                    return;
5475
12
                }
5476
12
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5477
12
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5478
12
                                 << rs.ShortDebugString();
5479
12
                    return;
5480
12
                }
5481
12
            }
5482
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5483
12
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5484
12
                return;
5485
12
            }
5486
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5487
12
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5488
12
                return;
5489
12
            }
5490
12
            num_recycled += tmp_rowset_keys.size();
5491
12
            return;
5492
12
        });
5493
12
        return 0;
5494
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
5456
14
    auto loop_done = [&]() -> int {
5457
14
        DORIS_CLOUD_DEFER {
5458
14
            tmp_rowset_keys.clear();
5459
14
            tmp_rowsets.clear();
5460
14
            tmp_rowset_ref_count_keys.clear();
5461
14
        };
5462
14
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5463
14
                             tmp_rowsets_to_delete = tmp_rowsets,
5464
14
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5465
14
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5466
14
                                   metrics_context) != 0) {
5467
14
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5468
14
                return;
5469
14
            }
5470
14
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5471
14
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5472
14
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5473
14
                                 << rs.ShortDebugString();
5474
14
                    return;
5475
14
                }
5476
14
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5477
14
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5478
14
                                 << rs.ShortDebugString();
5479
14
                    return;
5480
14
                }
5481
14
            }
5482
14
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5483
14
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5484
14
                return;
5485
14
            }
5486
14
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5487
14
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5488
14
                return;
5489
14
            }
5490
14
            num_recycled += tmp_rowset_keys.size();
5491
14
            return;
5492
14
        });
5493
14
        return 0;
5494
14
    };
5495
5496
29
    if (config::enable_recycler_stats_metrics) {
5497
0
        scan_and_statistics_tmp_rowsets();
5498
0
    }
5499
    // recycle_func and loop_done for scan and recycle
5500
29
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
5501
29
                               std::move(loop_done));
5502
5503
29
    worker_pool->stop();
5504
29
    return ret;
5505
29
}
5506
5507
int InstanceRecycler::scan_and_recycle(
5508
        std::string begin, std::string_view end,
5509
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
5510
255
        std::function<int()> loop_done) {
5511
255
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
5512
255
    int ret = 0;
5513
255
    int64_t cnt = 0;
5514
255
    int get_range_retried = 0;
5515
255
    std::string err;
5516
255
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5517
255
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5518
255
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5519
255
                  << " ret=" << ret << " err=" << err;
5520
255
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5516
31
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5517
31
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5518
31
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5519
31
                  << " ret=" << ret << " err=" << err;
5520
31
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5516
224
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5517
224
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5518
224
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5519
224
                  << " ret=" << ret << " err=" << err;
5520
224
    };
5521
5522
255
    std::unique_ptr<RangeGetIterator> it;
5523
308
    do {
5524
308
        if (get_range_retried > 1000) {
5525
0
            err = "txn_get exceeds max retry, may not scan all keys";
5526
0
            ret = -1;
5527
0
            return -1;
5528
0
        }
5529
308
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
5530
308
        if (get_ret != 0) { // txn kv may complain "Request for future version"
5531
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
5532
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
5533
0
                         << " get_range_retried=" << get_range_retried;
5534
0
            ++get_range_retried;
5535
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5536
0
            continue; // try again
5537
0
        }
5538
308
        if (!it->has_next()) {
5539
134
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
5540
134
            break; // scan finished
5541
134
        }
5542
149k
        while (it->has_next()) {
5543
149k
            ++cnt;
5544
            // recycle corresponding resources
5545
149k
            auto [k, v] = it->next();
5546
149k
            if (!it->has_next()) {
5547
174
                begin = k;
5548
174
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
5549
174
            }
5550
            // if we want to continue scanning, the recycle_func should not return non-zero
5551
149k
            if (recycle_func(k, v) != 0) {
5552
4.00k
                err = "recycle_func error";
5553
4.00k
                ret = -1;
5554
4.00k
            }
5555
149k
        }
5556
174
        begin.push_back('\x00'); // Update to next smallest key for iteration
5557
        // if we want to continue scanning, the recycle_func should not return non-zero
5558
174
        if (loop_done && loop_done() != 0) {
5559
4
            err = "loop_done error";
5560
4
            ret = -1;
5561
4
        }
5562
174
    } while (it->more() && !stopped());
5563
255
    return ret;
5564
255
}
5565
5566
19
int InstanceRecycler::abort_timeout_txn() {
5567
19
    const std::string task_name = "abort_timeout_txn";
5568
19
    int64_t num_scanned = 0;
5569
19
    int64_t num_timeout = 0;
5570
19
    int64_t num_abort = 0;
5571
19
    int64_t num_advance = 0;
5572
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5573
5574
19
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
5575
19
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5576
19
    std::string begin_txn_running_key;
5577
19
    std::string end_txn_running_key;
5578
19
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
5579
19
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
5580
5581
19
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
5582
5583
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5584
19
    register_recycle_task(task_name, start_time);
5585
5586
19
    DORIS_CLOUD_DEFER {
5587
19
        unregister_recycle_task(task_name);
5588
19
        int64_t cost =
5589
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5590
19
        metrics_context.finish_report();
5591
19
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5592
19
                .tag("instance_id", instance_id_)
5593
19
                .tag("num_scanned", num_scanned)
5594
19
                .tag("num_timeout", num_timeout)
5595
19
                .tag("num_abort", num_abort)
5596
19
                .tag("num_advance", num_advance);
5597
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
5586
3
    DORIS_CLOUD_DEFER {
5587
3
        unregister_recycle_task(task_name);
5588
3
        int64_t cost =
5589
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5590
3
        metrics_context.finish_report();
5591
3
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5592
3
                .tag("instance_id", instance_id_)
5593
3
                .tag("num_scanned", num_scanned)
5594
3
                .tag("num_timeout", num_timeout)
5595
3
                .tag("num_abort", num_abort)
5596
3
                .tag("num_advance", num_advance);
5597
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
5586
16
    DORIS_CLOUD_DEFER {
5587
16
        unregister_recycle_task(task_name);
5588
16
        int64_t cost =
5589
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5590
16
        metrics_context.finish_report();
5591
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5592
16
                .tag("instance_id", instance_id_)
5593
16
                .tag("num_scanned", num_scanned)
5594
16
                .tag("num_timeout", num_timeout)
5595
16
                .tag("num_abort", num_abort)
5596
16
                .tag("num_advance", num_advance);
5597
16
    };
5598
5599
19
    int64_t current_time =
5600
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5601
5602
19
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
5603
19
                                  &current_time, &metrics_context,
5604
19
                                  this](std::string_view k, std::string_view v) -> int {
5605
9
        ++num_scanned;
5606
5607
9
        std::unique_ptr<Transaction> txn;
5608
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5609
9
        if (err != TxnErrorCode::TXN_OK) {
5610
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5611
0
            return -1;
5612
0
        }
5613
9
        std::string_view k1 = k;
5614
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5615
9
        k1.remove_prefix(1); // Remove key space
5616
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5617
9
        if (decode_key(&k1, &out) != 0) {
5618
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5619
0
            return -1;
5620
0
        }
5621
9
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5622
9
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5623
9
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5624
        // Update txn_info
5625
9
        std::string txn_inf_key, txn_inf_val;
5626
9
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5627
9
        err = txn->get(txn_inf_key, &txn_inf_val);
5628
9
        if (err != TxnErrorCode::TXN_OK) {
5629
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5630
0
            return -1;
5631
0
        }
5632
9
        TxnInfoPB txn_info;
5633
9
        if (!txn_info.ParseFromString(txn_inf_val)) {
5634
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5635
0
            return -1;
5636
0
        }
5637
5638
9
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5639
3
            txn.reset();
5640
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5641
3
            std::shared_ptr<TxnLazyCommitTask> task =
5642
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5643
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5644
3
            if (ret.first != MetaServiceCode::OK) {
5645
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5646
0
                             << "msg=" << ret.second;
5647
0
                return -1;
5648
0
            }
5649
3
            ++num_advance;
5650
3
            return 0;
5651
6
        } else {
5652
6
            TxnRunningPB txn_running_pb;
5653
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5654
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5655
0
                return -1;
5656
0
            }
5657
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5658
4
                return 0;
5659
4
            }
5660
2
            ++num_timeout;
5661
5662
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5663
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5664
2
            txn_info.set_finish_time(current_time);
5665
2
            txn_info.set_reason("timeout");
5666
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5667
2
            txn_inf_val.clear();
5668
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5669
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5670
0
                return -1;
5671
0
            }
5672
2
            txn->put(txn_inf_key, txn_inf_val);
5673
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5674
            // Put recycle txn key
5675
2
            std::string recyc_txn_key, recyc_txn_val;
5676
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5677
2
            RecycleTxnPB recycle_txn_pb;
5678
2
            recycle_txn_pb.set_creation_time(current_time);
5679
2
            recycle_txn_pb.set_label(txn_info.label());
5680
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5681
0
                LOG_WARNING("failed to serialize txn recycle info")
5682
0
                        .tag("key", hex(k))
5683
0
                        .tag("db_id", db_id)
5684
0
                        .tag("txn_id", txn_id);
5685
0
                return -1;
5686
0
            }
5687
2
            txn->put(recyc_txn_key, recyc_txn_val);
5688
            // Remove txn running key
5689
2
            txn->remove(k);
5690
2
            err = txn->commit();
5691
2
            if (err != TxnErrorCode::TXN_OK) {
5692
0
                LOG_WARNING("failed to commit txn err={}", err)
5693
0
                        .tag("key", hex(k))
5694
0
                        .tag("db_id", db_id)
5695
0
                        .tag("txn_id", txn_id);
5696
0
                return -1;
5697
0
            }
5698
2
            metrics_context.total_recycled_num = ++num_abort;
5699
2
            metrics_context.report();
5700
2
        }
5701
5702
2
        return 0;
5703
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5604
3
                                  this](std::string_view k, std::string_view v) -> int {
5605
3
        ++num_scanned;
5606
5607
3
        std::unique_ptr<Transaction> txn;
5608
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5609
3
        if (err != TxnErrorCode::TXN_OK) {
5610
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5611
0
            return -1;
5612
0
        }
5613
3
        std::string_view k1 = k;
5614
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5615
3
        k1.remove_prefix(1); // Remove key space
5616
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5617
3
        if (decode_key(&k1, &out) != 0) {
5618
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5619
0
            return -1;
5620
0
        }
5621
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5622
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5623
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5624
        // Update txn_info
5625
3
        std::string txn_inf_key, txn_inf_val;
5626
3
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5627
3
        err = txn->get(txn_inf_key, &txn_inf_val);
5628
3
        if (err != TxnErrorCode::TXN_OK) {
5629
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5630
0
            return -1;
5631
0
        }
5632
3
        TxnInfoPB txn_info;
5633
3
        if (!txn_info.ParseFromString(txn_inf_val)) {
5634
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5635
0
            return -1;
5636
0
        }
5637
5638
3
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5639
3
            txn.reset();
5640
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5641
3
            std::shared_ptr<TxnLazyCommitTask> task =
5642
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5643
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5644
3
            if (ret.first != MetaServiceCode::OK) {
5645
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5646
0
                             << "msg=" << ret.second;
5647
0
                return -1;
5648
0
            }
5649
3
            ++num_advance;
5650
3
            return 0;
5651
3
        } else {
5652
0
            TxnRunningPB txn_running_pb;
5653
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5654
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5655
0
                return -1;
5656
0
            }
5657
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5658
0
                return 0;
5659
0
            }
5660
0
            ++num_timeout;
5661
5662
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5663
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5664
0
            txn_info.set_finish_time(current_time);
5665
0
            txn_info.set_reason("timeout");
5666
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5667
0
            txn_inf_val.clear();
5668
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5669
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5670
0
                return -1;
5671
0
            }
5672
0
            txn->put(txn_inf_key, txn_inf_val);
5673
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5674
            // Put recycle txn key
5675
0
            std::string recyc_txn_key, recyc_txn_val;
5676
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5677
0
            RecycleTxnPB recycle_txn_pb;
5678
0
            recycle_txn_pb.set_creation_time(current_time);
5679
0
            recycle_txn_pb.set_label(txn_info.label());
5680
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5681
0
                LOG_WARNING("failed to serialize txn recycle info")
5682
0
                        .tag("key", hex(k))
5683
0
                        .tag("db_id", db_id)
5684
0
                        .tag("txn_id", txn_id);
5685
0
                return -1;
5686
0
            }
5687
0
            txn->put(recyc_txn_key, recyc_txn_val);
5688
            // Remove txn running key
5689
0
            txn->remove(k);
5690
0
            err = txn->commit();
5691
0
            if (err != TxnErrorCode::TXN_OK) {
5692
0
                LOG_WARNING("failed to commit txn err={}", err)
5693
0
                        .tag("key", hex(k))
5694
0
                        .tag("db_id", db_id)
5695
0
                        .tag("txn_id", txn_id);
5696
0
                return -1;
5697
0
            }
5698
0
            metrics_context.total_recycled_num = ++num_abort;
5699
0
            metrics_context.report();
5700
0
        }
5701
5702
0
        return 0;
5703
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5604
6
                                  this](std::string_view k, std::string_view v) -> int {
5605
6
        ++num_scanned;
5606
5607
6
        std::unique_ptr<Transaction> txn;
5608
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5609
6
        if (err != TxnErrorCode::TXN_OK) {
5610
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5611
0
            return -1;
5612
0
        }
5613
6
        std::string_view k1 = k;
5614
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5615
6
        k1.remove_prefix(1); // Remove key space
5616
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5617
6
        if (decode_key(&k1, &out) != 0) {
5618
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5619
0
            return -1;
5620
0
        }
5621
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5622
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5623
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5624
        // Update txn_info
5625
6
        std::string txn_inf_key, txn_inf_val;
5626
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5627
6
        err = txn->get(txn_inf_key, &txn_inf_val);
5628
6
        if (err != TxnErrorCode::TXN_OK) {
5629
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5630
0
            return -1;
5631
0
        }
5632
6
        TxnInfoPB txn_info;
5633
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
5634
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5635
0
            return -1;
5636
0
        }
5637
5638
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5639
0
            txn.reset();
5640
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5641
0
            std::shared_ptr<TxnLazyCommitTask> task =
5642
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5643
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5644
0
            if (ret.first != MetaServiceCode::OK) {
5645
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5646
0
                             << "msg=" << ret.second;
5647
0
                return -1;
5648
0
            }
5649
0
            ++num_advance;
5650
0
            return 0;
5651
6
        } else {
5652
6
            TxnRunningPB txn_running_pb;
5653
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5654
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5655
0
                return -1;
5656
0
            }
5657
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5658
4
                return 0;
5659
4
            }
5660
2
            ++num_timeout;
5661
5662
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5663
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5664
2
            txn_info.set_finish_time(current_time);
5665
2
            txn_info.set_reason("timeout");
5666
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5667
2
            txn_inf_val.clear();
5668
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5669
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5670
0
                return -1;
5671
0
            }
5672
2
            txn->put(txn_inf_key, txn_inf_val);
5673
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5674
            // Put recycle txn key
5675
2
            std::string recyc_txn_key, recyc_txn_val;
5676
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5677
2
            RecycleTxnPB recycle_txn_pb;
5678
2
            recycle_txn_pb.set_creation_time(current_time);
5679
2
            recycle_txn_pb.set_label(txn_info.label());
5680
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5681
0
                LOG_WARNING("failed to serialize txn recycle info")
5682
0
                        .tag("key", hex(k))
5683
0
                        .tag("db_id", db_id)
5684
0
                        .tag("txn_id", txn_id);
5685
0
                return -1;
5686
0
            }
5687
2
            txn->put(recyc_txn_key, recyc_txn_val);
5688
            // Remove txn running key
5689
2
            txn->remove(k);
5690
2
            err = txn->commit();
5691
2
            if (err != TxnErrorCode::TXN_OK) {
5692
0
                LOG_WARNING("failed to commit txn err={}", err)
5693
0
                        .tag("key", hex(k))
5694
0
                        .tag("db_id", db_id)
5695
0
                        .tag("txn_id", txn_id);
5696
0
                return -1;
5697
0
            }
5698
2
            metrics_context.total_recycled_num = ++num_abort;
5699
2
            metrics_context.report();
5700
2
        }
5701
5702
2
        return 0;
5703
6
    };
5704
5705
19
    if (config::enable_recycler_stats_metrics) {
5706
0
        scan_and_statistics_abort_timeout_txn();
5707
0
    }
5708
    // recycle_func and loop_done for scan and recycle
5709
19
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
5710
19
                            std::move(handle_txn_running_kv));
5711
19
}
5712
5713
19
int InstanceRecycler::recycle_expired_txn_label() {
5714
19
    const std::string task_name = "recycle_expired_txn_label";
5715
19
    int64_t num_scanned = 0;
5716
19
    int64_t num_expired = 0;
5717
19
    int64_t num_recycled = 0;
5718
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5719
19
    int ret = 0;
5720
5721
19
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
5722
19
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5723
19
    std::string begin_recycle_txn_key;
5724
19
    std::string end_recycle_txn_key;
5725
19
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
5726
19
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
5727
19
    std::vector<std::string> recycle_txn_info_keys;
5728
5729
19
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
5730
5731
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5732
19
    register_recycle_task(task_name, start_time);
5733
19
    DORIS_CLOUD_DEFER {
5734
19
        unregister_recycle_task(task_name);
5735
19
        int64_t cost =
5736
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5737
19
        metrics_context.finish_report();
5738
19
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5739
19
                .tag("instance_id", instance_id_)
5740
19
                .tag("num_scanned", num_scanned)
5741
19
                .tag("num_expired", num_expired)
5742
19
                .tag("num_recycled", num_recycled);
5743
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5733
1
    DORIS_CLOUD_DEFER {
5734
1
        unregister_recycle_task(task_name);
5735
1
        int64_t cost =
5736
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5737
1
        metrics_context.finish_report();
5738
1
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5739
1
                .tag("instance_id", instance_id_)
5740
1
                .tag("num_scanned", num_scanned)
5741
1
                .tag("num_expired", num_expired)
5742
1
                .tag("num_recycled", num_recycled);
5743
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5733
18
    DORIS_CLOUD_DEFER {
5734
18
        unregister_recycle_task(task_name);
5735
18
        int64_t cost =
5736
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5737
18
        metrics_context.finish_report();
5738
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5739
18
                .tag("instance_id", instance_id_)
5740
18
                .tag("num_scanned", num_scanned)
5741
18
                .tag("num_expired", num_expired)
5742
18
                .tag("num_recycled", num_recycled);
5743
18
    };
5744
5745
19
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5746
5747
19
    SyncExecutor<int> concurrent_delete_executor(
5748
19
            _thread_pool_group.s3_producer_pool,
5749
19
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
5750
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5750
1
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5750
23.0k
            [](const int& ret) { return ret != 0; });
5751
5752
19
    int64_t current_time_ms =
5753
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5754
5755
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5756
30.0k
        ++num_scanned;
5757
30.0k
        RecycleTxnPB recycle_txn_pb;
5758
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5759
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5760
0
            return -1;
5761
0
        }
5762
30.0k
        if ((config::force_immediate_recycle) ||
5763
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5764
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5765
30.0k
             current_time_ms)) {
5766
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5767
23.0k
            num_expired++;
5768
23.0k
            recycle_txn_info_keys.emplace_back(k);
5769
23.0k
        }
5770
30.0k
        return 0;
5771
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5755
1
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5756
1
        ++num_scanned;
5757
1
        RecycleTxnPB recycle_txn_pb;
5758
1
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5759
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5760
0
            return -1;
5761
0
        }
5762
1
        if ((config::force_immediate_recycle) ||
5763
1
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5764
1
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5765
1
             current_time_ms)) {
5766
1
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5767
1
            num_expired++;
5768
1
            recycle_txn_info_keys.emplace_back(k);
5769
1
        }
5770
1
        return 0;
5771
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5755
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
5756
30.0k
        ++num_scanned;
5757
30.0k
        RecycleTxnPB recycle_txn_pb;
5758
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
5759
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5760
0
            return -1;
5761
0
        }
5762
30.0k
        if ((config::force_immediate_recycle) ||
5763
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
5764
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
5765
30.0k
             current_time_ms)) {
5766
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
5767
23.0k
            num_expired++;
5768
23.0k
            recycle_txn_info_keys.emplace_back(k);
5769
23.0k
        }
5770
30.0k
        return 0;
5771
30.0k
    };
5772
5773
    // int 0 for success, 1 for conflict, -1 for error
5774
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5775
23.0k
        std::string_view k1 = k;
5776
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5777
23.0k
        k1.remove_prefix(1); // Remove key space
5778
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5779
23.0k
        int ret = decode_key(&k1, &out);
5780
23.0k
        if (ret != 0) {
5781
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5782
0
            return -1;
5783
0
        }
5784
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5785
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5786
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5787
23.0k
        std::unique_ptr<Transaction> txn;
5788
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5789
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5790
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5791
0
            return -1;
5792
0
        }
5793
        // Remove txn index kv
5794
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
5795
23.0k
        txn->remove(index_key);
5796
        // Remove txn info kv
5797
23.0k
        std::string info_key, info_val;
5798
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
5799
23.0k
        err = txn->get(info_key, &info_val);
5800
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5801
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
5802
0
            return -1;
5803
0
        }
5804
23.0k
        TxnInfoPB txn_info;
5805
23.0k
        if (!txn_info.ParseFromString(info_val)) {
5806
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
5807
0
            return -1;
5808
0
        }
5809
23.0k
        txn->remove(info_key);
5810
        // Remove sub txn index kvs
5811
23.0k
        std::vector<std::string> sub_txn_index_keys;
5812
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
5813
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
5814
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
5815
22.9k
        }
5816
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
5817
22.9k
            txn->remove(sub_txn_index_key);
5818
22.9k
        }
5819
        // Update txn label
5820
23.0k
        std::string label_key, label_val;
5821
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
5822
23.0k
        err = txn->get(label_key, &label_val);
5823
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5824
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
5825
0
                         << " err=" << err;
5826
0
            return -1;
5827
0
        }
5828
23.0k
        TxnLabelPB txn_label;
5829
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
5830
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
5831
0
            return -1;
5832
0
        }
5833
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
5834
23.0k
        if (it != txn_label.txn_ids().end()) {
5835
23.0k
            txn_label.mutable_txn_ids()->erase(it);
5836
23.0k
        }
5837
23.0k
        if (txn_label.txn_ids().empty()) {
5838
23.0k
            txn->remove(label_key);
5839
23.0k
            TEST_SYNC_POINT_CALLBACK(
5840
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
5841
23.0k
        } else {
5842
73
            if (!txn_label.SerializeToString(&label_val)) {
5843
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
5844
0
                return -1;
5845
0
            }
5846
73
            TEST_SYNC_POINT_CALLBACK(
5847
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
5848
73
            txn->atomic_set_ver_value(label_key, label_val);
5849
73
            TEST_SYNC_POINT_CALLBACK(
5850
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
5851
73
        }
5852
        // Remove recycle txn kv
5853
23.0k
        txn->remove(k);
5854
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
5855
23.0k
        err = txn->commit();
5856
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5857
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
5858
62
                TEST_SYNC_POINT_CALLBACK(
5859
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
5860
                // log the txn_id and label
5861
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
5862
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
5863
62
                             << " txn_label=" << txn_info.label();
5864
62
                return 1;
5865
62
            }
5866
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
5867
0
            return -1;
5868
62
        }
5869
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
5870
23.0k
        metrics_context.report();
5871
5872
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
5873
23.0k
        return 0;
5874
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5774
1
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5775
1
        std::string_view k1 = k;
5776
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5777
1
        k1.remove_prefix(1); // Remove key space
5778
1
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5779
1
        int ret = decode_key(&k1, &out);
5780
1
        if (ret != 0) {
5781
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5782
0
            return -1;
5783
0
        }
5784
1
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5785
1
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5786
1
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5787
1
        std::unique_ptr<Transaction> txn;
5788
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5789
1
        if (err != TxnErrorCode::TXN_OK) {
5790
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5791
0
            return -1;
5792
0
        }
5793
        // Remove txn index kv
5794
1
        auto index_key = txn_index_key({instance_id_, txn_id});
5795
1
        txn->remove(index_key);
5796
        // Remove txn info kv
5797
1
        std::string info_key, info_val;
5798
1
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
5799
1
        err = txn->get(info_key, &info_val);
5800
1
        if (err != TxnErrorCode::TXN_OK) {
5801
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
5802
0
            return -1;
5803
0
        }
5804
1
        TxnInfoPB txn_info;
5805
1
        if (!txn_info.ParseFromString(info_val)) {
5806
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
5807
0
            return -1;
5808
0
        }
5809
1
        txn->remove(info_key);
5810
        // Remove sub txn index kvs
5811
1
        std::vector<std::string> sub_txn_index_keys;
5812
1
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
5813
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
5814
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
5815
0
        }
5816
1
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
5817
0
            txn->remove(sub_txn_index_key);
5818
0
        }
5819
        // Update txn label
5820
1
        std::string label_key, label_val;
5821
1
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
5822
1
        err = txn->get(label_key, &label_val);
5823
1
        if (err != TxnErrorCode::TXN_OK) {
5824
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
5825
0
                         << " err=" << err;
5826
0
            return -1;
5827
0
        }
5828
1
        TxnLabelPB txn_label;
5829
1
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
5830
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
5831
0
            return -1;
5832
0
        }
5833
1
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
5834
1
        if (it != txn_label.txn_ids().end()) {
5835
1
            txn_label.mutable_txn_ids()->erase(it);
5836
1
        }
5837
1
        if (txn_label.txn_ids().empty()) {
5838
1
            txn->remove(label_key);
5839
1
            TEST_SYNC_POINT_CALLBACK(
5840
1
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
5841
1
        } else {
5842
0
            if (!txn_label.SerializeToString(&label_val)) {
5843
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
5844
0
                return -1;
5845
0
            }
5846
0
            TEST_SYNC_POINT_CALLBACK(
5847
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
5848
0
            txn->atomic_set_ver_value(label_key, label_val);
5849
0
            TEST_SYNC_POINT_CALLBACK(
5850
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
5851
0
        }
5852
        // Remove recycle txn kv
5853
1
        txn->remove(k);
5854
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
5855
1
        err = txn->commit();
5856
1
        if (err != TxnErrorCode::TXN_OK) {
5857
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
5858
0
                TEST_SYNC_POINT_CALLBACK(
5859
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
5860
                // log the txn_id and label
5861
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
5862
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
5863
0
                             << " txn_label=" << txn_info.label();
5864
0
                return 1;
5865
0
            }
5866
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
5867
0
            return -1;
5868
0
        }
5869
1
        metrics_context.total_recycled_num = ++num_recycled;
5870
1
        metrics_context.report();
5871
5872
1
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
5873
1
        return 0;
5874
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5774
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
5775
23.0k
        std::string_view k1 = k;
5776
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
5777
23.0k
        k1.remove_prefix(1); // Remove key space
5778
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5779
23.0k
        int ret = decode_key(&k1, &out);
5780
23.0k
        if (ret != 0) {
5781
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
5782
0
            return -1;
5783
0
        }
5784
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5785
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5786
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5787
23.0k
        std::unique_ptr<Transaction> txn;
5788
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5789
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5790
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5791
0
            return -1;
5792
0
        }
5793
        // Remove txn index kv
5794
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
5795
23.0k
        txn->remove(index_key);
5796
        // Remove txn info kv
5797
23.0k
        std::string info_key, info_val;
5798
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
5799
23.0k
        err = txn->get(info_key, &info_val);
5800
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5801
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
5802
0
            return -1;
5803
0
        }
5804
23.0k
        TxnInfoPB txn_info;
5805
23.0k
        if (!txn_info.ParseFromString(info_val)) {
5806
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
5807
0
            return -1;
5808
0
        }
5809
23.0k
        txn->remove(info_key);
5810
        // Remove sub txn index kvs
5811
23.0k
        std::vector<std::string> sub_txn_index_keys;
5812
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
5813
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
5814
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
5815
22.9k
        }
5816
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
5817
22.9k
            txn->remove(sub_txn_index_key);
5818
22.9k
        }
5819
        // Update txn label
5820
23.0k
        std::string label_key, label_val;
5821
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
5822
23.0k
        err = txn->get(label_key, &label_val);
5823
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5824
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
5825
0
                         << " err=" << err;
5826
0
            return -1;
5827
0
        }
5828
23.0k
        TxnLabelPB txn_label;
5829
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
5830
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
5831
0
            return -1;
5832
0
        }
5833
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
5834
23.0k
        if (it != txn_label.txn_ids().end()) {
5835
23.0k
            txn_label.mutable_txn_ids()->erase(it);
5836
23.0k
        }
5837
23.0k
        if (txn_label.txn_ids().empty()) {
5838
23.0k
            txn->remove(label_key);
5839
23.0k
            TEST_SYNC_POINT_CALLBACK(
5840
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
5841
23.0k
        } else {
5842
73
            if (!txn_label.SerializeToString(&label_val)) {
5843
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
5844
0
                return -1;
5845
0
            }
5846
73
            TEST_SYNC_POINT_CALLBACK(
5847
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
5848
73
            txn->atomic_set_ver_value(label_key, label_val);
5849
73
            TEST_SYNC_POINT_CALLBACK(
5850
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
5851
73
        }
5852
        // Remove recycle txn kv
5853
23.0k
        txn->remove(k);
5854
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
5855
23.0k
        err = txn->commit();
5856
23.0k
        if (err != TxnErrorCode::TXN_OK) {
5857
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
5858
62
                TEST_SYNC_POINT_CALLBACK(
5859
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
5860
                // log the txn_id and label
5861
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
5862
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
5863
62
                             << " txn_label=" << txn_info.label();
5864
62
                return 1;
5865
62
            }
5866
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
5867
0
            return -1;
5868
62
        }
5869
23.0k
        metrics_context.total_recycled_num = ++num_recycled;
5870
23.0k
        metrics_context.report();
5871
5872
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
5873
23.0k
        return 0;
5874
23.0k
    };
5875
5876
19
    auto loop_done = [&]() -> int {
5877
10
        DORIS_CLOUD_DEFER {
5878
10
            recycle_txn_info_keys.clear();
5879
10
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5877
1
        DORIS_CLOUD_DEFER {
5878
1
            recycle_txn_info_keys.clear();
5879
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5877
9
        DORIS_CLOUD_DEFER {
5878
9
            recycle_txn_info_keys.clear();
5879
9
        };
5880
10
        TEST_SYNC_POINT_CALLBACK(
5881
10
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
5882
10
                &recycle_txn_info_keys);
5883
23.0k
        for (const auto& k : recycle_txn_info_keys) {
5884
23.0k
            concurrent_delete_executor.add([&]() {
5885
23.0k
                int ret = delete_recycle_txn_kv(k);
5886
23.0k
                if (ret == 1) {
5887
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5888
54
                    for (int i = 1; i <= max_retry; ++i) {
5889
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5890
54
                        ret = delete_recycle_txn_kv(k);
5891
                        // clang-format off
5892
54
                        TEST_SYNC_POINT_CALLBACK(
5893
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5894
                        // clang-format off
5895
54
                        if (ret != 1) {
5896
18
                            break;
5897
18
                        }
5898
                        // random sleep 0-100 ms to retry
5899
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5900
36
                    }
5901
18
                }
5902
23.0k
                if (ret != 0) {
5903
9
                    LOG_WARNING("failed to delete recycle txn kv")
5904
9
                            .tag("instance id", instance_id_)
5905
9
                            .tag("key", hex(k));
5906
9
                    return -1;
5907
9
                }
5908
23.0k
                return 0;
5909
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5884
1
            concurrent_delete_executor.add([&]() {
5885
1
                int ret = delete_recycle_txn_kv(k);
5886
1
                if (ret == 1) {
5887
0
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5888
0
                    for (int i = 1; i <= max_retry; ++i) {
5889
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5890
0
                        ret = delete_recycle_txn_kv(k);
5891
                        // clang-format off
5892
0
                        TEST_SYNC_POINT_CALLBACK(
5893
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5894
                        // clang-format off
5895
0
                        if (ret != 1) {
5896
0
                            break;
5897
0
                        }
5898
                        // random sleep 0-100 ms to retry
5899
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5900
0
                    }
5901
0
                }
5902
1
                if (ret != 0) {
5903
0
                    LOG_WARNING("failed to delete recycle txn kv")
5904
0
                            .tag("instance id", instance_id_)
5905
0
                            .tag("key", hex(k));
5906
0
                    return -1;
5907
0
                }
5908
1
                return 0;
5909
1
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5884
23.0k
            concurrent_delete_executor.add([&]() {
5885
23.0k
                int ret = delete_recycle_txn_kv(k);
5886
23.0k
                if (ret == 1) {
5887
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5888
54
                    for (int i = 1; i <= max_retry; ++i) {
5889
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5890
54
                        ret = delete_recycle_txn_kv(k);
5891
                        // clang-format off
5892
54
                        TEST_SYNC_POINT_CALLBACK(
5893
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5894
                        // clang-format off
5895
54
                        if (ret != 1) {
5896
18
                            break;
5897
18
                        }
5898
                        // random sleep 0-100 ms to retry
5899
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5900
36
                    }
5901
18
                }
5902
23.0k
                if (ret != 0) {
5903
9
                    LOG_WARNING("failed to delete recycle txn kv")
5904
9
                            .tag("instance id", instance_id_)
5905
9
                            .tag("key", hex(k));
5906
9
                    return -1;
5907
9
                }
5908
23.0k
                return 0;
5909
23.0k
            });
5910
23.0k
        }
5911
10
        bool finished = true;
5912
10
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
5913
23.0k
        for (int r : rets) {
5914
23.0k
            if (r != 0) {
5915
9
                ret = -1;
5916
9
            }
5917
23.0k
        }
5918
5919
10
        ret = finished ? ret : -1;
5920
5921
10
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
5922
5923
10
        if (ret != 0) {
5924
3
            LOG_WARNING("recycle txn kv ret!=0")
5925
3
                    .tag("finished", finished)
5926
3
                    .tag("ret", ret)
5927
3
                    .tag("instance_id", instance_id_);
5928
3
            return ret;
5929
3
        }
5930
7
        return ret;
5931
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
5876
1
    auto loop_done = [&]() -> int {
5877
1
        DORIS_CLOUD_DEFER {
5878
1
            recycle_txn_info_keys.clear();
5879
1
        };
5880
1
        TEST_SYNC_POINT_CALLBACK(
5881
1
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
5882
1
                &recycle_txn_info_keys);
5883
1
        for (const auto& k : recycle_txn_info_keys) {
5884
1
            concurrent_delete_executor.add([&]() {
5885
1
                int ret = delete_recycle_txn_kv(k);
5886
1
                if (ret == 1) {
5887
1
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5888
1
                    for (int i = 1; i <= max_retry; ++i) {
5889
1
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5890
1
                        ret = delete_recycle_txn_kv(k);
5891
                        // clang-format off
5892
1
                        TEST_SYNC_POINT_CALLBACK(
5893
1
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5894
                        // clang-format off
5895
1
                        if (ret != 1) {
5896
1
                            break;
5897
1
                        }
5898
                        // random sleep 0-100 ms to retry
5899
1
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5900
1
                    }
5901
1
                }
5902
1
                if (ret != 0) {
5903
1
                    LOG_WARNING("failed to delete recycle txn kv")
5904
1
                            .tag("instance id", instance_id_)
5905
1
                            .tag("key", hex(k));
5906
1
                    return -1;
5907
1
                }
5908
1
                return 0;
5909
1
            });
5910
1
        }
5911
1
        bool finished = true;
5912
1
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
5913
1
        for (int r : rets) {
5914
1
            if (r != 0) {
5915
0
                ret = -1;
5916
0
            }
5917
1
        }
5918
5919
1
        ret = finished ? ret : -1;
5920
5921
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
5922
5923
1
        if (ret != 0) {
5924
0
            LOG_WARNING("recycle txn kv ret!=0")
5925
0
                    .tag("finished", finished)
5926
0
                    .tag("ret", ret)
5927
0
                    .tag("instance_id", instance_id_);
5928
0
            return ret;
5929
0
        }
5930
1
        return ret;
5931
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
5876
9
    auto loop_done = [&]() -> int {
5877
9
        DORIS_CLOUD_DEFER {
5878
9
            recycle_txn_info_keys.clear();
5879
9
        };
5880
9
        TEST_SYNC_POINT_CALLBACK(
5881
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
5882
9
                &recycle_txn_info_keys);
5883
23.0k
        for (const auto& k : recycle_txn_info_keys) {
5884
23.0k
            concurrent_delete_executor.add([&]() {
5885
23.0k
                int ret = delete_recycle_txn_kv(k);
5886
23.0k
                if (ret == 1) {
5887
23.0k
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
5888
23.0k
                    for (int i = 1; i <= max_retry; ++i) {
5889
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
5890
23.0k
                        ret = delete_recycle_txn_kv(k);
5891
                        // clang-format off
5892
23.0k
                        TEST_SYNC_POINT_CALLBACK(
5893
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
5894
                        // clang-format off
5895
23.0k
                        if (ret != 1) {
5896
23.0k
                            break;
5897
23.0k
                        }
5898
                        // random sleep 0-100 ms to retry
5899
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
5900
23.0k
                    }
5901
23.0k
                }
5902
23.0k
                if (ret != 0) {
5903
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
5904
23.0k
                            .tag("instance id", instance_id_)
5905
23.0k
                            .tag("key", hex(k));
5906
23.0k
                    return -1;
5907
23.0k
                }
5908
23.0k
                return 0;
5909
23.0k
            });
5910
23.0k
        }
5911
9
        bool finished = true;
5912
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
5913
23.0k
        for (int r : rets) {
5914
23.0k
            if (r != 0) {
5915
9
                ret = -1;
5916
9
            }
5917
23.0k
        }
5918
5919
9
        ret = finished ? ret : -1;
5920
5921
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
5922
5923
9
        if (ret != 0) {
5924
3
            LOG_WARNING("recycle txn kv ret!=0")
5925
3
                    .tag("finished", finished)
5926
3
                    .tag("ret", ret)
5927
3
                    .tag("instance_id", instance_id_);
5928
3
            return ret;
5929
3
        }
5930
6
        return ret;
5931
9
    };
5932
5933
19
    if (config::enable_recycler_stats_metrics) {
5934
0
        scan_and_statistics_expired_txn_label();
5935
0
    }
5936
    // recycle_func and loop_done for scan and recycle
5937
19
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
5938
19
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
5939
19
}
5940
5941
struct CopyJobIdTuple {
5942
    std::string instance_id;
5943
    std::string stage_id;
5944
    long table_id;
5945
    std::string copy_id;
5946
    std::string stage_path;
5947
};
5948
struct BatchObjStoreAccessor {
5949
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
5950
                          TxnKv* txn_kv)
5951
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
5952
3
    ~BatchObjStoreAccessor() {
5953
3
        if (!paths_.empty()) {
5954
3
            consume();
5955
3
        }
5956
3
    }
5957
5958
    /**
5959
    * To implicitely do batch work and submit the batch delete task to s3
5960
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
5961
    *
5962
    * @param copy_job The protubuf struct consists of the copy job files.
5963
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
5964
    *            it would last until we finish the delete task, here we need pass one string value
5965
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
5966
    */
5967
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
5968
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
5969
5
        auto& file_keys = copy_file_keys_[key];
5970
5
        file_keys.log_trace =
5971
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
5972
5
                            instance_id, stage_id, table_id, copy_id, path);
5973
5
        std::string_view log_trace = file_keys.log_trace;
5974
2.03k
        for (const auto& file : copy_job.object_files()) {
5975
2.03k
            auto relative_path = file.relative_path();
5976
2.03k
            paths_.push_back(relative_path);
5977
2.03k
            file_keys.keys.push_back(copy_file_key(
5978
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
5979
2.03k
            LOG_INFO(log_trace)
5980
2.03k
                    .tag("relative_path", relative_path)
5981
2.03k
                    .tag("batch_count", batch_count_);
5982
2.03k
        }
5983
5
        LOG_INFO(log_trace)
5984
5
                .tag("objects_num", copy_job.object_files().size())
5985
5
                .tag("batch_count", batch_count_);
5986
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
5987
        // recommend using delete objects when objects num is less than 10)
5988
5
        if (paths_.size() < 1000) {
5989
3
            return;
5990
3
        }
5991
2
        consume();
5992
2
    }
5993
5994
private:
5995
5
    void consume() {
5996
5
        DORIS_CLOUD_DEFER {
5997
5
            paths_.clear();
5998
5
            copy_file_keys_.clear();
5999
5
            batch_count_++;
6000
6001
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
6002
5
                        batch_count_);
6003
5
        };
6004
6005
5
        StopWatch sw;
6006
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
6007
5
        if (0 != accessor_->delete_files(paths_)) {
6008
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
6009
2
                        paths_.size(), batch_count_, sw.elapsed_us());
6010
2
            return;
6011
2
        }
6012
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
6013
3
                    paths_.size(), batch_count_, sw.elapsed_us());
6014
        // delete fdb's keys
6015
3
        for (auto& file_keys : copy_file_keys_) {
6016
3
            auto& [log_trace, keys] = file_keys.second;
6017
3
            std::unique_ptr<Transaction> txn;
6018
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
6019
0
                LOG(WARNING) << "failed to create txn";
6020
0
                continue;
6021
0
            }
6022
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6023
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6024
            // limited, should not cause the txn commit failed.
6025
1.02k
            for (const auto& key : keys) {
6026
1.02k
                txn->remove(key);
6027
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
6028
1.02k
            }
6029
3
            txn->remove(file_keys.first);
6030
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
6031
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
6032
0
                continue;
6033
0
            }
6034
3
        }
6035
3
    }
6036
    std::shared_ptr<StorageVaultAccessor> accessor_;
6037
    // the path of the s3 files to be deleted
6038
    std::vector<std::string> paths_;
6039
    struct CopyFiles {
6040
        std::string log_trace;
6041
        std::vector<std::string> keys;
6042
    };
6043
    // pair<std::string, std::vector<std::string>>
6044
    // first: instance_id_ stage_id table_id query_id
6045
    // second: keys to be deleted
6046
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
6047
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
6048
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
6049
    // which can together uniquely identifies different tasks for tracing log
6050
    uint64_t& batch_count_;
6051
    TxnKv* txn_kv_;
6052
};
6053
6054
13
int InstanceRecycler::recycle_copy_jobs() {
6055
13
    int64_t num_scanned = 0;
6056
13
    int64_t num_finished = 0;
6057
13
    int64_t num_expired = 0;
6058
13
    int64_t num_recycled = 0;
6059
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
6060
13
    uint64_t batch_count = 0;
6061
13
    const std::string task_name = "recycle_copy_jobs";
6062
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6063
6064
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
6065
6066
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6067
13
    register_recycle_task(task_name, start_time);
6068
6069
13
    DORIS_CLOUD_DEFER {
6070
13
        unregister_recycle_task(task_name);
6071
13
        int64_t cost =
6072
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6073
13
        metrics_context.finish_report();
6074
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6075
13
                .tag("instance_id", instance_id_)
6076
13
                .tag("num_scanned", num_scanned)
6077
13
                .tag("num_finished", num_finished)
6078
13
                .tag("num_expired", num_expired)
6079
13
                .tag("num_recycled", num_recycled);
6080
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
6069
13
    DORIS_CLOUD_DEFER {
6070
13
        unregister_recycle_task(task_name);
6071
13
        int64_t cost =
6072
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6073
13
        metrics_context.finish_report();
6074
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6075
13
                .tag("instance_id", instance_id_)
6076
13
                .tag("num_scanned", num_scanned)
6077
13
                .tag("num_finished", num_finished)
6078
13
                .tag("num_expired", num_expired)
6079
13
                .tag("num_recycled", num_recycled);
6080
13
    };
6081
6082
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
6083
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
6084
13
    std::string key0;
6085
13
    std::string key1;
6086
13
    copy_job_key(key_info0, &key0);
6087
13
    copy_job_key(key_info1, &key1);
6088
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
6089
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
6090
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
6091
16
                         this](std::string_view k, std::string_view v) -> int {
6092
16
        ++num_scanned;
6093
16
        CopyJobPB copy_job;
6094
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6095
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6096
0
            return -1;
6097
0
        }
6098
6099
        // decode copy job key
6100
16
        auto k1 = k;
6101
16
        k1.remove_prefix(1);
6102
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6103
16
        decode_key(&k1, &out);
6104
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6105
        // -> CopyJobPB
6106
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6107
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6108
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6109
6110
16
        bool check_storage = true;
6111
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6112
12
            ++num_finished;
6113
6114
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6115
7
                auto it = stage_accessor_map.find(stage_id);
6116
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6117
7
                std::string_view path;
6118
7
                if (it != stage_accessor_map.end()) {
6119
2
                    accessor = it->second;
6120
5
                } else {
6121
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6122
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6123
5
                                                      &inner_accessor);
6124
5
                    if (ret < 0) { // error
6125
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6126
0
                        return -1;
6127
5
                    } else if (ret == 0) {
6128
3
                        path = inner_accessor->uri();
6129
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6130
3
                                inner_accessor, batch_count, txn_kv_.get());
6131
3
                        stage_accessor_map.emplace(stage_id, accessor);
6132
3
                    } else { // stage not found, skip check storage
6133
2
                        check_storage = false;
6134
2
                    }
6135
5
                }
6136
7
                if (check_storage) {
6137
                    // TODO delete objects with key and etag is not supported
6138
5
                    accessor->add(std::move(copy_job), std::string(k),
6139
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6140
5
                    return 0;
6141
5
                }
6142
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6143
5
                int64_t current_time =
6144
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6145
5
                if (copy_job.finish_time_ms() > 0) {
6146
2
                    if (!config::force_immediate_recycle &&
6147
2
                        current_time < copy_job.finish_time_ms() +
6148
2
                                               config::copy_job_max_retention_second * 1000) {
6149
1
                        return 0;
6150
1
                    }
6151
3
                } else {
6152
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6153
3
                    if (!config::force_immediate_recycle &&
6154
3
                        current_time < copy_job.start_time_ms() +
6155
3
                                               config::copy_job_max_retention_second * 1000) {
6156
1
                        return 0;
6157
1
                    }
6158
3
                }
6159
5
            }
6160
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6161
4
            int64_t current_time =
6162
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6163
            // if copy job is timeout: delete all copy file kvs and copy job kv
6164
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6165
2
                return 0;
6166
2
            }
6167
2
            ++num_expired;
6168
2
        }
6169
6170
        // delete all copy files
6171
7
        std::vector<std::string> copy_file_keys;
6172
70
        for (auto& file : copy_job.object_files()) {
6173
70
            copy_file_keys.push_back(copy_file_key(
6174
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6175
70
        }
6176
7
        std::unique_ptr<Transaction> txn;
6177
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6178
0
            LOG(WARNING) << "failed to create txn";
6179
0
            return -1;
6180
0
        }
6181
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6182
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6183
        // limited, should not cause the txn commit failed.
6184
70
        for (const auto& key : copy_file_keys) {
6185
70
            txn->remove(key);
6186
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6187
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6188
70
                      << ", query_id=" << copy_id;
6189
70
        }
6190
7
        txn->remove(k);
6191
7
        TxnErrorCode err = txn->commit();
6192
7
        if (err != TxnErrorCode::TXN_OK) {
6193
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6194
0
            return -1;
6195
0
        }
6196
6197
7
        metrics_context.total_recycled_num = ++num_recycled;
6198
7
        metrics_context.report();
6199
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6200
7
        return 0;
6201
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6091
16
                         this](std::string_view k, std::string_view v) -> int {
6092
16
        ++num_scanned;
6093
16
        CopyJobPB copy_job;
6094
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6095
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6096
0
            return -1;
6097
0
        }
6098
6099
        // decode copy job key
6100
16
        auto k1 = k;
6101
16
        k1.remove_prefix(1);
6102
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6103
16
        decode_key(&k1, &out);
6104
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6105
        // -> CopyJobPB
6106
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6107
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6108
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6109
6110
16
        bool check_storage = true;
6111
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6112
12
            ++num_finished;
6113
6114
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6115
7
                auto it = stage_accessor_map.find(stage_id);
6116
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6117
7
                std::string_view path;
6118
7
                if (it != stage_accessor_map.end()) {
6119
2
                    accessor = it->second;
6120
5
                } else {
6121
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6122
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6123
5
                                                      &inner_accessor);
6124
5
                    if (ret < 0) { // error
6125
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6126
0
                        return -1;
6127
5
                    } else if (ret == 0) {
6128
3
                        path = inner_accessor->uri();
6129
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6130
3
                                inner_accessor, batch_count, txn_kv_.get());
6131
3
                        stage_accessor_map.emplace(stage_id, accessor);
6132
3
                    } else { // stage not found, skip check storage
6133
2
                        check_storage = false;
6134
2
                    }
6135
5
                }
6136
7
                if (check_storage) {
6137
                    // TODO delete objects with key and etag is not supported
6138
5
                    accessor->add(std::move(copy_job), std::string(k),
6139
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6140
5
                    return 0;
6141
5
                }
6142
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6143
5
                int64_t current_time =
6144
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6145
5
                if (copy_job.finish_time_ms() > 0) {
6146
2
                    if (!config::force_immediate_recycle &&
6147
2
                        current_time < copy_job.finish_time_ms() +
6148
2
                                               config::copy_job_max_retention_second * 1000) {
6149
1
                        return 0;
6150
1
                    }
6151
3
                } else {
6152
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6153
3
                    if (!config::force_immediate_recycle &&
6154
3
                        current_time < copy_job.start_time_ms() +
6155
3
                                               config::copy_job_max_retention_second * 1000) {
6156
1
                        return 0;
6157
1
                    }
6158
3
                }
6159
5
            }
6160
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6161
4
            int64_t current_time =
6162
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6163
            // if copy job is timeout: delete all copy file kvs and copy job kv
6164
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6165
2
                return 0;
6166
2
            }
6167
2
            ++num_expired;
6168
2
        }
6169
6170
        // delete all copy files
6171
7
        std::vector<std::string> copy_file_keys;
6172
70
        for (auto& file : copy_job.object_files()) {
6173
70
            copy_file_keys.push_back(copy_file_key(
6174
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6175
70
        }
6176
7
        std::unique_ptr<Transaction> txn;
6177
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6178
0
            LOG(WARNING) << "failed to create txn";
6179
0
            return -1;
6180
0
        }
6181
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6182
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6183
        // limited, should not cause the txn commit failed.
6184
70
        for (const auto& key : copy_file_keys) {
6185
70
            txn->remove(key);
6186
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6187
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6188
70
                      << ", query_id=" << copy_id;
6189
70
        }
6190
7
        txn->remove(k);
6191
7
        TxnErrorCode err = txn->commit();
6192
7
        if (err != TxnErrorCode::TXN_OK) {
6193
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6194
0
            return -1;
6195
0
        }
6196
6197
7
        metrics_context.total_recycled_num = ++num_recycled;
6198
7
        metrics_context.report();
6199
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6200
7
        return 0;
6201
7
    };
6202
6203
13
    if (config::enable_recycler_stats_metrics) {
6204
0
        scan_and_statistics_copy_jobs();
6205
0
    }
6206
    // recycle_func and loop_done for scan and recycle
6207
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
6208
13
}
6209
6210
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
6211
                                             const StagePB::StageType& stage_type,
6212
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
6213
5
#ifdef UNIT_TEST
6214
    // In unit test, external use the same accessor as the internal stage
6215
5
    auto it = accessor_map_.find(stage_id);
6216
5
    if (it != accessor_map_.end()) {
6217
3
        *accessor = it->second;
6218
3
    } else {
6219
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
6220
2
        return 1;
6221
2
    }
6222
#else
6223
    // init s3 accessor and add to accessor map
6224
    auto stage_it =
6225
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
6226
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
6227
6228
    if (stage_it == instance_info_.stages().end()) {
6229
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
6230
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
6231
        return 1;
6232
    }
6233
6234
    const auto& object_store_info = stage_it->obj_info();
6235
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
6236
6237
    S3Conf s3_conf;
6238
    if (stage_type == StagePB::EXTERNAL) {
6239
        if (stage_access_type == StagePB::AKSK) {
6240
            auto conf = S3Conf::from_obj_store_info(object_store_info);
6241
            if (!conf) {
6242
                return -1;
6243
            }
6244
6245
            s3_conf = std::move(*conf);
6246
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
6247
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
6248
            if (!conf) {
6249
                return -1;
6250
            }
6251
6252
            s3_conf = std::move(*conf);
6253
            if (instance_info_.ram_user().has_encryption_info()) {
6254
                AkSkPair plain_ak_sk_pair;
6255
                int ret = decrypt_ak_sk_helper(
6256
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
6257
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
6258
                if (ret != 0) {
6259
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
6260
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
6261
                    return -1;
6262
                }
6263
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
6264
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
6265
            } else {
6266
                s3_conf.ak = instance_info_.ram_user().ak();
6267
                s3_conf.sk = instance_info_.ram_user().sk();
6268
            }
6269
        } else {
6270
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
6271
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
6272
            return -1;
6273
        }
6274
    } else if (stage_type == StagePB::INTERNAL) {
6275
        int idx = stoi(object_store_info.id());
6276
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6277
            LOG(WARNING) << "invalid idx: " << idx;
6278
            return -1;
6279
        }
6280
6281
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6282
        auto conf = S3Conf::from_obj_store_info(old_obj);
6283
        if (!conf) {
6284
            return -1;
6285
        }
6286
6287
        s3_conf = std::move(*conf);
6288
        s3_conf.prefix = object_store_info.prefix();
6289
    } else {
6290
        LOG(WARNING) << "unknown stage type " << stage_type;
6291
        return -1;
6292
    }
6293
6294
    std::shared_ptr<S3Accessor> s3_accessor;
6295
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
6296
    if (ret != 0) {
6297
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
6298
        return -1;
6299
    }
6300
6301
    *accessor = std::move(s3_accessor);
6302
#endif
6303
3
    return 0;
6304
5
}
6305
6306
11
int InstanceRecycler::recycle_stage() {
6307
11
    int64_t num_scanned = 0;
6308
11
    int64_t num_recycled = 0;
6309
11
    const std::string task_name = "recycle_stage";
6310
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6311
6312
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
6313
6314
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6315
11
    register_recycle_task(task_name, start_time);
6316
6317
11
    DORIS_CLOUD_DEFER {
6318
11
        unregister_recycle_task(task_name);
6319
11
        int64_t cost =
6320
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6321
11
        metrics_context.finish_report();
6322
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6323
11
                .tag("instance_id", instance_id_)
6324
11
                .tag("num_scanned", num_scanned)
6325
11
                .tag("num_recycled", num_recycled);
6326
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
6317
11
    DORIS_CLOUD_DEFER {
6318
11
        unregister_recycle_task(task_name);
6319
11
        int64_t cost =
6320
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6321
11
        metrics_context.finish_report();
6322
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6323
11
                .tag("instance_id", instance_id_)
6324
11
                .tag("num_scanned", num_scanned)
6325
11
                .tag("num_recycled", num_recycled);
6326
11
    };
6327
6328
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6329
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6330
11
    std::string key0 = recycle_stage_key(key_info0);
6331
11
    std::string key1 = recycle_stage_key(key_info1);
6332
6333
11
    std::vector<std::string_view> stage_keys;
6334
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
6335
11
                         this](std::string_view k, std::string_view v) -> int {
6336
1
        ++num_scanned;
6337
1
        RecycleStagePB recycle_stage;
6338
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6339
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6340
0
            return -1;
6341
0
        }
6342
6343
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6344
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6345
0
            LOG(WARNING) << "invalid idx: " << idx;
6346
0
            return -1;
6347
0
        }
6348
6349
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6350
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6351
1
                [&] {
6352
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6353
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6354
1
                    if (!s3_conf) {
6355
1
                        return -1;
6356
1
                    }
6357
6358
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6359
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6360
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6361
1
                    if (ret != 0) {
6362
1
                        return -1;
6363
1
                    }
6364
6365
1
                    accessor = std::move(s3_accessor);
6366
1
                    return 0;
6367
1
                }(),
6368
1
                "recycle_stage:get_accessor", &accessor);
6369
6370
1
        if (ret != 0) {
6371
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6372
0
            return ret;
6373
0
        }
6374
6375
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6376
1
                .tag("instance_id", instance_id_)
6377
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6378
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6379
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6380
1
                .tag("obj_info_id", idx)
6381
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6382
1
        ret = accessor->delete_all();
6383
1
        if (ret != 0) {
6384
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6385
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6386
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6387
0
                         << ", ret=" << ret;
6388
0
            return -1;
6389
0
        }
6390
1
        metrics_context.total_recycled_num = ++num_recycled;
6391
1
        metrics_context.report();
6392
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6393
1
        stage_keys.push_back(k);
6394
1
        return 0;
6395
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6335
1
                         this](std::string_view k, std::string_view v) -> int {
6336
1
        ++num_scanned;
6337
1
        RecycleStagePB recycle_stage;
6338
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6339
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6340
0
            return -1;
6341
0
        }
6342
6343
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6344
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6345
0
            LOG(WARNING) << "invalid idx: " << idx;
6346
0
            return -1;
6347
0
        }
6348
6349
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6350
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6351
1
                [&] {
6352
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6353
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6354
1
                    if (!s3_conf) {
6355
1
                        return -1;
6356
1
                    }
6357
6358
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6359
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6360
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6361
1
                    if (ret != 0) {
6362
1
                        return -1;
6363
1
                    }
6364
6365
1
                    accessor = std::move(s3_accessor);
6366
1
                    return 0;
6367
1
                }(),
6368
1
                "recycle_stage:get_accessor", &accessor);
6369
6370
1
        if (ret != 0) {
6371
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6372
0
            return ret;
6373
0
        }
6374
6375
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6376
1
                .tag("instance_id", instance_id_)
6377
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6378
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6379
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6380
1
                .tag("obj_info_id", idx)
6381
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6382
1
        ret = accessor->delete_all();
6383
1
        if (ret != 0) {
6384
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6385
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6386
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6387
0
                         << ", ret=" << ret;
6388
0
            return -1;
6389
0
        }
6390
1
        metrics_context.total_recycled_num = ++num_recycled;
6391
1
        metrics_context.report();
6392
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6393
1
        stage_keys.push_back(k);
6394
1
        return 0;
6395
1
    };
6396
6397
11
    auto loop_done = [&stage_keys, this]() -> int {
6398
1
        if (stage_keys.empty()) return 0;
6399
1
        DORIS_CLOUD_DEFER {
6400
1
            stage_keys.clear();
6401
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6399
1
        DORIS_CLOUD_DEFER {
6400
1
            stage_keys.clear();
6401
1
        };
6402
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6403
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6404
0
            return -1;
6405
0
        }
6406
1
        return 0;
6407
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
6397
1
    auto loop_done = [&stage_keys, this]() -> int {
6398
1
        if (stage_keys.empty()) return 0;
6399
1
        DORIS_CLOUD_DEFER {
6400
1
            stage_keys.clear();
6401
1
        };
6402
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6403
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6404
0
            return -1;
6405
0
        }
6406
1
        return 0;
6407
1
    };
6408
11
    if (config::enable_recycler_stats_metrics) {
6409
0
        scan_and_statistics_stage();
6410
0
    }
6411
    // recycle_func and loop_done for scan and recycle
6412
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
6413
11
}
6414
6415
10
int InstanceRecycler::recycle_expired_stage_objects() {
6416
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
6417
6418
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6419
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
6420
6421
10
    DORIS_CLOUD_DEFER {
6422
10
        int64_t cost =
6423
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6424
10
        metrics_context.finish_report();
6425
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6426
10
                .tag("instance_id", instance_id_);
6427
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
6421
10
    DORIS_CLOUD_DEFER {
6422
10
        int64_t cost =
6423
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6424
10
        metrics_context.finish_report();
6425
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6426
10
                .tag("instance_id", instance_id_);
6427
10
    };
6428
6429
10
    int ret = 0;
6430
6431
10
    if (config::enable_recycler_stats_metrics) {
6432
0
        scan_and_statistics_expired_stage_objects();
6433
0
    }
6434
6435
10
    for (const auto& stage : instance_info_.stages()) {
6436
0
        std::stringstream ss;
6437
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
6438
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
6439
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
6440
0
           << ", prefix=" << stage.obj_info().prefix();
6441
6442
0
        if (stopped()) {
6443
0
            break;
6444
0
        }
6445
0
        if (stage.type() == StagePB::EXTERNAL) {
6446
0
            continue;
6447
0
        }
6448
0
        int idx = stoi(stage.obj_info().id());
6449
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6450
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
6451
0
            continue;
6452
0
        }
6453
6454
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6455
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6456
0
        if (!s3_conf) {
6457
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
6458
0
            continue;
6459
0
        }
6460
6461
0
        s3_conf->prefix = stage.obj_info().prefix();
6462
0
        std::shared_ptr<S3Accessor> accessor;
6463
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
6464
0
        if (ret1 != 0) {
6465
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
6466
0
            ret = -1;
6467
0
            continue;
6468
0
        }
6469
6470
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
6471
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
6472
0
            ret = -1;
6473
0
            continue;
6474
0
        }
6475
6476
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
6477
0
        int64_t expiration_time =
6478
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
6479
0
                config::internal_stage_objects_expire_time_second;
6480
0
        if (config::force_immediate_recycle) {
6481
0
            expiration_time = INT64_MAX;
6482
0
        }
6483
0
        ret1 = accessor->delete_all(expiration_time);
6484
0
        if (ret1 != 0) {
6485
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
6486
0
                         << ss.str();
6487
0
            ret = -1;
6488
0
            continue;
6489
0
        }
6490
0
        metrics_context.total_recycled_num++;
6491
0
        metrics_context.report();
6492
0
    }
6493
10
    return ret;
6494
10
}
6495
6496
180
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
6497
180
    std::lock_guard lock(recycle_tasks_mutex);
6498
180
    running_recycle_tasks[task_name] = start_time;
6499
180
}
6500
6501
180
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
6502
180
    std::lock_guard lock(recycle_tasks_mutex);
6503
180
    DCHECK(running_recycle_tasks[task_name] > 0);
6504
180
    running_recycle_tasks.erase(task_name);
6505
180
}
6506
6507
21
bool InstanceRecycler::check_recycle_tasks() {
6508
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
6509
21
    {
6510
21
        std::lock_guard lock(recycle_tasks_mutex);
6511
21
        tmp_running_recycle_tasks = running_recycle_tasks;
6512
21
    }
6513
6514
21
    bool found = false;
6515
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6516
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
6517
20
        int64_t cost = now - start_time;
6518
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
6519
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
6520
20
                    .tag("instance_id", instance_id_)
6521
20
                    .tag("task", task_name);
6522
20
            found = true;
6523
20
        }
6524
20
    }
6525
6526
21
    return found;
6527
21
}
6528
6529
// Scans the indexes that need to be recycled and collects statistics on them
6530
0
int InstanceRecycler::scan_and_statistics_indexes() {
6531
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
6532
6533
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
6534
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
6535
0
    std::string index_key0;
6536
0
    std::string index_key1;
6537
0
    recycle_index_key(index_key_info0, &index_key0);
6538
0
    recycle_index_key(index_key_info1, &index_key1);
6539
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6540
6541
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
6542
0
        RecycleIndexPB index_pb;
6543
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
6544
0
            return 0;
6545
0
        }
6546
0
        int64_t current_time = ::time(nullptr);
6547
0
        if (current_time <
6548
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
6549
0
            return 0;
6550
0
        }
6551
        // decode index_id
6552
0
        auto k1 = k;
6553
0
        k1.remove_prefix(1);
6554
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6555
0
        decode_key(&k1, &out);
6556
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
6557
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
6558
0
        std::unique_ptr<Transaction> txn;
6559
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6560
0
        if (err != TxnErrorCode::TXN_OK) {
6561
0
            return 0;
6562
0
        }
6563
0
        std::string val;
6564
0
        err = txn->get(k, &val);
6565
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
6566
0
            return 0;
6567
0
        }
6568
0
        if (err != TxnErrorCode::TXN_OK) {
6569
0
            return 0;
6570
0
        }
6571
0
        index_pb.Clear();
6572
0
        if (!index_pb.ParseFromString(val)) {
6573
0
            return 0;
6574
0
        }
6575
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
6576
0
            return 0;
6577
0
        }
6578
0
        metrics_context.total_need_recycle_num++;
6579
0
        return 0;
6580
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6581
6582
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
6583
0
    metrics_context.report(true);
6584
0
    segment_metrics_context_.report(true);
6585
0
    tablet_metrics_context_.report(true);
6586
0
    return ret;
6587
0
}
6588
6589
// Scans the partitions that need to be recycled and collects statistics on them
6590
0
int InstanceRecycler::scan_and_statistics_partitions() {
6591
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
6592
6593
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
6594
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
6595
0
    std::string part_key0;
6596
0
    std::string part_key1;
6597
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6598
6599
0
    recycle_partition_key(part_key_info0, &part_key0);
6600
0
    recycle_partition_key(part_key_info1, &part_key1);
6601
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
6602
0
        RecyclePartitionPB part_pb;
6603
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
6604
0
            return 0;
6605
0
        }
6606
0
        int64_t current_time = ::time(nullptr);
6607
0
        if (current_time <
6608
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
6609
0
            return 0;
6610
0
        }
6611
        // decode partition_id
6612
0
        auto k1 = k;
6613
0
        k1.remove_prefix(1);
6614
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6615
0
        decode_key(&k1, &out);
6616
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
6617
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
6618
        // Change state to RECYCLING
6619
0
        std::unique_ptr<Transaction> txn;
6620
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6621
0
        if (err != TxnErrorCode::TXN_OK) {
6622
0
            return 0;
6623
0
        }
6624
0
        std::string val;
6625
0
        err = txn->get(k, &val);
6626
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
6627
0
            return 0;
6628
0
        }
6629
0
        if (err != TxnErrorCode::TXN_OK) {
6630
0
            return 0;
6631
0
        }
6632
0
        part_pb.Clear();
6633
0
        if (!part_pb.ParseFromString(val)) {
6634
0
            return 0;
6635
0
        }
6636
        // Partitions with PREPARED state MUST have no data
6637
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
6638
0
        int ret = 0;
6639
0
        for (int64_t index_id : part_pb.index_id()) {
6640
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
6641
0
                                            partition_id, is_empty_tablet) != 0) {
6642
0
                ret = 0;
6643
0
            }
6644
0
        }
6645
0
        metrics_context.total_need_recycle_num++;
6646
0
        return ret;
6647
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6648
6649
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
6650
0
    metrics_context.report(true);
6651
0
    segment_metrics_context_.report(true);
6652
0
    tablet_metrics_context_.report(true);
6653
0
    return ret;
6654
0
}
6655
6656
// Scans the rowsets that need to be recycled and collects statistics on them
6657
0
int InstanceRecycler::scan_and_statistics_rowsets() {
6658
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
6659
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
6660
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
6661
0
    std::string recyc_rs_key0;
6662
0
    std::string recyc_rs_key1;
6663
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
6664
0
                recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
6665
0
       int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6666
6667
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
6668
0
        RecycleRowsetPB rowset;
6669
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6670
0
            return 0;
6671
0
        }
6672
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
6673
0
        int64_t current_time = ::time(nullptr);
6674
0
        if (current_time <
6675
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
6676
0
            return 0;
6677
0
        }
6678
6679
0
        if (!rowset.has_type()) {
6680
0
            if (!rowset.has_resource_id()) [[unlikely]] {
6681
0
                return 0;
6682
0
            }
6683
0
            if (rowset.resource_id().empty()) [[unlikely]] {
6684
0
                return 0;
6685
0
            }
6686
0
            metrics_context.total_need_recycle_num++;
6687
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6688
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
6689
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6690
0
            return 0;
6691
0
        }
6692
6693
0
        if(!rowset_meta->has_is_recycled() || !rowset_meta->is_recycled()) {
6694
0
            return 0;
6695
0
        }
6696
6697
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
6698
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
6699
0
                return 0;
6700
0
            }
6701
0
        }
6702
0
        metrics_context.total_need_recycle_num++;
6703
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
6704
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
6705
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
6706
0
        return 0;
6707
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6708
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
6709
0
    metrics_context.report(true);
6710
0
    segment_metrics_context_.report(true);
6711
0
    return ret;
6712
0
}
6713
6714
// Scans the tmp rowsets that need to be recycled and collects statistics on them
6715
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
6716
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
6717
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
6718
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
6719
0
    std::string tmp_rs_key0;
6720
0
    std::string tmp_rs_key1;
6721
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
6722
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
6723
6724
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6725
6726
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
6727
0
        doris::RowsetMetaCloudPB rowset;
6728
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6729
0
            return 0;
6730
0
        }
6731
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
6732
0
        int64_t current_time = ::time(nullptr);
6733
0
        if (current_time < expiration) {
6734
0
            return 0;
6735
0
        }
6736
6737
0
        DCHECK_GT(rowset.txn_id(), 0)
6738
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
6739
6740
0
        if(!rowset.has_is_recycled() || !rowset.is_recycled()) {
6741
0
            return 0;
6742
0
        }
6743
6744
0
        if (!rowset.has_resource_id()) {
6745
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
6746
0
                return 0;
6747
0
            }
6748
0
            return 0;
6749
0
        }
6750
6751
0
        metrics_context.total_need_recycle_num++;
6752
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
6753
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
6754
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
6755
0
        return 0;
6756
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6757
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
6758
0
    metrics_context.report(true);
6759
0
    segment_metrics_context_.report(true);
6760
0
    return ret;
6761
0
}
6762
6763
// Scans the timed-out transactions that need to be aborted and collects statistics on them
6764
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
6765
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
6766
6767
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
6768
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6769
0
    std::string begin_txn_running_key;
6770
0
    std::string end_txn_running_key;
6771
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
6772
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
6773
6774
0
    int64_t current_time =
6775
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6776
6777
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
6778
0
                                               std::string_view k, std::string_view v) -> int {
6779
0
        std::unique_ptr<Transaction> txn;
6780
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6781
0
        if (err != TxnErrorCode::TXN_OK) {
6782
0
            return 0;
6783
0
        }
6784
0
        std::string_view k1 = k;
6785
0
        k1.remove_prefix(1);
6786
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6787
0
        if (decode_key(&k1, &out) != 0) {
6788
0
            return 0;
6789
0
        }
6790
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6791
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6792
        // Update txn_info
6793
0
        std::string txn_inf_key, txn_inf_val;
6794
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
6795
0
        err = txn->get(txn_inf_key, &txn_inf_val);
6796
0
        if (err != TxnErrorCode::TXN_OK) {
6797
0
            return 0;
6798
0
        }
6799
0
        TxnInfoPB txn_info;
6800
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
6801
0
            return 0;
6802
0
        }
6803
6804
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
6805
0
            TxnRunningPB txn_running_pb;
6806
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
6807
0
                return 0;
6808
0
            }
6809
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
6810
0
                return 0;
6811
0
            }
6812
0
            metrics_context.total_need_recycle_num++;
6813
0
        }
6814
0
        return 0;
6815
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6816
6817
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
6818
0
    metrics_context.report(true);
6819
0
    return ret;
6820
0
}
6821
6822
// Scans the expired transaction labels that need to be recycled and collects statistics on them
6823
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
6824
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
6825
6826
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
6827
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
6828
0
    std::string begin_recycle_txn_key;
6829
0
    std::string end_recycle_txn_key;
6830
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
6831
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
6832
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6833
0
    int64_t current_time_ms =
6834
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6835
6836
    // for calculate the total num or bytes of recyled objects
6837
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
6838
0
        RecycleTxnPB recycle_txn_pb;
6839
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6840
0
            return 0;
6841
0
        }
6842
0
        if ((config::force_immediate_recycle) ||
6843
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6844
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6845
0
             current_time_ms)) {
6846
0
            metrics_context.total_need_recycle_num++;
6847
0
        }
6848
0
        return 0;
6849
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6850
6851
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
6852
0
    metrics_context.report(true);
6853
0
    return ret;
6854
0
}
6855
6856
// Scan and statistics copy_jobs that need to be recycled
6857
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
6858
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
6859
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
6860
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
6861
0
    std::string key0;
6862
0
    std::string key1;
6863
0
    copy_job_key(key_info0, &key0);
6864
0
    copy_job_key(key_info1, &key1);
6865
6866
    // for calculate the total num or bytes of recyled objects
6867
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
6868
0
        CopyJobPB copy_job;
6869
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6870
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6871
0
            return 0;
6872
0
        }
6873
6874
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6875
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
6876
0
                int64_t current_time =
6877
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6878
0
                if (copy_job.finish_time_ms() > 0) {
6879
0
                    if (!config::force_immediate_recycle &&
6880
0
                        current_time < copy_job.finish_time_ms() +
6881
0
                                               config::copy_job_max_retention_second * 1000) {
6882
0
                        return 0;
6883
0
                    }
6884
0
                } else {
6885
0
                    if (!config::force_immediate_recycle &&
6886
0
                        current_time < copy_job.start_time_ms() +
6887
0
                                               config::copy_job_max_retention_second * 1000) {
6888
0
                        return 0;
6889
0
                    }
6890
0
                }
6891
0
            }
6892
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6893
0
            int64_t current_time =
6894
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6895
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6896
0
                return 0;
6897
0
            }
6898
0
        }
6899
0
        metrics_context.total_need_recycle_num++;
6900
0
        return 0;
6901
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6902
6903
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
6904
0
    metrics_context.report(true);
6905
0
    return ret;
6906
0
}
6907
6908
// Scan and statistics stage that need to be recycled
6909
0
int InstanceRecycler::scan_and_statistics_stage() {
6910
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
6911
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6912
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6913
0
    std::string key0 = recycle_stage_key(key_info0);
6914
0
    std::string key1 = recycle_stage_key(key_info1);
6915
6916
    // for calculate the total num or bytes of recyled objects
6917
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
6918
0
                                                        std::string_view v) -> int {
6919
0
        RecycleStagePB recycle_stage;
6920
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6921
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6922
0
            return 0;
6923
0
        }
6924
6925
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
6926
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6927
0
            LOG(WARNING) << "invalid idx: " << idx;
6928
0
            return 0;
6929
0
        }
6930
6931
0
        std::shared_ptr<StorageVaultAccessor> accessor;
6932
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6933
0
                [&] {
6934
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6935
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6936
0
                    if (!s3_conf) {
6937
0
                        return 0;
6938
0
                    }
6939
6940
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6941
0
                    std::shared_ptr<S3Accessor> s3_accessor;
6942
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6943
0
                    if (ret != 0) {
6944
0
                        return 0;
6945
0
                    }
6946
6947
0
                    accessor = std::move(s3_accessor);
6948
0
                    return 0;
6949
0
                }(),
6950
0
                "recycle_stage:get_accessor", &accessor);
6951
6952
0
        if (ret != 0) {
6953
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6954
0
            return 0;
6955
0
        }
6956
6957
0
        metrics_context.total_need_recycle_num++;
6958
0
        return 0;
6959
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6960
6961
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
6962
0
    metrics_context.report(true);
6963
0
    return ret;
6964
0
}
6965
6966
// Scan and statistics expired_stage_objects that need to be recycled
6967
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
6968
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
6969
6970
    // for calculate the total num or bytes of recyled objects
6971
0
    auto scan_and_statistics = [&metrics_context, this]() {
6972
0
        for (const auto& stage : instance_info_.stages()) {
6973
0
            if (stopped()) {
6974
0
                break;
6975
0
            }
6976
0
            if (stage.type() == StagePB::EXTERNAL) {
6977
0
                continue;
6978
0
            }
6979
0
            int idx = stoi(stage.obj_info().id());
6980
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
6981
0
                continue;
6982
0
            }
6983
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
6984
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6985
0
            if (!s3_conf) {
6986
0
                continue;
6987
0
            }
6988
0
            s3_conf->prefix = stage.obj_info().prefix();
6989
0
            std::shared_ptr<S3Accessor> accessor;
6990
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
6991
0
            if (ret1 != 0) {
6992
0
                continue;
6993
0
            }
6994
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
6995
0
                continue;
6996
0
            }
6997
0
            metrics_context.total_need_recycle_num++;
6998
0
        }
6999
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
7000
7001
0
    scan_and_statistics();
7002
0
    metrics_context.report(true);
7003
0
    return 0;
7004
0
}
7005
7006
// Scan and statistics versions that need to be recycled
7007
0
int InstanceRecycler::scan_and_statistics_versions() {
7008
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
7009
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
7010
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
7011
7012
0
    int64_t last_scanned_table_id = 0;
7013
0
    bool is_recycled = false; // Is last scanned kv recycled
7014
    // for calculate the total num or bytes of recyled objects
7015
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
7016
0
                                       std::string_view k, std::string_view) {
7017
0
        auto k1 = k;
7018
0
        k1.remove_prefix(1);
7019
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
7020
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7021
0
        decode_key(&k1, &out);
7022
0
        DCHECK_EQ(out.size(), 6) << k;
7023
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
7024
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
7025
0
            metrics_context.total_need_recycle_num +=
7026
0
                    is_recycled; // Version kv of this table has been recycled
7027
0
            return 0;
7028
0
        }
7029
0
        last_scanned_table_id = table_id;
7030
0
        is_recycled = false;
7031
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
7032
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
7033
0
        std::unique_ptr<Transaction> txn;
7034
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7035
0
        if (err != TxnErrorCode::TXN_OK) {
7036
0
            return 0;
7037
0
        }
7038
0
        std::unique_ptr<RangeGetIterator> iter;
7039
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
7040
0
        if (err != TxnErrorCode::TXN_OK) {
7041
0
            return 0;
7042
0
        }
7043
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
7044
0
            return 0;
7045
0
        }
7046
0
        metrics_context.total_need_recycle_num++;
7047
0
        is_recycled = true;
7048
0
        return 0;
7049
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7050
7051
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
7052
0
    metrics_context.report(true);
7053
0
    return ret;
7054
0
}
7055
7056
// Scan and statistics restore jobs that need to be recycled
7057
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
7058
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
7059
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
7060
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
7061
0
    std::string restore_job_key0;
7062
0
    std::string restore_job_key1;
7063
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
7064
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
7065
7066
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7067
7068
    // for calculate the total num or bytes of recyled objects
7069
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
7070
0
        RestoreJobCloudPB restore_job_pb;
7071
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
7072
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
7073
0
            return 0;
7074
0
        }
7075
0
        int64_t expiration =
7076
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
7077
0
        int64_t current_time = ::time(nullptr);
7078
0
        if (current_time < expiration) { // not expired
7079
0
            return 0;
7080
0
        }
7081
0
        metrics_context.total_need_recycle_num++;
7082
0
        if(restore_job_pb.need_recycle_data()) {
7083
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
7084
0
        }
7085
0
        return 0;
7086
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7087
7088
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
7089
0
    metrics_context.report(true);
7090
0
    return ret;
7091
0
}
7092
7093
int InstanceRecycler::classify_rowset_task_by_ref_count(
7094
60
        RowsetDeleteTask& task, std::vector<RowsetDeleteTask>& batch_delete_tasks) {
7095
60
    constexpr int MAX_RETRY = 10;
7096
60
    const auto& rowset_meta = task.rowset_meta;
7097
60
    int64_t tablet_id = rowset_meta.tablet_id();
7098
60
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
7099
60
    std::string_view reference_instance_id = instance_id_;
7100
60
    if (rowset_meta.has_reference_instance_id()) {
7101
5
        reference_instance_id = rowset_meta.reference_instance_id();
7102
5
    }
7103
7104
61
    for (int i = 0; i < MAX_RETRY; ++i) {
7105
61
        std::unique_ptr<Transaction> txn;
7106
61
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7107
61
        if (err != TxnErrorCode::TXN_OK) {
7108
0
            LOG_WARNING("failed to create txn when classifying rowset task")
7109
0
                    .tag("instance_id", instance_id_)
7110
0
                    .tag("tablet_id", tablet_id)
7111
0
                    .tag("rowset_id", rowset_id)
7112
0
                    .tag("err", err);
7113
0
            return -1;
7114
0
        }
7115
7116
61
        std::string rowset_ref_count_key =
7117
61
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
7118
61
        task.rowset_ref_count_key = rowset_ref_count_key;
7119
7120
61
        int64_t ref_count = 0;
7121
61
        {
7122
61
            std::string value;
7123
61
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
7124
61
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7125
0
                ref_count = 1;
7126
61
            } else if (err != TxnErrorCode::TXN_OK) {
7127
0
                LOG_WARNING("failed to get rowset ref count key when classifying")
7128
0
                        .tag("instance_id", instance_id_)
7129
0
                        .tag("tablet_id", tablet_id)
7130
0
                        .tag("rowset_id", rowset_id)
7131
0
                        .tag("err", err);
7132
0
                return -1;
7133
61
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
7134
0
                LOG_WARNING("failed to decode rowset data ref count when classifying")
7135
0
                        .tag("instance_id", instance_id_)
7136
0
                        .tag("tablet_id", tablet_id)
7137
0
                        .tag("rowset_id", rowset_id)
7138
0
                        .tag("value", hex(value));
7139
0
                return -1;
7140
0
            }
7141
61
        }
7142
7143
61
        if (ref_count > 1) {
7144
            // ref_count > 1: decrement count, remove recycle keys, don't add to batch delete
7145
12
            txn->atomic_add(rowset_ref_count_key, -1);
7146
12
            LOG_INFO("decrease rowset data ref count in classification phase")
7147
12
                    .tag("instance_id", instance_id_)
7148
12
                    .tag("tablet_id", tablet_id)
7149
12
                    .tag("rowset_id", rowset_id)
7150
12
                    .tag("ref_count", ref_count - 1)
7151
12
                    .tag("ref_count_key", hex(rowset_ref_count_key));
7152
7153
12
            if (!task.recycle_rowset_key.empty()) {
7154
12
                txn->remove(task.recycle_rowset_key);
7155
12
                LOG_INFO("remove recycle rowset key in classification phase")
7156
12
                        .tag("key", hex(task.recycle_rowset_key));
7157
12
            }
7158
12
            if (!task.non_versioned_rowset_key.empty()) {
7159
12
                txn->remove(task.non_versioned_rowset_key);
7160
12
                LOG_INFO("remove non versioned rowset key in classification phase")
7161
12
                        .tag("key", hex(task.non_versioned_rowset_key));
7162
12
            }
7163
7164
12
            err = txn->commit();
7165
12
            if (err == TxnErrorCode::TXN_CONFLICT) {
7166
1
                VLOG_DEBUG << "decrease rowset ref count but txn conflict in classification, retry"
7167
0
                           << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
7168
0
                           << ", ref_count=" << ref_count << ", retry=" << i;
7169
1
                std::this_thread::sleep_for(std::chrono::milliseconds(500));
7170
1
                continue;
7171
11
            } else if (err != TxnErrorCode::TXN_OK) {
7172
0
                LOG_WARNING("failed to commit txn when classifying rowset task")
7173
0
                        .tag("instance_id", instance_id_)
7174
0
                        .tag("tablet_id", tablet_id)
7175
0
                        .tag("rowset_id", rowset_id)
7176
0
                        .tag("err", err);
7177
0
                return -1;
7178
0
            }
7179
11
            return 1; // handled, not added to batch delete
7180
49
        } else {
7181
            // ref_count == 1: Add to batch delete plan without modifying any KV.
7182
            // Keep recycle_rowset_key as "pending recycle" marker until data is actually deleted.
7183
49
            LOG_INFO("add rowset to batch delete plan")
7184
49
                    .tag("instance_id", instance_id_)
7185
49
                    .tag("tablet_id", tablet_id)
7186
49
                    .tag("rowset_id", rowset_id)
7187
49
                    .tag("resource_id", rowset_meta.resource_id())
7188
49
                    .tag("ref_count", ref_count);
7189
7190
49
            batch_delete_tasks.push_back(std::move(task));
7191
49
            return 0; // added to batch delete
7192
49
        }
7193
61
    }
7194
7195
0
    LOG_WARNING("failed to classify rowset task after retry")
7196
0
            .tag("instance_id", instance_id_)
7197
0
            .tag("tablet_id", tablet_id)
7198
0
            .tag("rowset_id", rowset_id)
7199
0
            .tag("retry", MAX_RETRY);
7200
0
    return -1;
7201
60
}
7202
7203
10
int InstanceRecycler::cleanup_rowset_metadata(const std::vector<RowsetDeleteTask>& tasks) {
7204
10
    int ret = 0;
7205
49
    for (const auto& task : tasks) {
7206
49
        int64_t tablet_id = task.rowset_meta.tablet_id();
7207
49
        const std::string& rowset_id = task.rowset_meta.rowset_id_v2();
7208
7209
        // Note: decrement_packed_file_ref_counts is already called in delete_rowset_data,
7210
        // so we don't need to call it again here.
7211
7212
        // Remove all metadata keys in one transaction
7213
49
        std::unique_ptr<Transaction> txn;
7214
49
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7215
49
        if (err != TxnErrorCode::TXN_OK) {
7216
0
            LOG_WARNING("failed to create txn when cleaning up metadata")
7217
0
                    .tag("instance_id", instance_id_)
7218
0
                    .tag("tablet_id", tablet_id)
7219
0
                    .tag("rowset_id", rowset_id)
7220
0
                    .tag("err", err);
7221
0
            ret = -1;
7222
0
            continue;
7223
0
        }
7224
7225
49
        std::string_view reference_instance_id = instance_id_;
7226
49
        if (task.rowset_meta.has_reference_instance_id()) {
7227
5
            reference_instance_id = task.rowset_meta.reference_instance_id();
7228
5
        }
7229
7230
49
        txn->remove(task.rowset_ref_count_key);
7231
49
        LOG_INFO("delete rowset data ref count key in cleanup phase")
7232
49
                .tag("instance_id", instance_id_)
7233
49
                .tag("tablet_id", tablet_id)
7234
49
                .tag("rowset_id", rowset_id)
7235
49
                .tag("ref_count_key", hex(task.rowset_ref_count_key));
7236
7237
49
        std::string dbm_start_key =
7238
49
                meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
7239
49
        std::string dbm_end_key = meta_delete_bitmap_key(
7240
49
                {reference_instance_id, tablet_id, rowset_id,
7241
49
                 std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
7242
49
        txn->remove(dbm_start_key, dbm_end_key);
7243
49
        LOG_INFO("remove delete bitmap kv in cleanup phase")
7244
49
                .tag("instance_id", instance_id_)
7245
49
                .tag("tablet_id", tablet_id)
7246
49
                .tag("rowset_id", rowset_id)
7247
49
                .tag("begin", hex(dbm_start_key))
7248
49
                .tag("end", hex(dbm_end_key));
7249
7250
49
        std::string versioned_dbm_start_key =
7251
49
                versioned::meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id});
7252
49
        std::string versioned_dbm_end_key = versioned_dbm_start_key;
7253
49
        encode_int64(INT64_MAX, &versioned_dbm_end_key);
7254
49
        txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
7255
49
        LOG_INFO("remove versioned delete bitmap kv in cleanup phase")
7256
49
                .tag("instance_id", instance_id_)
7257
49
                .tag("tablet_id", tablet_id)
7258
49
                .tag("rowset_id", rowset_id)
7259
49
                .tag("begin", hex(versioned_dbm_start_key))
7260
49
                .tag("end", hex(versioned_dbm_end_key));
7261
7262
        // Remove versioned meta rowset key
7263
49
        if (!task.versioned_rowset_key.empty()) {
7264
49
            std::string versioned_rowset_key_end = task.versioned_rowset_key;
7265
49
            encode_int64(INT64_MAX, &versioned_rowset_key_end);
7266
49
            txn->remove(task.versioned_rowset_key, versioned_rowset_key_end);
7267
49
            LOG_INFO("remove versioned meta rowset key in cleanup phase")
7268
49
                    .tag("instance_id", instance_id_)
7269
49
                    .tag("tablet_id", tablet_id)
7270
49
                    .tag("rowset_id", rowset_id)
7271
49
                    .tag("begin", hex(task.versioned_rowset_key))
7272
49
                    .tag("end", hex(versioned_rowset_key_end));
7273
49
        }
7274
7275
49
        if (!task.non_versioned_rowset_key.empty()) {
7276
49
            txn->remove(task.non_versioned_rowset_key);
7277
49
            LOG_INFO("remove non versioned rowset key in cleanup phase")
7278
49
                    .tag("instance_id", instance_id_)
7279
49
                    .tag("tablet_id", tablet_id)
7280
49
                    .tag("rowset_id", rowset_id)
7281
49
                    .tag("key", hex(task.non_versioned_rowset_key));
7282
49
        }
7283
7284
        // Remove recycle_rowset_key last to ensure retry safety:
7285
        // if cleanup fails, this key remains and triggers next round retry.
7286
49
        if (!task.recycle_rowset_key.empty()) {
7287
49
            txn->remove(task.recycle_rowset_key);
7288
49
            LOG_INFO("remove recycle rowset key in cleanup phase")
7289
49
                    .tag("instance_id", instance_id_)
7290
49
                    .tag("tablet_id", tablet_id)
7291
49
                    .tag("rowset_id", rowset_id)
7292
49
                    .tag("key", hex(task.recycle_rowset_key));
7293
49
        }
7294
7295
49
        err = txn->commit();
7296
49
        if (err != TxnErrorCode::TXN_OK) {
7297
            // Metadata cleanup failed. recycle_rowset_key remains, next round will retry.
7298
0
            LOG_WARNING("failed to commit cleanup metadata txn, will retry next round")
7299
0
                    .tag("instance_id", instance_id_)
7300
0
                    .tag("tablet_id", tablet_id)
7301
0
                    .tag("rowset_id", rowset_id)
7302
0
                    .tag("err", err);
7303
0
            ret = -1;
7304
0
            continue;
7305
0
        }
7306
7307
49
        LOG_INFO("cleanup rowset metadata success")
7308
49
                .tag("instance_id", instance_id_)
7309
49
                .tag("tablet_id", tablet_id)
7310
49
                .tag("rowset_id", rowset_id);
7311
49
    }
7312
10
    return ret;
7313
10
}
7314
7315
} // namespace doris::cloud