Coverage Report

Created: 2026-05-09 17:16

/root/doris/cloud/src/recycler/recycler.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "recycler/recycler.h"
19
20
#include <brpc/builtin_service.pb.h>
21
#include <brpc/server.h>
22
#include <butil/endpoint.h>
23
#include <butil/strings/string_split.h>
24
#include <bvar/status.h>
25
#include <gen_cpp/cloud.pb.h>
26
#include <gen_cpp/olap_file.pb.h>
27
28
#include <algorithm>
29
#include <atomic>
30
#include <chrono>
31
#include <cstddef>
32
#include <cstdint>
33
#include <cstdlib>
34
#include <deque>
35
#include <functional>
36
#include <initializer_list>
37
#include <memory>
38
#include <numeric>
39
#include <random>
40
#include <string>
41
#include <string_view>
42
#include <thread>
43
#include <unordered_map>
44
#include <utility>
45
#include <variant>
46
47
#include "common/defer.h"
48
#include "common/stopwatch.h"
49
#include "meta-service/meta_service.h"
50
#include "meta-service/meta_service_helper.h"
51
#include "meta-service/meta_service_schema.h"
52
#include "meta-store/blob_message.h"
53
#include "meta-store/meta_reader.h"
54
#include "meta-store/txn_kv.h"
55
#include "meta-store/txn_kv_error.h"
56
#include "meta-store/versioned_value.h"
57
#include "recycler/checker.h"
58
#ifdef ENABLE_HDFS_STORAGE_VAULT
59
#include "recycler/hdfs_accessor.h"
60
#endif
61
#include "recycler/s3_accessor.h"
62
#include "recycler/storage_vault_accessor.h"
63
#ifdef UNIT_TEST
64
#include "../test/mock_accessor.h"
65
#endif
66
#include "common/bvars.h"
67
#include "common/config.h"
68
#include "common/encryption_util.h"
69
#include "common/logging.h"
70
#include "common/simple_thread_pool.h"
71
#include "common/util.h"
72
#include "cpp/sync_point.h"
73
#include "meta-store/codec.h"
74
#include "meta-store/document_message.h"
75
#include "meta-store/keys.h"
76
#include "recycler/recycler_service.h"
77
#include "recycler/sync_executor.h"
78
#include "recycler/util.h"
79
#include "snapshot/snapshot_manager_factory.h"
80
81
namespace doris::cloud {
82
83
using namespace std::chrono;
84
85
namespace {
86
87
0
int64_t packed_file_retry_sleep_ms() {
88
0
    const int64_t min_ms = std::max<int64_t>(0, config::packed_file_txn_retry_sleep_min_ms);
89
0
    const int64_t max_ms = std::max<int64_t>(min_ms, config::packed_file_txn_retry_sleep_max_ms);
90
0
    thread_local std::mt19937_64 gen(std::random_device {}());
91
0
    std::uniform_int_distribution<int64_t> dist(min_ms, max_ms);
92
0
    return dist(gen);
93
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_126packed_file_retry_sleep_msEv
94
95
0
void sleep_for_packed_file_retry() {
96
0
    std::this_thread::sleep_for(std::chrono::milliseconds(packed_file_retry_sleep_ms()));
97
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloud12_GLOBAL__N_127sleep_for_packed_file_retryEv
98
99
} // namespace
100
101
// return 0 for success get a key, 1 for key not found, negative for error
102
0
[[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) {
103
0
    std::unique_ptr<Transaction> txn;
104
0
    TxnErrorCode err = txn_kv->create_txn(&txn);
105
0
    if (err != TxnErrorCode::TXN_OK) {
106
0
        return -1;
107
0
    }
108
0
    switch (txn->get(key, &val, true)) {
109
0
    case TxnErrorCode::TXN_OK:
110
0
        return 0;
111
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
112
0
        return 1;
113
0
    default:
114
0
        return -1;
115
0
    };
116
0
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
Unexecuted instantiation: recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEERNSt7__cxx1112basic_stringIcS5_SaIcEEE
117
118
// 0 for success, negative for error
119
static int txn_get(TxnKv* txn_kv, std::string_view begin, std::string_view end,
120
337
                   std::unique_ptr<RangeGetIterator>& it) {
121
337
    std::unique_ptr<Transaction> txn;
122
337
    TxnErrorCode err = txn_kv->create_txn(&txn);
123
337
    if (err != TxnErrorCode::TXN_OK) {
124
0
        return -1;
125
0
    }
126
337
    switch (txn->get(begin, end, &it, true)) {
127
337
    case TxnErrorCode::TXN_OK:
128
337
        return 0;
129
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
130
0
        return 1;
131
0
    default:
132
0
        return -1;
133
337
    };
134
0
}
recycler.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
120
31
                   std::unique_ptr<RangeGetIterator>& it) {
121
31
    std::unique_ptr<Transaction> txn;
122
31
    TxnErrorCode err = txn_kv->create_txn(&txn);
123
31
    if (err != TxnErrorCode::TXN_OK) {
124
0
        return -1;
125
0
    }
126
31
    switch (txn->get(begin, end, &it, true)) {
127
31
    case TxnErrorCode::TXN_OK:
128
31
        return 0;
129
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
130
0
        return 1;
131
0
    default:
132
0
        return -1;
133
31
    };
134
0
}
recycler_test.cpp:_ZN5doris5cloudL7txn_getEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_RSt10unique_ptrINS0_16RangeGetIteratorESt14default_deleteIS8_EE
Line
Count
Source
120
306
                   std::unique_ptr<RangeGetIterator>& it) {
121
306
    std::unique_ptr<Transaction> txn;
122
306
    TxnErrorCode err = txn_kv->create_txn(&txn);
123
306
    if (err != TxnErrorCode::TXN_OK) {
124
0
        return -1;
125
0
    }
126
306
    switch (txn->get(begin, end, &it, true)) {
127
306
    case TxnErrorCode::TXN_OK:
128
306
        return 0;
129
0
    case TxnErrorCode::TXN_KEY_NOT_FOUND:
130
0
        return 1;
131
0
    default:
132
0
        return -1;
133
306
    };
134
0
}
135
136
// return 0 for success otherwise error
137
6
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
138
6
    std::unique_ptr<Transaction> txn;
139
6
    TxnErrorCode err = txn_kv->create_txn(&txn);
140
6
    if (err != TxnErrorCode::TXN_OK) {
141
0
        return -1;
142
0
    }
143
10
    for (auto k : keys) {
144
10
        txn->remove(k);
145
10
    }
146
6
    switch (txn->commit()) {
147
6
    case TxnErrorCode::TXN_OK:
148
6
        return 0;
149
0
    case TxnErrorCode::TXN_CONFLICT:
150
0
        return -1;
151
0
    default:
152
0
        return -1;
153
6
    }
154
6
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
137
1
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
138
1
    std::unique_ptr<Transaction> txn;
139
1
    TxnErrorCode err = txn_kv->create_txn(&txn);
140
1
    if (err != TxnErrorCode::TXN_OK) {
141
0
        return -1;
142
0
    }
143
1
    for (auto k : keys) {
144
1
        txn->remove(k);
145
1
    }
146
1
    switch (txn->commit()) {
147
1
    case TxnErrorCode::TXN_OK:
148
1
        return 0;
149
0
    case TxnErrorCode::TXN_CONFLICT:
150
0
        return -1;
151
0
    default:
152
0
        return -1;
153
1
    }
154
1
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorISt17basic_string_viewIcSt11char_traitsIcEESaIS7_EE
Line
Count
Source
137
5
static int txn_remove(TxnKv* txn_kv, std::vector<std::string_view> keys) {
138
5
    std::unique_ptr<Transaction> txn;
139
5
    TxnErrorCode err = txn_kv->create_txn(&txn);
140
5
    if (err != TxnErrorCode::TXN_OK) {
141
0
        return -1;
142
0
    }
143
9
    for (auto k : keys) {
144
9
        txn->remove(k);
145
9
    }
146
5
    switch (txn->commit()) {
147
5
    case TxnErrorCode::TXN_OK:
148
5
        return 0;
149
0
    case TxnErrorCode::TXN_CONFLICT:
150
0
        return -1;
151
0
    default:
152
0
        return -1;
153
5
    }
154
5
}
155
156
// return 0 for success otherwise error
157
125
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
158
125
    std::unique_ptr<Transaction> txn;
159
125
    TxnErrorCode err = txn_kv->create_txn(&txn);
160
125
    if (err != TxnErrorCode::TXN_OK) {
161
0
        return -1;
162
0
    }
163
105k
    for (auto& k : keys) {
164
105k
        txn->remove(k);
165
105k
    }
166
125
    switch (txn->commit()) {
167
125
    case TxnErrorCode::TXN_OK:
168
125
        return 0;
169
0
    case TxnErrorCode::TXN_CONFLICT:
170
0
        return -1;
171
0
    default:
172
0
        return -1;
173
125
    }
174
125
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
157
33
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
158
33
    std::unique_ptr<Transaction> txn;
159
33
    TxnErrorCode err = txn_kv->create_txn(&txn);
160
33
    if (err != TxnErrorCode::TXN_OK) {
161
0
        return -1;
162
0
    }
163
33
    for (auto& k : keys) {
164
16
        txn->remove(k);
165
16
    }
166
33
    switch (txn->commit()) {
167
33
    case TxnErrorCode::TXN_OK:
168
33
        return 0;
169
0
    case TxnErrorCode::TXN_CONFLICT:
170
0
        return -1;
171
0
    default:
172
0
        return -1;
173
33
    }
174
33
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt6vectorINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESaIS9_EE
Line
Count
Source
157
92
static int txn_remove(TxnKv* txn_kv, std::vector<std::string> keys) {
158
92
    std::unique_ptr<Transaction> txn;
159
92
    TxnErrorCode err = txn_kv->create_txn(&txn);
160
92
    if (err != TxnErrorCode::TXN_OK) {
161
0
        return -1;
162
0
    }
163
105k
    for (auto& k : keys) {
164
105k
        txn->remove(k);
165
105k
    }
166
92
    switch (txn->commit()) {
167
92
    case TxnErrorCode::TXN_OK:
168
92
        return 0;
169
0
    case TxnErrorCode::TXN_CONFLICT:
170
0
        return -1;
171
0
    default:
172
0
        return -1;
173
92
    }
174
92
}
175
176
// return 0 for success otherwise error
177
[[maybe_unused]] static int txn_remove(TxnKv* txn_kv, std::string_view begin,
178
106k
                                       std::string_view end) {
179
106k
    std::unique_ptr<Transaction> txn;
180
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
181
106k
    if (err != TxnErrorCode::TXN_OK) {
182
0
        return -1;
183
0
    }
184
106k
    txn->remove(begin, end);
185
106k
    switch (txn->commit()) {
186
106k
    case TxnErrorCode::TXN_OK:
187
106k
        return 0;
188
0
    case TxnErrorCode::TXN_CONFLICT:
189
0
        return -1;
190
0
    default:
191
0
        return -1;
192
106k
    }
193
106k
}
recycler.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
178
16
                                       std::string_view end) {
179
16
    std::unique_ptr<Transaction> txn;
180
16
    TxnErrorCode err = txn_kv->create_txn(&txn);
181
16
    if (err != TxnErrorCode::TXN_OK) {
182
0
        return -1;
183
0
    }
184
16
    txn->remove(begin, end);
185
16
    switch (txn->commit()) {
186
16
    case TxnErrorCode::TXN_OK:
187
16
        return 0;
188
0
    case TxnErrorCode::TXN_CONFLICT:
189
0
        return -1;
190
0
    default:
191
0
        return -1;
192
16
    }
193
16
}
recycler_test.cpp:_ZN5doris5cloudL10txn_removeEPNS0_5TxnKvESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
178
106k
                                       std::string_view end) {
179
106k
    std::unique_ptr<Transaction> txn;
180
106k
    TxnErrorCode err = txn_kv->create_txn(&txn);
181
106k
    if (err != TxnErrorCode::TXN_OK) {
182
0
        return -1;
183
0
    }
184
106k
    txn->remove(begin, end);
185
106k
    switch (txn->commit()) {
186
106k
    case TxnErrorCode::TXN_OK:
187
106k
        return 0;
188
0
    case TxnErrorCode::TXN_CONFLICT:
189
0
        return -1;
190
0
    default:
191
0
        return -1;
192
106k
    }
193
106k
}
194
195
void scan_restore_job_rowset(
196
        Transaction* txn, const std::string& instance_id, int64_t tablet_id, MetaServiceCode& code,
197
        std::string& msg,
198
        std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>>* restore_job_rs_metas);
199
200
static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name,
201
                                      int64_t num_scanned, int64_t num_recycled,
202
52
                                      int64_t start_time) {
203
52
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
204
0
        int64_t cost =
205
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
206
0
        if (cost > config::recycle_task_threshold_seconds) {
207
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
208
0
                    .tag("instance_id", instance_id)
209
0
                    .tag("task", task_name)
210
0
                    .tag("num_scanned", num_scanned)
211
0
                    .tag("num_recycled", num_recycled);
212
0
        }
213
0
    }
214
52
    return;
215
52
}
recycler.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
202
2
                                      int64_t start_time) {
203
2
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
204
0
        int64_t cost =
205
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
206
0
        if (cost > config::recycle_task_threshold_seconds) {
207
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
208
0
                    .tag("instance_id", instance_id)
209
0
                    .tag("task", task_name)
210
0
                    .tag("num_scanned", num_scanned)
211
0
                    .tag("num_recycled", num_recycled);
212
0
        }
213
0
    }
214
2
    return;
215
2
}
recycler_test.cpp:_ZN5doris5cloudL18check_recycle_taskERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_lll
Line
Count
Source
202
50
                                      int64_t start_time) {
203
50
    if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] {
204
0
        int64_t cost =
205
0
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
206
0
        if (cost > config::recycle_task_threshold_seconds) {
207
0
            LOG_WARNING("recycle task cost too much time cost={}s", cost)
208
0
                    .tag("instance_id", instance_id)
209
0
                    .tag("task", task_name)
210
0
                    .tag("num_scanned", num_scanned)
211
0
                    .tag("num_recycled", num_recycled);
212
0
        }
213
0
    }
214
50
    return;
215
50
}
216
217
4
Recycler::Recycler(std::shared_ptr<TxnKv> txn_kv) : txn_kv_(std::move(txn_kv)) {
218
4
    ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port);
219
220
4
    auto s3_producer_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
221
4
                                                               "s3_producer_pool");
222
4
    s3_producer_pool->start();
223
4
    auto recycle_tablet_pool = std::make_shared<SimpleThreadPool>(config::recycle_pool_parallelism,
224
4
                                                                  "recycle_tablet_pool");
225
4
    recycle_tablet_pool->start();
226
4
    auto group_recycle_function_pool = std::make_shared<SimpleThreadPool>(
227
4
            config::recycle_pool_parallelism, "group_recycle_function_pool");
228
4
    group_recycle_function_pool->start();
229
4
    _thread_pool_group =
230
4
            RecyclerThreadPoolGroup(std::move(s3_producer_pool), std::move(recycle_tablet_pool),
231
4
                                    std::move(group_recycle_function_pool));
232
233
4
    auto resource_mgr = std::make_shared<ResourceManager>(txn_kv_);
234
4
    txn_lazy_committer_ = std::make_shared<TxnLazyCommitter>(txn_kv_, std::move(resource_mgr));
235
4
    snapshot_manager_ = create_snapshot_manager(txn_kv_);
236
4
}
237
238
4
Recycler::~Recycler() {
239
4
    if (!stopped()) {
240
0
        stop();
241
0
    }
242
4
}
243
244
4
void Recycler::instance_scanner_callback() {
245
    // sleep 60 seconds before scheduling for the launch procedure to complete:
246
    // some bad hdfs connection may cause some log to stdout stderr
247
    // which may pollute .out file and affect the script to check success
248
4
    std::this_thread::sleep_for(
249
4
            std::chrono::seconds(config::recycler_sleep_before_scheduling_seconds));
250
8
    while (!stopped()) {
251
4
        std::vector<InstanceInfoPB> instances;
252
4
        get_all_instances(txn_kv_.get(), instances);
253
        // TODO(plat1ko): delete job recycle kv of non-existent instances
254
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
255
4
            std::stringstream ss;
256
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
257
4
            return ss.str();
258
4
        }();
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_0clB5cxx11Ev
Line
Count
Source
254
4
        LOG(INFO) << "Recycler get instances: " << [&instances] {
255
4
            std::stringstream ss;
256
30
            for (auto& i : instances) ss << ' ' << i.instance_id();
257
4
            return ss.str();
258
4
        }();
259
4
        if (!instances.empty()) {
260
            // enqueue instances
261
3
            std::lock_guard lock(mtx_);
262
30
            for (auto& instance : instances) {
263
30
                if (instance_filter_.filter_out(instance.instance_id())) continue;
264
30
                auto [_, success] = pending_instance_set_.insert(instance.instance_id());
265
                // skip instance already in pending queue
266
30
                if (success) {
267
30
                    pending_instance_queue_.push_back(std::move(instance));
268
30
                }
269
30
            }
270
3
            pending_instance_cond_.notify_all();
271
3
        }
272
4
        {
273
4
            std::unique_lock lock(mtx_);
274
4
            notifier_.wait_for(lock, std::chrono::seconds(config::recycle_interval_seconds),
275
7
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler25instance_scanner_callbackEvENK3$_1clEv
Line
Count
Source
275
7
                               [&]() { return stopped(); });
276
4
        }
277
4
    }
278
4
}
279
280
8
void Recycler::recycle_callback() {
281
38
    while (!stopped()) {
282
36
        InstanceInfoPB instance;
283
36
        {
284
36
            std::unique_lock lock(mtx_);
285
36
            pending_instance_cond_.wait(
286
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler16recycle_callbackEvENK3$_0clEv
Line
Count
Source
286
48
                    lock, [&]() { return !pending_instance_queue_.empty() || stopped(); });
287
36
            if (stopped()) {
288
6
                return;
289
6
            }
290
30
            instance = std::move(pending_instance_queue_.front());
291
30
            pending_instance_queue_.pop_front();
292
30
            pending_instance_set_.erase(instance.instance_id());
293
30
        }
294
0
        auto& instance_id = instance.instance_id();
295
30
        {
296
30
            std::lock_guard lock(mtx_);
297
            // skip instance in recycling
298
30
            if (recycling_instance_map_.count(instance_id)) continue;
299
30
        }
300
30
        auto instance_recycler = std::make_shared<InstanceRecycler>(
301
30
                txn_kv_, instance, _thread_pool_group, txn_lazy_committer_);
302
303
30
        if (int r = instance_recycler->init(); r != 0) {
304
0
            LOG(WARNING) << "failed to init instance recycler, instance_id=" << instance_id
305
0
                         << " ret=" << r;
306
0
            continue;
307
0
        }
308
30
        std::string recycle_job_key;
309
30
        job_recycle_key({instance_id}, &recycle_job_key);
310
30
        int ret = prepare_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id,
311
30
                                               ip_port_, config::recycle_interval_seconds * 1000);
312
30
        if (ret != 0) { // Prepare failed
313
19
            LOG(WARNING) << "failed to prepare recycle_job, instance_id=" << instance_id
314
19
                         << " ret=" << ret;
315
19
            continue;
316
19
        } else {
317
11
            std::lock_guard lock(mtx_);
318
11
            recycling_instance_map_.emplace(instance_id, instance_recycler);
319
11
        }
320
11
        if (stopped()) return;
321
11
        LOG_WARNING("begin to recycle instance").tag("instance_id", instance_id);
322
11
        auto ctime_ms = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
323
11
        g_bvar_recycler_instance_recycle_start_ts.put({instance_id}, ctime_ms);
324
11
        g_bvar_recycler_instance_recycle_task_status.put({"submitted"}, 1);
325
11
        ret = instance_recycler->do_recycle();
326
        // If instance recycler has been aborted, don't finish this job
327
328
11
        if (!instance_recycler->stopped()) {
329
10
            finish_instance_recycle_job(txn_kv_.get(), recycle_job_key, instance_id, ip_port_,
330
10
                                        ret == 0, ctime_ms);
331
10
        }
332
11
        if (instance_recycler->stopped() || ret != 0) {
333
0
            g_bvar_recycler_instance_recycle_task_status.put({"error"}, 1);
334
0
        }
335
11
        {
336
11
            std::lock_guard lock(mtx_);
337
11
            recycling_instance_map_.erase(instance_id);
338
11
        }
339
340
11
        auto now = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
341
11
        auto elpased_ms = now - ctime_ms;
342
11
        g_bvar_recycler_instance_recycle_end_ts.put({instance_id}, now);
343
11
        g_bvar_recycler_instance_last_round_recycle_duration.put({instance_id}, elpased_ms);
344
11
        g_bvar_recycler_instance_next_ts.put({instance_id},
345
11
                                             now + config::recycle_interval_seconds * 1000);
346
11
        g_bvar_recycler_instance_recycle_task_status.put({"completed"}, 1);
347
11
        LOG(INFO) << "recycle instance done, "
348
11
                  << "instance_id=" << instance_id << " ret=" << ret << " ctime_ms: " << ctime_ms
349
11
                  << " now: " << now;
350
351
11
        g_bvar_recycler_instance_recycle_last_success_ts.put({instance_id}, now);
352
353
11
        LOG_WARNING("finish recycle instance")
354
11
                .tag("instance_id", instance_id)
355
11
                .tag("cost_ms", elpased_ms);
356
11
    }
357
8
}
358
359
4
void Recycler::lease_recycle_jobs() {
360
54
    while (!stopped()) {
361
50
        std::vector<std::string> instances;
362
50
        instances.reserve(recycling_instance_map_.size());
363
50
        {
364
50
            std::lock_guard lock(mtx_);
365
50
            for (auto& [id, _] : recycling_instance_map_) {
366
30
                instances.push_back(id);
367
30
            }
368
50
        }
369
50
        for (auto& i : instances) {
370
30
            std::string recycle_job_key;
371
30
            job_recycle_key({i}, &recycle_job_key);
372
30
            int ret = lease_instance_recycle_job(txn_kv_.get(), recycle_job_key, i, ip_port_);
373
30
            if (ret == 1) {
374
0
                std::lock_guard lock(mtx_);
375
0
                if (auto it = recycling_instance_map_.find(i);
376
0
                    it != recycling_instance_map_.end()) {
377
0
                    it->second->stop();
378
0
                }
379
0
            }
380
30
        }
381
50
        {
382
50
            std::unique_lock lock(mtx_);
383
50
            notifier_.wait_for(lock,
384
50
                               std::chrono::milliseconds(config::recycle_job_lease_expired_ms / 3),
385
100
                               [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler18lease_recycle_jobsEvENK3$_0clEv
Line
Count
Source
385
100
                               [&]() { return stopped(); });
386
50
        }
387
50
    }
388
4
}
389
390
4
void Recycler::check_recycle_tasks() {
391
7
    while (!stopped()) {
392
3
        std::unordered_map<std::string, std::shared_ptr<InstanceRecycler>> recycling_instance_map;
393
3
        {
394
3
            std::lock_guard lock(mtx_);
395
3
            recycling_instance_map = recycling_instance_map_;
396
3
        }
397
3
        for (auto& entry : recycling_instance_map) {
398
0
            entry.second->check_recycle_tasks();
399
0
        }
400
401
3
        std::unique_lock lock(mtx_);
402
3
        notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds),
403
6
                           [&]() { return stopped(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler19check_recycle_tasksEvENK3$_0clEv
Line
Count
Source
403
6
                           [&]() { return stopped(); });
404
3
    }
405
4
}
406
407
4
int Recycler::start(brpc::Server* server) {
408
4
    instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist);
409
4
    g_bvar_recycler_task_max_concurrency.set_value(config::recycle_concurrency);
410
4
    S3Environment::getInstance();
411
412
4
    if (config::enable_checker) {
413
0
        checker_ = std::make_unique<Checker>(txn_kv_);
414
0
        int ret = checker_->start();
415
0
        std::string msg;
416
0
        if (ret != 0) {
417
0
            msg = "failed to start checker";
418
0
            LOG(ERROR) << msg;
419
0
            std::cerr << msg << std::endl;
420
0
            return ret;
421
0
        }
422
0
        msg = "checker started";
423
0
        LOG(INFO) << msg;
424
0
        std::cout << msg << std::endl;
425
0
    }
426
427
4
    if (server) {
428
        // Add service
429
1
        auto recycler_service =
430
1
                new RecyclerServiceImpl(txn_kv_, this, checker_.get(), txn_lazy_committer_);
431
1
        server->AddService(recycler_service, brpc::SERVER_OWNS_SERVICE);
432
1
    }
433
434
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_0clEv
Line
Count
Source
434
4
    workers_.emplace_back([this] { instance_scanner_callback(); });
435
12
    for (int i = 0; i < config::recycle_concurrency; ++i) {
436
8
        workers_.emplace_back([this] { recycle_callback(); });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud8Recycler5startEPN4brpc6ServerEENK3$_1clEv
Line
Count
Source
436
8
        workers_.emplace_back([this] { recycle_callback(); });
437
8
    }
438
439
4
    workers_.emplace_back(std::mem_fn(&Recycler::lease_recycle_jobs), this);
440
4
    workers_.emplace_back(std::mem_fn(&Recycler::check_recycle_tasks), this);
441
442
4
    if (config::enable_snapshot_data_migrator) {
443
0
        snapshot_data_migrator_ = std::make_shared<SnapshotDataMigrator>(txn_kv_);
444
0
        int ret = snapshot_data_migrator_->start();
445
0
        if (ret != 0) {
446
0
            LOG(ERROR) << "failed to start snapshot data migrator";
447
0
            return ret;
448
0
        }
449
0
        LOG(INFO) << "snapshot data migrator started";
450
0
    }
451
452
4
    if (config::enable_snapshot_chain_compactor) {
453
0
        snapshot_chain_compactor_ = std::make_shared<SnapshotChainCompactor>(txn_kv_);
454
0
        int ret = snapshot_chain_compactor_->start();
455
0
        if (ret != 0) {
456
0
            LOG(ERROR) << "failed to start snapshot chain compactor";
457
0
            return ret;
458
0
        }
459
0
        LOG(INFO) << "snapshot chain compactor started";
460
0
    }
461
462
4
    return 0;
463
4
}
464
465
4
void Recycler::stop() {
466
4
    stopped_ = true;
467
4
    notifier_.notify_all();
468
4
    pending_instance_cond_.notify_all();
469
4
    {
470
4
        std::lock_guard lock(mtx_);
471
4
        for (auto& [_, recycler] : recycling_instance_map_) {
472
0
            recycler->stop();
473
0
        }
474
4
    }
475
20
    for (auto& w : workers_) {
476
20
        if (w.joinable()) w.join();
477
20
    }
478
4
    if (checker_) {
479
0
        checker_->stop();
480
0
    }
481
4
    if (snapshot_data_migrator_) {
482
0
        snapshot_data_migrator_->stop();
483
0
    }
484
4
    if (snapshot_chain_compactor_) {
485
0
        snapshot_chain_compactor_->stop();
486
0
    }
487
4
}
488
489
class InstanceRecycler::InvertedIndexIdCache {
490
public:
491
    InvertedIndexIdCache(std::string instance_id, std::shared_ptr<TxnKv> txn_kv)
492
132
            : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) {}
493
494
    // Return 0 if success, 1 if schema kv not found, negative for error
495
    // For the same index_id, schema_version, res, since `get` is not completely atomic
496
    // one thread has not finished inserting, and another thread has not get the index_id and schema_version,
497
    // resulting in repeated addition and inaccuracy.
498
    // however, this approach can reduce the lock range and sacrifice a bit of meta repeated get to improve concurrency performance.
499
    // repeated addition does not affect correctness.
500
28.4k
    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
501
28.4k
        {
502
28.4k
            std::lock_guard lock(mtx_);
503
28.4k
            if (schemas_without_inverted_index_.count({index_id, schema_version})) {
504
4.07k
                return 0;
505
4.07k
            }
506
24.3k
            if (auto it = inverted_index_id_map_.find({index_id, schema_version});
507
24.3k
                it != inverted_index_id_map_.end()) {
508
16.5k
                res = it->second;
509
16.5k
                return 0;
510
16.5k
            }
511
24.3k
        }
512
        // Get schema from kv
513
        // TODO(plat1ko): Single flight
514
7.82k
        std::unique_ptr<Transaction> txn;
515
7.82k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
516
7.82k
        if (err != TxnErrorCode::TXN_OK) {
517
0
            LOG(WARNING) << "failed to create txn, err=" << err;
518
0
            return -1;
519
0
        }
520
7.82k
        auto schema_key = meta_schema_key({instance_id_, index_id, schema_version});
521
7.82k
        ValueBuf val_buf;
522
7.82k
        err = cloud::blob_get(txn.get(), schema_key, &val_buf);
523
7.82k
        if (err != TxnErrorCode::TXN_OK) {
524
504
            LOG(WARNING) << "failed to get schema, err=" << err;
525
504
            return static_cast<int>(err);
526
504
        }
527
7.31k
        doris::TabletSchemaCloudPB schema;
528
7.31k
        if (!parse_schema_value(val_buf, &schema)) {
529
0
            LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
530
0
            return -1;
531
0
        }
532
7.31k
        if (schema.index_size() > 0) {
533
5.69k
            InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
534
5.69k
            if (schema.has_inverted_index_storage_format()) {
535
5.68k
                index_format = schema.inverted_index_storage_format();
536
5.68k
            }
537
5.69k
            res.first = index_format;
538
5.69k
            res.second.reserve(schema.index_size());
539
13.4k
            for (auto& i : schema.index()) {
540
13.4k
                if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
541
13.4k
                    res.second.push_back(std::make_pair(i.index_id(), i.index_suffix_name()));
542
13.4k
                }
543
13.4k
            }
544
5.69k
        }
545
7.31k
        insert(index_id, schema_version, res);
546
7.31k
        return 0;
547
7.31k
    }
548
549
    // Empty `ids` means this schema has no inverted index
550
7.31k
    void insert(int64_t index_id, int32_t schema_version, const InvertedIndexInfo& index_info) {
551
7.31k
        if (index_info.second.empty()) {
552
1.62k
            TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
553
1.62k
            std::lock_guard lock(mtx_);
554
1.62k
            schemas_without_inverted_index_.emplace(index_id, schema_version);
555
5.69k
        } else {
556
5.69k
            TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
557
5.69k
            std::lock_guard lock(mtx_);
558
5.69k
            inverted_index_id_map_.try_emplace({index_id, schema_version}, index_info);
559
5.69k
        }
560
7.31k
    }
561
562
private:
563
    std::string instance_id_;
564
    std::shared_ptr<TxnKv> txn_kv_;
565
566
    std::mutex mtx_;
567
    using Key = std::pair<int64_t, int32_t>; // <index_id, schema_version>
568
    struct HashOfKey {
569
60.0k
        size_t operator()(const Key& key) const {
570
60.0k
            size_t seed = 0;
571
60.0k
            seed = std::hash<int64_t> {}(key.first);
572
60.0k
            seed = std::hash<int32_t> {}(key.second);
573
60.0k
            return seed;
574
60.0k
        }
575
    };
576
    // <index_id, schema_version> -> inverted_index_ids
577
    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> inverted_index_id_map_;
578
    // Store <index_id, schema_version> of schema which doesn't have inverted index
579
    std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
580
};
581
582
InstanceRecycler::InstanceRecycler(std::shared_ptr<TxnKv> txn_kv, const InstanceInfoPB& instance,
583
                                   RecyclerThreadPoolGroup thread_pool_group,
584
                                   std::shared_ptr<TxnLazyCommitter> txn_lazy_committer)
585
        : txn_kv_(std::move(txn_kv)),
586
          instance_id_(instance.instance_id()),
587
          instance_info_(instance),
588
          inverted_index_id_cache_(std::make_unique<InvertedIndexIdCache>(instance_id_, txn_kv_)),
589
          _thread_pool_group(std::move(thread_pool_group)),
590
          txn_lazy_committer_(std::move(txn_lazy_committer)),
591
          delete_bitmap_lock_white_list_(std::make_shared<DeleteBitmapLockWhiteList>()),
592
132
          resource_mgr_(std::make_shared<ResourceManager>(txn_kv_)) {
593
132
    delete_bitmap_lock_white_list_->init();
594
132
    resource_mgr_->init();
595
596
132
    snapshot_manager_ = create_snapshot_manager(txn_kv_);
597
598
    // Since the recycler's resource manager could not be notified when instance info changes,
599
    // we need to refresh the instance info here to ensure the resource manager has the latest info.
600
132
    txn_lazy_committer_->resource_manager()->refresh_instance(instance_id_, instance);
601
132
};
602
603
132
InstanceRecycler::~InstanceRecycler() = default;
604
605
116
int InstanceRecycler::init_obj_store_accessors() {
606
116
    for (const auto& obj_info : instance_info_.obj_info()) {
607
76
#ifdef UNIT_TEST
608
76
        auto accessor = std::make_shared<MockAccessor>();
609
#else
610
        auto s3_conf = S3Conf::from_obj_store_info(obj_info);
611
        if (!s3_conf) {
612
            LOG(WARNING) << "failed to init object accessor, instance_id=" << instance_id_;
613
            return -1;
614
        }
615
616
        std::shared_ptr<S3Accessor> accessor;
617
        int ret = S3Accessor::create(std::move(*s3_conf), &accessor);
618
        if (ret != 0) {
619
            LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
620
                         << " resource_id=" << obj_info.id();
621
            return ret;
622
        }
623
#endif
624
76
        accessor_map_.emplace(obj_info.id(), std::move(accessor));
625
76
    }
626
627
116
    return 0;
628
116
}
629
630
116
int InstanceRecycler::init_storage_vault_accessors() {
631
116
    if (instance_info_.resource_ids().empty()) {
632
109
        return 0;
633
109
    }
634
635
7
    FullRangeGetOptions opts(txn_kv_);
636
7
    opts.prefetch = true;
637
7
    auto it = txn_kv_->full_range_get(storage_vault_key({instance_id_, ""}),
638
7
                                      storage_vault_key({instance_id_, "\xff"}), std::move(opts));
639
640
25
    for (auto kv = it->next(); kv.has_value(); kv = it->next()) {
641
18
        auto [k, v] = *kv;
642
18
        StorageVaultPB vault;
643
18
        if (!vault.ParseFromArray(v.data(), v.size())) {
644
0
            LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
645
0
            return -1;
646
0
        }
647
18
        std::string recycler_storage_vault_white_list = accumulate(
648
18
                config::recycler_storage_vault_white_list.begin(),
649
18
                config::recycler_storage_vault_white_list.end(), std::string(),
650
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28init_storage_vault_accessorsEvENK3$_0clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES8_
Line
Count
Source
650
24
                [](std::string a, std::string b) { return a + (a.empty() ? "" : ",") + b; });
651
18
        LOG_INFO("config::recycler_storage_vault_white_list")
652
18
                .tag("", recycler_storage_vault_white_list);
653
18
        if (!config::recycler_storage_vault_white_list.empty()) {
654
8
            if (auto it = std::find(config::recycler_storage_vault_white_list.begin(),
655
8
                                    config::recycler_storage_vault_white_list.end(), vault.name());
656
8
                it == config::recycler_storage_vault_white_list.end()) {
657
2
                LOG_WARNING(
658
2
                        "failed to init accessor for vault because this vault is not in "
659
2
                        "config::recycler_storage_vault_white_list. ")
660
2
                        .tag(" vault name:", vault.name())
661
2
                        .tag(" config::recycler_storage_vault_white_list:",
662
2
                             recycler_storage_vault_white_list);
663
2
                continue;
664
2
            }
665
8
        }
666
16
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
667
16
                                 &accessor_map_, &vault);
668
16
        if (vault.has_hdfs_info()) {
669
9
#ifdef ENABLE_HDFS_STORAGE_VAULT
670
9
            auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
671
9
            int ret = accessor->init();
672
9
            if (ret != 0) {
673
4
                LOG(WARNING) << "failed to init hdfs accessor. instance_id=" << instance_id_
674
4
                             << " resource_id=" << vault.id() << " name=" << vault.name()
675
4
                             << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
676
4
                continue;
677
4
            }
678
5
            LOG(INFO) << "succeed to init hdfs accessor. instance_id=" << instance_id_
679
5
                      << " resource_id=" << vault.id() << " name=" << vault.name()
680
5
                      << " hdfs_vault=" << vault.hdfs_info().ShortDebugString();
681
5
            accessor_map_.emplace(vault.id(), std::move(accessor));
682
#else
683
            LOG(ERROR) << "HDFS is disabled (via the ENABLE_HDFS_STORAGE_VAULT build option), "
684
                       << "but HDFS storage vaults were detected";
685
#endif
686
7
        } else if (vault.has_obj_info()) {
687
7
            auto s3_conf = S3Conf::from_obj_store_info(vault.obj_info());
688
7
            if (!s3_conf) {
689
1
                LOG(WARNING) << "failed to init object accessor, invalid conf, instance_id="
690
1
                             << instance_id_ << " s3_vault=" << vault.obj_info().ShortDebugString();
691
1
                continue;
692
1
            }
693
694
6
            std::shared_ptr<S3Accessor> accessor;
695
6
            int ret = S3Accessor::create(*s3_conf, &accessor);
696
6
            if (ret != 0) {
697
0
                LOG(WARNING) << "failed to init s3 accessor. instance_id=" << instance_id_
698
0
                             << " resource_id=" << vault.id() << " name=" << vault.name()
699
0
                             << " ret=" << ret
700
0
                             << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
701
0
                continue;
702
0
            }
703
6
            LOG(INFO) << "succeed to init s3 accessor. instance_id=" << instance_id_
704
6
                      << " resource_id=" << vault.id() << " name=" << vault.name() << " ret=" << ret
705
6
                      << " s3_vault=" << encryt_sk(vault.obj_info().ShortDebugString());
706
6
            accessor_map_.emplace(vault.id(), std::move(accessor));
707
6
        }
708
16
    }
709
710
7
    if (!it->is_valid()) {
711
0
        LOG_WARNING("failed to get storage vault kv");
712
0
        return -1;
713
0
    }
714
715
7
    if (accessor_map_.empty()) {
716
1
        LOG(WARNING) << "no accessors for instance=" << instance_id_;
717
1
        return -2;
718
1
    }
719
6
    LOG_INFO("finish init instance recycler number_accessors={} instance=", accessor_map_.size(),
720
6
             instance_id_);
721
722
6
    return 0;
723
7
}
724
725
116
int InstanceRecycler::init() {
726
116
    int ret = init_obj_store_accessors();
727
116
    if (ret != 0) {
728
0
        return ret;
729
0
    }
730
731
116
    return init_storage_vault_accessors();
732
116
}
733
734
template <typename... Func>
735
120
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
120
    return [funcs...]() {
737
120
        return [](std::initializer_list<int> ret_vals) {
738
120
            int i = 0;
739
140
            for (int ret : ret_vals) {
740
140
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
140
            }
744
120
            return i;
745
120
        }({funcs()...});
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
20
            for (int ret : ret_vals) {
740
20
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
20
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESC_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
20
            for (int ret : ret_vals) {
740
20
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
20
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
recycler_test.cpp:_ZZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEvENKUlSt16initializer_listIiEE_clESB_
Line
Count
Source
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
0
                    i = ret;
742
0
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
120
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEv
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEv
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
recycler_test.cpp:_ZZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_ENKUlvE_clEv
Line
Count
Source
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
120
}
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Unexecuted instantiation: recycler.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_2EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_3EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_4ZNS2_10do_recycleEvE3$_5EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_6EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_7EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_8EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE3$_9ZNS2_10do_recycleEvE4$_10EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_11EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_12EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_13EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_14EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
recycler_test.cpp:_ZN5doris5cloud12task_wrapperIJZNS0_16InstanceRecycler10do_recycleEvE4$_15EEESt8functionIFivEEDpT_
Line
Count
Source
735
10
auto task_wrapper(Func... funcs) -> std::function<int()> {
736
10
    return [funcs...]() {
737
10
        return [](std::initializer_list<int> ret_vals) {
738
10
            int i = 0;
739
10
            for (int ret : ret_vals) {
740
10
                if (ret != 0) {
741
10
                    i = ret;
742
10
                }
743
10
            }
744
10
            return i;
745
10
        }({funcs()...});
746
10
    };
747
10
}
748
749
10
int InstanceRecycler::do_recycle() {
750
10
    TEST_SYNC_POINT("InstanceRecycler.do_recycle");
751
10
    tablet_metrics_context_.reset();
752
10
    segment_metrics_context_.reset();
753
10
    DORIS_CLOUD_DEFER {
754
10
        tablet_metrics_context_.finish_report();
755
10
        segment_metrics_context_.finish_report();
756
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_0clEv
Line
Count
Source
753
10
    DORIS_CLOUD_DEFER {
754
10
        tablet_metrics_context_.finish_report();
755
10
        segment_metrics_context_.finish_report();
756
10
    };
757
10
    if (instance_info_.status() == InstanceInfoPB::DELETED) {
758
0
        int res = recycle_cluster_snapshots();
759
0
        if (res != 0) {
760
0
            return -1;
761
0
        }
762
0
        return recycle_deleted_instance();
763
10
    } else if (instance_info_.status() == InstanceInfoPB::NORMAL) {
764
10
        SyncExecutor<int> sync_executor(_thread_pool_group.group_recycle_function_pool,
765
10
                                        fmt::format("instance id {}", instance_id_),
766
120
                                        [](int r) { return r != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_1clEi
Line
Count
Source
766
120
                                        [](int r) { return r != 0; });
767
10
        sync_executor
768
10
                .add(task_wrapper(
769
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_2clEv
Line
Count
Source
769
10
                        [this]() { return InstanceRecycler::recycle_cluster_snapshots(); }))
770
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_3clEv
Line
Count
Source
770
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_operation_logs(); }))
771
10
                .add(task_wrapper( // dropped table and dropped partition need to be recycled in series
772
                                   // becase they may both recycle the same set of tablets
773
                        // recycle dropped table or idexes(mv, rollup)
774
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_4clEv
Line
Count
Source
774
10
                        [this]() -> int { return InstanceRecycler::recycle_indexes(); },
775
                        // recycle dropped partitions
776
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_5clEv
Line
Count
Source
776
10
                        [this]() -> int { return InstanceRecycler::recycle_partitions(); }))
777
10
                .add(task_wrapper(
778
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_6clEv
Line
Count
Source
778
10
                        [this]() -> int { return InstanceRecycler::recycle_tmp_rowsets(); }))
779
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_7clEv
Line
Count
Source
779
10
                .add(task_wrapper([this]() -> int { return InstanceRecycler::recycle_rowsets(); }))
780
10
                .add(task_wrapper(
781
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_8clEv
Line
Count
Source
781
10
                        [this]() -> int { return InstanceRecycler::recycle_packed_files(); }))
782
10
                .add(task_wrapper(
783
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK3$_9clEv
Line
Count
Source
783
10
                        [this]() { return InstanceRecycler::abort_timeout_txn(); },
784
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_10clEv
Line
Count
Source
784
10
                        [this]() { return InstanceRecycler::recycle_expired_txn_label(); }))
785
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_11clEv
Line
Count
Source
785
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_copy_jobs(); }))
786
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_12clEv
Line
Count
Source
786
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_stage(); }))
787
10
                .add(task_wrapper(
788
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_13clEv
Line
Count
Source
788
10
                        [this]() { return InstanceRecycler::recycle_expired_stage_objects(); }))
789
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_14clEv
Line
Count
Source
789
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_versions(); }))
790
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler10do_recycleEvENK4$_15clEv
Line
Count
Source
790
10
                .add(task_wrapper([this]() { return InstanceRecycler::recycle_restore_jobs(); }));
791
10
        bool finished = true;
792
10
        std::vector<int> rets = sync_executor.when_all(&finished);
793
120
        for (int ret : rets) {
794
120
            if (ret != 0) {
795
0
                return ret;
796
0
            }
797
120
        }
798
10
        return finished ? 0 : -1;
799
10
    } else {
800
0
        LOG(WARNING) << "invalid instance status: " << instance_info_.status()
801
0
                     << " instance_id=" << instance_id_;
802
0
        return -1;
803
0
    }
804
10
}
805
806
/**
807
* 1. delete all remote data
808
* 2. delete all kv
809
* 3. remove instance kv
810
*/
811
5
int InstanceRecycler::recycle_deleted_instance() {
812
5
    LOG_WARNING("begin to recycle deleted instance").tag("instance_id", instance_id_);
813
814
5
    int ret = 0;
815
5
    auto start_time = steady_clock::now();
816
817
5
    DORIS_CLOUD_DEFER {
818
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
819
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
820
5
                     << " recycle deleted instance, cost=" << cost
821
5
                     << "s, instance_id=" << instance_id_;
822
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_0clEv
Line
Count
Source
817
5
    DORIS_CLOUD_DEFER {
818
5
        auto cost = duration<float>(steady_clock::now() - start_time).count();
819
5
        LOG(WARNING) << (ret == 0 ? "successfully" : "failed to")
820
5
                     << " recycle deleted instance, cost=" << cost
821
5
                     << "s, instance_id=" << instance_id_;
822
5
    };
823
824
    // Step 1: Recycle tmp rowsets (contains ref count but txn is not committed)
825
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
826
5
        int res = recycle_tmp_rowsets();
827
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
828
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
829
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
830
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
831
            // and cannot be recycled.
832
5
            res = recycle_tmp_rowsets();
833
5
        }
834
5
        return res;
835
5
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_deleted_instanceEvENK3$_1clEv
Line
Count
Source
825
5
    auto recycle_tmp_rowsets_with_mark_delete_enabled = [&]() -> int {
826
5
        int res = recycle_tmp_rowsets();
827
5
        if (res == 0 && config::enable_mark_delete_rowset_before_recycle) {
828
            // If mark_delete_rowset_before_recycle is enabled, we will mark delete rowsets before recycling them,
829
            // so we need to recycle tmp rowsets again to make sure all rowsets in recycle space are marked for
830
            // deletion, otherwise we may meet some corner cases that some rowsets are not marked for deletion
831
            // and cannot be recycled.
832
5
            res = recycle_tmp_rowsets();
833
5
        }
834
5
        return res;
835
5
    };
836
5
    if (recycle_tmp_rowsets_with_mark_delete_enabled() != 0) {
837
0
        LOG_WARNING("failed to recycle tmp rowsets").tag("instance_id", instance_id_);
838
0
        ret = -1;
839
0
        return -1;
840
0
    }
841
842
    // Step 2: Recycle versioned rowsets in recycle space (already marked for deletion)
843
5
    if (recycle_versioned_rowsets() != 0) {
844
0
        LOG_WARNING("failed to recycle versioned rowsets").tag("instance_id", instance_id_);
845
0
        ret = -1;
846
0
        return -1;
847
0
    }
848
849
    // Step 3: Recycle operation logs (can recycle logs not referenced by snapshots)
850
5
    if (recycle_operation_logs() != 0) {
851
0
        LOG_WARNING("failed to recycle operation logs").tag("instance_id", instance_id_);
852
0
        ret = -1;
853
0
        return -1;
854
0
    }
855
856
    // Step 4: Check if there are still cluster snapshots
857
5
    bool has_snapshots = false;
858
5
    if (has_cluster_snapshots(&has_snapshots) != 0) {
859
0
        LOG(WARNING) << "check instance cluster snapshots failed, instance_id=" << instance_id_;
860
0
        ret = -1;
861
0
        return -1;
862
5
    } else if (has_snapshots) {
863
1
        LOG(INFO) << "instance has cluster snapshots, skip recycling, instance_id=" << instance_id_;
864
1
        return 0;
865
1
    }
866
867
4
    bool snapshot_enabled = instance_info().has_snapshot_switch_status() &&
868
4
                            instance_info().snapshot_switch_status() !=
869
1
                                    SnapshotSwitchStatus::SNAPSHOT_SWITCH_DISABLED;
870
4
    if (snapshot_enabled) {
871
1
        bool has_unrecycled_rowsets = false;
872
1
        if (recycle_ref_rowsets(&has_unrecycled_rowsets) != 0) {
873
0
            LOG_WARNING("failed to recycle ref rowsets").tag("instance_id", instance_id_);
874
0
            ret = -1;
875
0
            return -1;
876
1
        } else if (has_unrecycled_rowsets) {
877
0
            LOG_INFO("instance has referenced rowsets, skip recycling")
878
0
                    .tag("instance_id", instance_id_);
879
0
            return ret;
880
0
        }
881
3
    } else { // delete all remote data if snapshot is disabled
882
3
        for (auto& [_, accessor] : accessor_map_) {
883
3
            if (stopped()) {
884
0
                return ret;
885
0
            }
886
887
3
            LOG(INFO) << "begin to delete all objects in " << accessor->uri();
888
3
            int del_ret = accessor->delete_all();
889
3
            if (del_ret == 0) {
890
3
                LOG(INFO) << "successfully delete all objects in " << accessor->uri();
891
3
            } else if (del_ret != 1) { // no need to log, because S3Accessor has logged this error
892
                // If `del_ret == 1`, it can be considered that the object data has been recycled by cloud platform,
893
                // so the recycling has been successful.
894
0
                ret = -1;
895
0
            }
896
3
        }
897
898
3
        if (ret != 0) {
899
0
            LOG(WARNING) << "failed to delete all data of deleted instance=" << instance_id_;
900
0
            return ret;
901
0
        }
902
3
    }
903
904
    // Check successor instance, if exists, skip deleting kv because successor instance may still need the data in kv
905
4
    if (instance_info_.has_successor_instance_id() &&
906
4
        !instance_info_.successor_instance_id().empty()) {
907
0
        std::string key = instance_key(instance_info_.successor_instance_id());
908
0
        std::unique_ptr<Transaction> txn;
909
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
910
0
        if (err != TxnErrorCode::TXN_OK) {
911
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_
912
0
                         << " successor_instance_id=" << instance_info_.successor_instance_id()
913
0
                         << " err=" << err;
914
0
            ret = -1;
915
0
            return -1;
916
0
        }
917
918
0
        std::string value;
919
0
        err = txn->get(key, &value);
920
0
        if (err == TxnErrorCode::TXN_OK) {
921
0
            LOG(INFO) << "instance successor instance is still exist, skip deleting kv,"
922
0
                      << " instance_id=" << instance_id_
923
0
                      << " successor_instance_id=" << instance_info_.successor_instance_id();
924
0
            return 0;
925
0
        } else if (err != TxnErrorCode::TXN_KEY_NOT_FOUND) {
926
0
            LOG(WARNING) << "failed to get successor instance, instance_id=" << instance_id_
927
0
                         << " successor_instance_id=" << instance_info_.successor_instance_id()
928
0
                         << " err=" << err;
929
0
            ret = -1;
930
0
            return -1;
931
0
        }
932
0
    }
933
934
    // delete all kv
935
4
    std::unique_ptr<Transaction> txn;
936
4
    TxnErrorCode err = txn_kv_->create_txn(&txn);
937
4
    if (err != TxnErrorCode::TXN_OK) {
938
0
        LOG(WARNING) << "failed to create txn";
939
0
        ret = -1;
940
0
        return -1;
941
0
    }
942
4
    LOG(INFO) << "begin to delete all kv, instance_id=" << instance_id_;
943
    // delete kv before deleting objects to prevent the checker from misjudging data loss
944
4
    std::string start_txn_key = txn_key_prefix(instance_id_);
945
4
    std::string end_txn_key = txn_key_prefix(instance_id_ + '\x00');
946
4
    txn->remove(start_txn_key, end_txn_key);
947
4
    std::string start_version_key = version_key_prefix(instance_id_);
948
4
    std::string end_version_key = version_key_prefix(instance_id_ + '\x00');
949
4
    txn->remove(start_version_key, end_version_key);
950
4
    std::string start_meta_key = meta_key_prefix(instance_id_);
951
4
    std::string end_meta_key = meta_key_prefix(instance_id_ + '\x00');
952
4
    txn->remove(start_meta_key, end_meta_key);
953
4
    std::string start_recycle_key = recycle_key_prefix(instance_id_);
954
4
    std::string end_recycle_key = recycle_key_prefix(instance_id_ + '\x00');
955
4
    txn->remove(start_recycle_key, end_recycle_key);
956
4
    std::string start_stats_tablet_key = stats_tablet_key({instance_id_, 0, 0, 0, 0});
957
4
    std::string end_stats_tablet_key = stats_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
958
4
    txn->remove(start_stats_tablet_key, end_stats_tablet_key);
959
4
    std::string start_copy_key = copy_key_prefix(instance_id_);
960
4
    std::string end_copy_key = copy_key_prefix(instance_id_ + '\x00');
961
4
    txn->remove(start_copy_key, end_copy_key);
962
    // should not remove job key range, because we need to reserve job recycle kv
963
    // 0:instance_id  1:table_id  2:index_id  3:part_id  4:tablet_id
964
4
    std::string start_job_tablet_key = job_tablet_key({instance_id_, 0, 0, 0, 0});
965
4
    std::string end_job_tablet_key = job_tablet_key({instance_id_, INT64_MAX, 0, 0, 0});
966
4
    txn->remove(start_job_tablet_key, end_job_tablet_key);
967
4
    StorageVaultKeyInfo key_info0 {instance_id_, ""};
968
4
    StorageVaultKeyInfo key_info1 {instance_id_, "\xff"};
969
4
    std::string start_vault_key = storage_vault_key(key_info0);
970
4
    std::string end_vault_key = storage_vault_key(key_info1);
971
4
    txn->remove(start_vault_key, end_vault_key);
972
4
    std::string versioned_version_key_start = versioned::version_key_prefix(instance_id_);
973
4
    std::string versioned_version_key_end = versioned::version_key_prefix(instance_id_ + '\x00');
974
4
    txn->remove(versioned_version_key_start, versioned_version_key_end);
975
4
    std::string versioned_index_key_start = versioned::index_key_prefix(instance_id_);
976
4
    std::string versioned_index_key_end = versioned::index_key_prefix(instance_id_ + '\x00');
977
4
    txn->remove(versioned_index_key_start, versioned_index_key_end);
978
4
    std::string versioned_stats_tablet_key_start = versioned::stats_key_prefix(instance_id_);
979
4
    std::string versioned_stats_tablet_key_end = versioned::stats_key_prefix(instance_id_ + '\x00');
980
4
    txn->remove(versioned_stats_tablet_key_start, versioned_stats_tablet_key_end);
981
4
    std::string versioned_meta_key_start = versioned::meta_key_prefix(instance_id_);
982
4
    std::string versioned_meta_key_end = versioned::meta_key_prefix(instance_id_ + '\x00');
983
4
    txn->remove(versioned_meta_key_start, versioned_meta_key_end);
984
4
    std::string versioned_data_key_start = versioned::data_key_prefix(instance_id_);
985
4
    std::string versioned_data_key_end = versioned::data_key_prefix(instance_id_ + '\x00');
986
4
    txn->remove(versioned_data_key_start, versioned_data_key_end);
987
4
    std::string versioned_log_key_start = versioned::log_key_prefix(instance_id_);
988
4
    std::string versioned_log_key_end = versioned::log_key_prefix(instance_id_ + '\x00');
989
4
    txn->remove(versioned_log_key_start, versioned_log_key_end);
990
4
    err = txn->commit();
991
4
    if (err != TxnErrorCode::TXN_OK) {
992
0
        LOG(WARNING) << "failed to delete all kv, instance_id=" << instance_id_ << ", err=" << err;
993
0
        ret = -1;
994
0
    }
995
996
4
    if (ret == 0) {
997
        // remove instance kv
998
        // ATTN: MUST ensure that cloud platform won't regenerate the same instance id
999
4
        err = txn_kv_->create_txn(&txn);
1000
4
        if (err != TxnErrorCode::TXN_OK) {
1001
0
            LOG(WARNING) << "failed to create txn";
1002
0
            ret = -1;
1003
0
            return ret;
1004
0
        }
1005
4
        std::string key;
1006
4
        instance_key({instance_id_}, &key);
1007
4
        txn->atomic_add(system_meta_service_instance_update_key(), 1);
1008
4
        txn->remove(key);
1009
4
        err = txn->commit();
1010
4
        if (err != TxnErrorCode::TXN_OK) {
1011
0
            LOG(WARNING) << "failed to delete instance kv, instance_id=" << instance_id_
1012
0
                         << " err=" << err;
1013
0
            ret = -1;
1014
0
        }
1015
4
    }
1016
4
    return ret;
1017
4
}
1018
1019
int InstanceRecycler::check_rowset_exists(int64_t tablet_id, const std::string& rowset_id,
1020
9
                                          bool* exists, PackedFileRecycleStats* stats) {
1021
9
    if (exists == nullptr) {
1022
0
        return -1;
1023
0
    }
1024
9
    *exists = false;
1025
1026
9
    std::string begin = meta_rowset_key({instance_id_, tablet_id, 0});
1027
9
    std::string end = meta_rowset_key({instance_id_, tablet_id + 1, 0});
1028
9
    std::string scan_begin = begin;
1029
1030
9
    while (true) {
1031
9
        std::unique_ptr<RangeGetIterator> it_range;
1032
9
        int get_ret = txn_get(txn_kv_.get(), scan_begin, end, it_range);
1033
9
        if (get_ret < 0) {
1034
0
            LOG_WARNING("failed to scan rowset metas when recycling packed file")
1035
0
                    .tag("instance_id", instance_id_)
1036
0
                    .tag("tablet_id", tablet_id)
1037
0
                    .tag("ret", get_ret);
1038
0
            return -1;
1039
0
        }
1040
9
        if (get_ret == 1 || it_range == nullptr || !it_range->has_next()) {
1041
6
            return 0;
1042
6
        }
1043
1044
3
        std::string last_key;
1045
3
        while (it_range->has_next()) {
1046
3
            auto [k, v] = it_range->next();
1047
3
            last_key.assign(k.data(), k.size());
1048
3
            doris::RowsetMetaCloudPB rowset_meta;
1049
3
            if (!rowset_meta.ParseFromArray(v.data(), v.size())) {
1050
0
                LOG_WARNING("malformed rowset meta when checking packed file rowset existence")
1051
0
                        .tag("instance_id", instance_id_)
1052
0
                        .tag("tablet_id", tablet_id)
1053
0
                        .tag("key", hex(k));
1054
0
                continue;
1055
0
            }
1056
3
            if (stats) {
1057
3
                ++stats->rowset_scan_count;
1058
3
            }
1059
3
            if (rowset_meta.rowset_id_v2() == rowset_id) {
1060
3
                *exists = true;
1061
3
                return 0;
1062
3
            }
1063
3
        }
1064
1065
0
        if (!it_range->more()) {
1066
0
            return 0;
1067
0
        }
1068
1069
        // Continue scanning from the next key to keep each transaction short.
1070
0
        scan_begin = std::move(last_key);
1071
0
        scan_begin.push_back('\x00');
1072
0
    }
1073
9
}
1074
1075
int InstanceRecycler::check_recycle_and_tmp_rowset_exists(int64_t tablet_id,
1076
                                                          const std::string& rowset_id,
1077
                                                          int64_t txn_id, bool* recycle_exists,
1078
11
                                                          bool* tmp_exists) {
1079
11
    if (recycle_exists == nullptr || tmp_exists == nullptr) {
1080
0
        return -1;
1081
0
    }
1082
11
    *recycle_exists = false;
1083
11
    *tmp_exists = false;
1084
1085
11
    if (txn_id <= 0) {
1086
0
        LOG_WARNING("invalid txn id when checking recycle/tmp rowset existence")
1087
0
                .tag("instance_id", instance_id_)
1088
0
                .tag("tablet_id", tablet_id)
1089
0
                .tag("rowset_id", rowset_id)
1090
0
                .tag("txn_id", txn_id);
1091
0
        return -1;
1092
0
    }
1093
1094
11
    std::unique_ptr<Transaction> txn;
1095
11
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1096
11
    if (err != TxnErrorCode::TXN_OK) {
1097
0
        LOG_WARNING("failed to create txn when checking recycle/tmp rowset existence")
1098
0
                .tag("instance_id", instance_id_)
1099
0
                .tag("tablet_id", tablet_id)
1100
0
                .tag("rowset_id", rowset_id)
1101
0
                .tag("txn_id", txn_id)
1102
0
                .tag("err", err);
1103
0
        return -1;
1104
0
    }
1105
1106
11
    std::string recycle_key = recycle_rowset_key({instance_id_, tablet_id, rowset_id});
1107
11
    auto ret = key_exists(txn.get(), recycle_key, true);
1108
11
    if (ret == TxnErrorCode::TXN_OK) {
1109
1
        *recycle_exists = true;
1110
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1111
0
        LOG_WARNING("failed to check recycle rowset existence")
1112
0
                .tag("instance_id", instance_id_)
1113
0
                .tag("tablet_id", tablet_id)
1114
0
                .tag("rowset_id", rowset_id)
1115
0
                .tag("key", hex(recycle_key))
1116
0
                .tag("err", ret);
1117
0
        return -1;
1118
0
    }
1119
1120
11
    std::string tmp_key = meta_rowset_tmp_key({instance_id_, txn_id, tablet_id});
1121
11
    ret = key_exists(txn.get(), tmp_key, true);
1122
11
    if (ret == TxnErrorCode::TXN_OK) {
1123
1
        *tmp_exists = true;
1124
10
    } else if (ret != TxnErrorCode::TXN_KEY_NOT_FOUND) {
1125
0
        LOG_WARNING("failed to check tmp rowset existence")
1126
0
                .tag("instance_id", instance_id_)
1127
0
                .tag("tablet_id", tablet_id)
1128
0
                .tag("txn_id", txn_id)
1129
0
                .tag("key", hex(tmp_key))
1130
0
                .tag("err", ret);
1131
0
        return -1;
1132
0
    }
1133
1134
11
    return 0;
1135
11
}
1136
1137
std::pair<std::string, std::shared_ptr<StorageVaultAccessor>>
1138
8
InstanceRecycler::resolve_packed_file_accessor(const std::string& hint) {
1139
8
    if (!hint.empty()) {
1140
8
        if (auto it = accessor_map_.find(hint); it != accessor_map_.end()) {
1141
8
            return {hint, it->second};
1142
8
        }
1143
8
    }
1144
1145
0
    return {"", nullptr};
1146
8
}
1147
1148
int InstanceRecycler::correct_packed_file_info(cloud::PackedFileInfoPB* packed_info, bool* changed,
1149
                                               const std::string& packed_file_path,
1150
3
                                               PackedFileRecycleStats* stats) {
1151
3
    bool local_changed = false;
1152
3
    int64_t left_num = 0;
1153
3
    int64_t left_bytes = 0;
1154
3
    bool all_small_files_confirmed = true;
1155
3
    LOG(INFO) << "begin to correct file: " << packed_file_path;
1156
1157
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1158
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1159
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1160
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1161
14
        LOG_INFO("packed slice correction status")
1162
14
                .tag("instance_id", instance_id_)
1163
14
                .tag("packed_file_path", packed_file_path)
1164
14
                .tag("small_file_path", file.path())
1165
14
                .tag("tablet_id", tablet_id)
1166
14
                .tag("rowset_id", rowset_id)
1167
14
                .tag("txn_id", txn_id)
1168
14
                .tag("size", file.size())
1169
14
                .tag("deleted", file.deleted())
1170
14
                .tag("corrected", file.corrected())
1171
14
                .tag("confirmed_this_round", confirmed_this_round);
1172
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24correct_packed_file_infoEPNS0_16PackedFileInfoPBEPbRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPNS1_22PackedFileRecycleStatsEENK3$_0clERKNS0_13PackedSlicePBEb
Line
Count
Source
1157
14
    auto log_small_file_status = [&](const cloud::PackedSlicePB& file, bool confirmed_this_round) {
1158
14
        int64_t tablet_id = file.has_tablet_id() ? file.tablet_id() : int64_t {-1};
1159
14
        std::string rowset_id = file.has_rowset_id() ? file.rowset_id() : std::string {};
1160
14
        int64_t txn_id = file.has_txn_id() ? file.txn_id() : int64_t {0};
1161
14
        LOG_INFO("packed slice correction status")
1162
14
                .tag("instance_id", instance_id_)
1163
14
                .tag("packed_file_path", packed_file_path)
1164
14
                .tag("small_file_path", file.path())
1165
14
                .tag("tablet_id", tablet_id)
1166
14
                .tag("rowset_id", rowset_id)
1167
14
                .tag("txn_id", txn_id)
1168
14
                .tag("size", file.size())
1169
14
                .tag("deleted", file.deleted())
1170
14
                .tag("corrected", file.corrected())
1171
14
                .tag("confirmed_this_round", confirmed_this_round);
1172
14
    };
1173
1174
17
    for (int i = 0; i < packed_info->slices_size(); ++i) {
1175
14
        auto* small_file = packed_info->mutable_slices(i);
1176
14
        if (small_file->deleted()) {
1177
3
            log_small_file_status(*small_file, small_file->corrected());
1178
3
            continue;
1179
3
        }
1180
1181
11
        if (small_file->corrected()) {
1182
0
            left_num++;
1183
0
            left_bytes += small_file->size();
1184
0
            log_small_file_status(*small_file, true);
1185
0
            continue;
1186
0
        }
1187
1188
11
        if (!small_file->has_tablet_id() || !small_file->has_rowset_id()) {
1189
0
            LOG_WARNING("packed file small file missing identifiers during correction")
1190
0
                    .tag("instance_id", instance_id_)
1191
0
                    .tag("small_file_path", small_file->path())
1192
0
                    .tag("index", i);
1193
0
            return -1;
1194
0
        }
1195
1196
11
        int64_t tablet_id = small_file->tablet_id();
1197
11
        const std::string& rowset_id = small_file->rowset_id();
1198
11
        if (!small_file->has_txn_id() || small_file->txn_id() <= 0) {
1199
0
            LOG_WARNING("packed file small file missing valid txn id during correction")
1200
0
                    .tag("instance_id", instance_id_)
1201
0
                    .tag("small_file_path", small_file->path())
1202
0
                    .tag("index", i)
1203
0
                    .tag("tablet_id", tablet_id)
1204
0
                    .tag("rowset_id", rowset_id)
1205
0
                    .tag("has_txn_id", small_file->has_txn_id())
1206
0
                    .tag("txn_id", small_file->has_txn_id() ? small_file->txn_id() : 0);
1207
0
            return -1;
1208
0
        }
1209
11
        int64_t txn_id = small_file->txn_id();
1210
11
        bool recycle_exists = false;
1211
11
        bool tmp_exists = false;
1212
11
        if (check_recycle_and_tmp_rowset_exists(tablet_id, rowset_id, txn_id, &recycle_exists,
1213
11
                                                &tmp_exists) != 0) {
1214
0
            return -1;
1215
0
        }
1216
1217
11
        bool small_file_confirmed = false;
1218
11
        if (tmp_exists) {
1219
1
            left_num++;
1220
1
            left_bytes += small_file->size();
1221
1
            small_file_confirmed = true;
1222
10
        } else if (recycle_exists) {
1223
1
            left_num++;
1224
1
            left_bytes += small_file->size();
1225
            // keep small_file_confirmed=false so the packed file remains uncorrected
1226
9
        } else {
1227
9
            bool rowset_exists = false;
1228
9
            if (check_rowset_exists(tablet_id, rowset_id, &rowset_exists, stats) != 0) {
1229
0
                return -1;
1230
0
            }
1231
1232
9
            if (!rowset_exists) {
1233
6
                if (!small_file->deleted()) {
1234
6
                    small_file->set_deleted(true);
1235
6
                    local_changed = true;
1236
6
                }
1237
6
                if (!small_file->corrected()) {
1238
6
                    small_file->set_corrected(true);
1239
6
                    local_changed = true;
1240
6
                }
1241
6
                small_file_confirmed = true;
1242
6
            } else {
1243
3
                left_num++;
1244
3
                left_bytes += small_file->size();
1245
3
                small_file_confirmed = true;
1246
3
            }
1247
9
        }
1248
1249
11
        if (!small_file_confirmed) {
1250
1
            all_small_files_confirmed = false;
1251
1
        }
1252
1253
11
        if (small_file->corrected() != small_file_confirmed) {
1254
4
            small_file->set_corrected(small_file_confirmed);
1255
4
            local_changed = true;
1256
4
        }
1257
1258
11
        log_small_file_status(*small_file, small_file_confirmed);
1259
11
    }
1260
1261
3
    if (packed_info->remaining_slice_bytes() != left_bytes) {
1262
3
        packed_info->set_remaining_slice_bytes(left_bytes);
1263
3
        local_changed = true;
1264
3
    }
1265
3
    if (packed_info->ref_cnt() != left_num) {
1266
3
        auto old_ref_cnt = packed_info->ref_cnt();
1267
3
        packed_info->set_ref_cnt(left_num);
1268
3
        LOG_INFO("corrected packed file ref count")
1269
3
                .tag("instance_id", instance_id_)
1270
3
                .tag("resource_id", packed_info->resource_id())
1271
3
                .tag("packed_file_path", packed_file_path)
1272
3
                .tag("old_ref_cnt", old_ref_cnt)
1273
3
                .tag("new_ref_cnt", left_num);
1274
3
        local_changed = true;
1275
3
    }
1276
3
    if (packed_info->corrected() != all_small_files_confirmed) {
1277
2
        packed_info->set_corrected(all_small_files_confirmed);
1278
2
        local_changed = true;
1279
2
    }
1280
3
    if (left_num == 0 && packed_info->state() != cloud::PackedFileInfoPB::RECYCLING) {
1281
1
        packed_info->set_state(cloud::PackedFileInfoPB::RECYCLING);
1282
1
        local_changed = true;
1283
1
    }
1284
1285
3
    if (changed != nullptr) {
1286
3
        *changed = local_changed;
1287
3
    }
1288
3
    return 0;
1289
3
}
1290
1291
int InstanceRecycler::process_single_packed_file(const std::string& packed_key,
1292
                                                 const std::string& packed_file_path,
1293
4
                                                 PackedFileRecycleStats* stats) {
1294
4
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
1295
4
    bool correction_ok = false;
1296
4
    cloud::PackedFileInfoPB packed_info;
1297
1298
4
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
1299
4
        if (stopped()) {
1300
0
            LOG_WARNING("recycler stopped before processing packed file")
1301
0
                    .tag("instance_id", instance_id_)
1302
0
                    .tag("packed_file_path", packed_file_path)
1303
0
                    .tag("attempt", attempt);
1304
0
            return -1;
1305
0
        }
1306
1307
4
        std::unique_ptr<Transaction> txn;
1308
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
1309
4
        if (err != TxnErrorCode::TXN_OK) {
1310
0
            LOG_WARNING("failed to create txn when processing packed file")
1311
0
                    .tag("instance_id", instance_id_)
1312
0
                    .tag("packed_file_path", packed_file_path)
1313
0
                    .tag("attempt", attempt)
1314
0
                    .tag("err", err);
1315
0
            return -1;
1316
0
        }
1317
1318
4
        std::string packed_val;
1319
4
        err = txn->get(packed_key, &packed_val);
1320
4
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1321
0
            return 0;
1322
0
        }
1323
4
        if (err != TxnErrorCode::TXN_OK) {
1324
0
            LOG_WARNING("failed to get packed file kv")
1325
0
                    .tag("instance_id", instance_id_)
1326
0
                    .tag("packed_file_path", packed_file_path)
1327
0
                    .tag("attempt", attempt)
1328
0
                    .tag("err", err);
1329
0
            return -1;
1330
0
        }
1331
1332
4
        if (!packed_info.ParseFromString(packed_val)) {
1333
0
            LOG_WARNING("failed to parse packed file info")
1334
0
                    .tag("instance_id", instance_id_)
1335
0
                    .tag("packed_file_path", packed_file_path)
1336
0
                    .tag("attempt", attempt);
1337
0
            return -1;
1338
0
        }
1339
1340
4
        int64_t now_sec = ::time(nullptr);
1341
4
        bool corrected = packed_info.corrected();
1342
4
        bool due = config::force_immediate_recycle ||
1343
4
                   now_sec - packed_info.created_at_sec() >=
1344
4
                           config::packed_file_correction_delay_seconds;
1345
1346
4
        if (!corrected && due) {
1347
3
            bool changed = false;
1348
3
            if (correct_packed_file_info(&packed_info, &changed, packed_file_path, stats) != 0) {
1349
0
                LOG_WARNING("correct_packed_file_info failed")
1350
0
                        .tag("instance_id", instance_id_)
1351
0
                        .tag("packed_file_path", packed_file_path)
1352
0
                        .tag("attempt", attempt);
1353
0
                return -1;
1354
0
            }
1355
3
            if (changed) {
1356
3
                std::string updated;
1357
3
                if (!packed_info.SerializeToString(&updated)) {
1358
0
                    LOG_WARNING("failed to serialize packed file info after correction")
1359
0
                            .tag("instance_id", instance_id_)
1360
0
                            .tag("packed_file_path", packed_file_path)
1361
0
                            .tag("attempt", attempt);
1362
0
                    return -1;
1363
0
                }
1364
3
                txn->put(packed_key, updated);
1365
3
                err = txn->commit();
1366
3
                if (err == TxnErrorCode::TXN_OK) {
1367
3
                    if (stats) {
1368
3
                        ++stats->num_corrected;
1369
3
                    }
1370
3
                } else {
1371
0
                    if (err == TxnErrorCode::TXN_CONFLICT && attempt < max_retry_times) {
1372
0
                        LOG_WARNING(
1373
0
                                "failed to commit correction for packed file due to conflict, "
1374
0
                                "retrying")
1375
0
                                .tag("instance_id", instance_id_)
1376
0
                                .tag("packed_file_path", packed_file_path)
1377
0
                                .tag("attempt", attempt);
1378
0
                        sleep_for_packed_file_retry();
1379
0
                        packed_info.Clear();
1380
0
                        continue;
1381
0
                    }
1382
0
                    LOG_WARNING("failed to commit correction for packed file")
1383
0
                            .tag("instance_id", instance_id_)
1384
0
                            .tag("packed_file_path", packed_file_path)
1385
0
                            .tag("attempt", attempt)
1386
0
                            .tag("err", err);
1387
0
                    return -1;
1388
0
                }
1389
3
            }
1390
3
        }
1391
1392
4
        correction_ok = true;
1393
4
        break;
1394
4
    }
1395
1396
4
    if (!correction_ok) {
1397
0
        return -1;
1398
0
    }
1399
1400
4
    if (!(packed_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1401
4
          packed_info.ref_cnt() == 0)) {
1402
3
        return 0;
1403
3
    }
1404
1405
1
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
1406
0
        LOG_WARNING("packed file missing resource id when recycling")
1407
0
                .tag("instance_id", instance_id_)
1408
0
                .tag("packed_file_path", packed_file_path);
1409
0
        return -1;
1410
0
    }
1411
1
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
1412
1
    if (!accessor) {
1413
0
        LOG_WARNING("no accessor available to delete packed file")
1414
0
                .tag("instance_id", instance_id_)
1415
0
                .tag("packed_file_path", packed_file_path)
1416
0
                .tag("resource_id", packed_info.resource_id());
1417
0
        return -1;
1418
0
    }
1419
1
    int del_ret = accessor->delete_file(packed_file_path);
1420
1
    if (del_ret != 0 && del_ret != 1) {
1421
0
        LOG_WARNING("failed to delete packed file")
1422
0
                .tag("instance_id", instance_id_)
1423
0
                .tag("packed_file_path", packed_file_path)
1424
0
                .tag("resource_id", resource_id)
1425
0
                .tag("ret", del_ret);
1426
0
        return -1;
1427
0
    }
1428
1
    if (del_ret == 1) {
1429
0
        LOG_INFO("packed file already removed")
1430
0
                .tag("instance_id", instance_id_)
1431
0
                .tag("packed_file_path", packed_file_path)
1432
0
                .tag("resource_id", resource_id);
1433
1
    } else {
1434
1
        LOG_INFO("deleted packed file")
1435
1
                .tag("instance_id", instance_id_)
1436
1
                .tag("packed_file_path", packed_file_path)
1437
1
                .tag("resource_id", resource_id);
1438
1
    }
1439
1440
1
    for (int del_attempt = 1; del_attempt <= max_retry_times; ++del_attempt) {
1441
1
        std::unique_ptr<Transaction> del_txn;
1442
1
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
1443
1
        if (err != TxnErrorCode::TXN_OK) {
1444
0
            LOG_WARNING("failed to create txn when removing packed file kv")
1445
0
                    .tag("instance_id", instance_id_)
1446
0
                    .tag("packed_file_path", packed_file_path)
1447
0
                    .tag("del_attempt", del_attempt)
1448
0
                    .tag("err", err);
1449
0
            return -1;
1450
0
        }
1451
1452
1
        std::string latest_val;
1453
1
        err = del_txn->get(packed_key, &latest_val);
1454
1
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1455
0
            return 0;
1456
0
        }
1457
1
        if (err != TxnErrorCode::TXN_OK) {
1458
0
            LOG_WARNING("failed to re-read packed file kv before removal")
1459
0
                    .tag("instance_id", instance_id_)
1460
0
                    .tag("packed_file_path", packed_file_path)
1461
0
                    .tag("del_attempt", del_attempt)
1462
0
                    .tag("err", err);
1463
0
            return -1;
1464
0
        }
1465
1466
1
        cloud::PackedFileInfoPB latest_info;
1467
1
        if (!latest_info.ParseFromString(latest_val)) {
1468
0
            LOG_WARNING("failed to parse packed file info before removal")
1469
0
                    .tag("instance_id", instance_id_)
1470
0
                    .tag("packed_file_path", packed_file_path)
1471
0
                    .tag("del_attempt", del_attempt);
1472
0
            return -1;
1473
0
        }
1474
1475
1
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
1476
1
              latest_info.ref_cnt() == 0)) {
1477
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
1478
0
                    .tag("instance_id", instance_id_)
1479
0
                    .tag("packed_file_path", packed_file_path)
1480
0
                    .tag("del_attempt", del_attempt);
1481
0
            return 0;
1482
0
        }
1483
1484
1
        del_txn->remove(packed_key);
1485
1
        err = del_txn->commit();
1486
1
        if (err == TxnErrorCode::TXN_OK) {
1487
1
            if (stats) {
1488
1
                ++stats->num_deleted;
1489
1
                stats->bytes_deleted += static_cast<int64_t>(packed_key.size()) +
1490
1
                                        static_cast<int64_t>(latest_val.size());
1491
1
                if (del_ret == 0 || del_ret == 1) {
1492
1
                    ++stats->num_object_deleted;
1493
1
                    int64_t object_size = latest_info.total_slice_bytes();
1494
1
                    if (object_size <= 0) {
1495
0
                        object_size = packed_info.total_slice_bytes();
1496
0
                    }
1497
1
                    stats->bytes_object_deleted += object_size;
1498
1
                }
1499
1
            }
1500
1
            LOG_INFO("removed packed file metadata")
1501
1
                    .tag("instance_id", instance_id_)
1502
1
                    .tag("packed_file_path", packed_file_path);
1503
1
            return 0;
1504
1
        }
1505
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
1506
0
            if (del_attempt >= max_retry_times) {
1507
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
1508
0
                        .tag("instance_id", instance_id_)
1509
0
                        .tag("packed_file_path", packed_file_path)
1510
0
                        .tag("del_attempt", del_attempt);
1511
0
                return -1;
1512
0
            }
1513
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
1514
0
                    .tag("instance_id", instance_id_)
1515
0
                    .tag("packed_file_path", packed_file_path)
1516
0
                    .tag("del_attempt", del_attempt);
1517
0
            sleep_for_packed_file_retry();
1518
0
            continue;
1519
0
        }
1520
0
        LOG_WARNING("failed to remove packed file kv")
1521
0
                .tag("instance_id", instance_id_)
1522
0
                .tag("packed_file_path", packed_file_path)
1523
0
                .tag("del_attempt", del_attempt)
1524
0
                .tag("err", err);
1525
0
        return -1;
1526
0
    }
1527
1528
0
    return -1;
1529
1
}
1530
1531
int InstanceRecycler::handle_packed_file_kv(std::string_view key, std::string_view /*value*/,
1532
4
                                            PackedFileRecycleStats* stats, int* ret) {
1533
4
    if (stats) {
1534
4
        ++stats->num_scanned;
1535
4
    }
1536
4
    std::string packed_file_path;
1537
4
    if (!decode_packed_file_key(key, &packed_file_path)) {
1538
0
        LOG_WARNING("failed to decode packed file key")
1539
0
                .tag("instance_id", instance_id_)
1540
0
                .tag("key", hex(key));
1541
0
        if (stats) {
1542
0
            ++stats->num_failed;
1543
0
        }
1544
0
        if (ret) {
1545
0
            *ret = -1;
1546
0
        }
1547
0
        return 0;
1548
0
    }
1549
1550
4
    std::string packed_key(key);
1551
4
    int process_ret = process_single_packed_file(packed_key, packed_file_path, stats);
1552
4
    if (process_ret != 0) {
1553
0
        if (stats) {
1554
0
            ++stats->num_failed;
1555
0
        }
1556
0
        if (ret) {
1557
0
            *ret = -1;
1558
0
        }
1559
0
    }
1560
4
    return 0;
1561
4
}
1562
1563
int64_t calculate_rowset_expired_time(const std::string& instance_id_, const RecycleRowsetPB& rs,
1564
9.77k
                                      int64_t* earlest_ts /* rowset earliest expiration ts */) {
1565
9.77k
    if (config::force_immediate_recycle) {
1566
15
        return 0L;
1567
15
    }
1568
    // RecycleRowsetPB created by compacted or dropped rowset has no expiration time, and will be recycled when exceed retention time
1569
9.75k
    int64_t expiration = rs.expiration() > 0 ? rs.expiration() : rs.creation_time();
1570
9.75k
    int64_t retention_seconds = config::retention_seconds;
1571
9.75k
    if (rs.type() == RecycleRowsetPB::COMPACT || rs.type() == RecycleRowsetPB::DROP) {
1572
7.80k
        retention_seconds = std::min(config::compacted_rowset_retention_seconds, retention_seconds);
1573
7.80k
    }
1574
9.75k
    int64_t final_expiration = expiration + retention_seconds;
1575
9.75k
    if (*earlest_ts > final_expiration) {
1576
7
        *earlest_ts = final_expiration;
1577
7
        g_bvar_recycler_recycle_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1578
7
    }
1579
9.75k
    return final_expiration;
1580
9.77k
}
1581
1582
int64_t calculate_partition_expired_time(
1583
        const std::string& instance_id_, const RecyclePartitionPB& partition_meta_pb,
1584
9
        int64_t* earlest_ts /* partition earliest expiration ts */) {
1585
9
    if (config::force_immediate_recycle) {
1586
3
        return 0L;
1587
3
    }
1588
6
    int64_t expiration = partition_meta_pb.expiration() > 0 ? partition_meta_pb.expiration()
1589
6
                                                            : partition_meta_pb.creation_time();
1590
6
    int64_t retention_seconds = config::retention_seconds;
1591
6
    if (partition_meta_pb.state() == RecyclePartitionPB::DROPPED) {
1592
6
        retention_seconds =
1593
6
                std::min(config::dropped_partition_retention_seconds, retention_seconds);
1594
6
    }
1595
6
    int64_t final_expiration = expiration + retention_seconds;
1596
6
    if (*earlest_ts > final_expiration) {
1597
2
        *earlest_ts = final_expiration;
1598
2
        g_bvar_recycler_recycle_partition_earlest_ts.put(instance_id_, *earlest_ts);
1599
2
    }
1600
6
    return final_expiration;
1601
9
}
1602
1603
int64_t calculate_index_expired_time(const std::string& instance_id_,
1604
                                     const RecycleIndexPB& index_meta_pb,
1605
10
                                     int64_t* earlest_ts /* index earliest expiration ts */) {
1606
10
    if (config::force_immediate_recycle) {
1607
4
        return 0L;
1608
4
    }
1609
6
    int64_t expiration = index_meta_pb.expiration() > 0 ? index_meta_pb.expiration()
1610
6
                                                        : index_meta_pb.creation_time();
1611
6
    int64_t retention_seconds = config::retention_seconds;
1612
6
    if (index_meta_pb.state() == RecycleIndexPB::DROPPED) {
1613
6
        retention_seconds = std::min(config::dropped_index_retention_seconds, retention_seconds);
1614
6
    }
1615
6
    int64_t final_expiration = expiration + retention_seconds;
1616
6
    if (*earlest_ts > final_expiration) {
1617
2
        *earlest_ts = final_expiration;
1618
2
        g_bvar_recycler_recycle_index_earlest_ts.put(instance_id_, *earlest_ts);
1619
2
    }
1620
6
    return final_expiration;
1621
10
}
1622
1623
int64_t calculate_tmp_rowset_expired_time(
1624
        const std::string& instance_id_, const doris::RowsetMetaCloudPB& tmp_rowset_meta_pb,
1625
106k
        int64_t* earlest_ts /* tmp_rowset earliest expiration ts */) {
1626
    // ATTN: `txn_expiration` should > 0, however we use `creation_time` + a large `retention_time` (> 1 day in production environment)
1627
    //  when `txn_expiration` <= 0 in some unexpected situation (usually when there are bugs). This is usually safe, coz loading
1628
    //  duration or timeout always < `retention_time` in practice.
1629
106k
    int64_t expiration = tmp_rowset_meta_pb.txn_expiration() > 0
1630
106k
                                 ? tmp_rowset_meta_pb.txn_expiration()
1631
106k
                                 : tmp_rowset_meta_pb.creation_time();
1632
106k
    expiration = config::force_immediate_recycle ? 0 : expiration;
1633
106k
    int64_t final_expiration = expiration + config::retention_seconds;
1634
106k
    if (*earlest_ts > final_expiration) {
1635
24
        *earlest_ts = final_expiration;
1636
24
        g_bvar_recycler_recycle_tmp_rowset_earlest_ts.put(instance_id_, *earlest_ts);
1637
24
    }
1638
106k
    return final_expiration;
1639
106k
}
1640
1641
int64_t calculate_txn_expired_time(const std::string& instance_id_, const RecycleTxnPB& txn_meta_pb,
1642
30.0k
                                   int64_t* earlest_ts /* txn earliest expiration ts */) {
1643
30.0k
    int64_t final_expiration = txn_meta_pb.creation_time() + config::label_keep_max_second * 1000L;
1644
30.0k
    if (*earlest_ts > final_expiration / 1000) {
1645
8
        *earlest_ts = final_expiration / 1000;
1646
8
        g_bvar_recycler_recycle_expired_txn_label_earlest_ts.put(instance_id_, *earlest_ts);
1647
8
    }
1648
30.0k
    return final_expiration;
1649
30.0k
}
1650
1651
int64_t calculate_restore_job_expired_time(
1652
        const std::string& instance_id_, const RestoreJobCloudPB& restore_job,
1653
41
        int64_t* earlest_ts /* restore job earliest expiration ts */) {
1654
41
    if (config::force_immediate_recycle || restore_job.state() == RestoreJobCloudPB::DROPPED ||
1655
41
        restore_job.state() == RestoreJobCloudPB::COMPLETED ||
1656
41
        restore_job.state() == RestoreJobCloudPB::RECYCLING) {
1657
        // final state, recycle immediately
1658
41
        return 0L;
1659
41
    }
1660
    // not final state, wait much longer than the FE's timeout(1 day)
1661
0
    int64_t last_modified_s =
1662
0
            restore_job.has_mtime_s() ? restore_job.mtime_s() : restore_job.ctime_s();
1663
0
    int64_t expiration = restore_job.expired_at_s() > 0
1664
0
                                 ? last_modified_s + restore_job.expired_at_s()
1665
0
                                 : last_modified_s;
1666
0
    int64_t final_expiration = expiration + config::retention_seconds;
1667
0
    if (*earlest_ts > final_expiration) {
1668
0
        *earlest_ts = final_expiration;
1669
0
        g_bvar_recycler_recycle_restore_job_earlest_ts.put(instance_id_, *earlest_ts);
1670
0
    }
1671
0
    return final_expiration;
1672
41
}
1673
1674
2
int InstanceRecycler::abort_txn_for_related_rowset(int64_t txn_id) {
1675
2
    AbortTxnRequest req;
1676
2
    TxnInfoPB txn_info;
1677
2
    MetaServiceCode code = MetaServiceCode::OK;
1678
2
    std::string msg;
1679
2
    std::stringstream ss;
1680
2
    std::unique_ptr<Transaction> txn;
1681
2
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1682
2
    if (err != TxnErrorCode::TXN_OK) {
1683
0
        LOG_WARNING("failed to create txn").tag("err", err);
1684
0
        return -1;
1685
0
    }
1686
1687
    // get txn index
1688
2
    TxnIndexPB txn_idx_pb;
1689
2
    auto index_key = txn_index_key({instance_id_, txn_id});
1690
2
    std::string index_val;
1691
2
    err = txn->get(index_key, &index_val);
1692
2
    if (err != TxnErrorCode::TXN_OK) {
1693
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1694
            // maybe recycled
1695
0
            LOG_INFO("txn index not found, txn_id={} instance_id={}", txn_id, instance_id_)
1696
0
                    .tag("key", hex(index_key))
1697
0
                    .tag("txn_id", txn_id);
1698
0
            return 0;
1699
0
        }
1700
0
        LOG_WARNING("failed to get txn index")
1701
0
                .tag("err", err)
1702
0
                .tag("key", hex(index_key))
1703
0
                .tag("txn_id", txn_id);
1704
0
        return -1;
1705
0
    }
1706
2
    if (!txn_idx_pb.ParseFromString(index_val)) {
1707
0
        LOG_WARNING("failed to parse txn index")
1708
0
                .tag("err", err)
1709
0
                .tag("key", hex(index_key))
1710
0
                .tag("txn_id", txn_id);
1711
0
        return -1;
1712
0
    }
1713
1714
2
    auto info_key = txn_info_key({instance_id_, txn_idx_pb.tablet_index().db_id(), txn_id});
1715
2
    std::string info_val;
1716
2
    err = txn->get(info_key, &info_val);
1717
2
    if (err != TxnErrorCode::TXN_OK) {
1718
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1719
            // maybe recycled
1720
0
            LOG_INFO("txn info not found, txn_id={} instance_id={}", txn_id, instance_id_)
1721
0
                    .tag("key", hex(info_key))
1722
0
                    .tag("txn_id", txn_id);
1723
0
            return 0;
1724
0
        }
1725
0
        LOG_WARNING("failed to get txn info")
1726
0
                .tag("err", err)
1727
0
                .tag("key", hex(info_key))
1728
0
                .tag("txn_id", txn_id);
1729
0
        return -1;
1730
0
    }
1731
2
    if (!txn_info.ParseFromString(info_val)) {
1732
0
        LOG_WARNING("failed to parse txn info")
1733
0
                .tag("err", err)
1734
0
                .tag("key", hex(info_key))
1735
0
                .tag("txn_id", txn_id);
1736
0
        return -1;
1737
0
    }
1738
1739
2
    if (txn_info.status() != TxnStatusPB::TXN_STATUS_PREPARED) {
1740
0
        LOG_INFO("txn is not prepared status, txn_id={} status={}", txn_id, txn_info.status())
1741
0
                .tag("key", hex(info_key))
1742
0
                .tag("txn_id", txn_id);
1743
0
        return 0;
1744
0
    }
1745
1746
2
    req.set_txn_id(txn_id);
1747
1748
2
    LOG(INFO) << "begin abort txn for related rowset, txn_id=" << txn_id
1749
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString();
1750
1751
2
    _abort_txn(instance_id_, &req, txn.get(), txn_info, ss, code, msg);
1752
2
    err = txn->commit();
1753
2
    if (err != TxnErrorCode::TXN_OK) {
1754
0
        code = cast_as<ErrCategory::COMMIT>(err);
1755
0
        ss << "failed to commit kv txn, txn_id=" << txn_info.txn_id() << " err=" << err;
1756
0
        msg = ss.str();
1757
0
        return -1;
1758
0
    }
1759
1760
2
    LOG(INFO) << "finish abort txn for related rowset, txn_id=" << txn_id
1761
2
              << " instance_id=" << instance_id_ << " txn_info=" << txn_info.ShortDebugString()
1762
2
              << " code=" << code << " msg=" << msg;
1763
1764
2
    return 0;
1765
2
}
1766
1767
4
int InstanceRecycler::abort_job_for_related_rowset(const RowsetMetaCloudPB& rowset_meta) {
1768
4
    FinishTabletJobRequest req;
1769
4
    FinishTabletJobResponse res;
1770
4
    req.set_action(FinishTabletJobRequest::ABORT);
1771
4
    MetaServiceCode code = MetaServiceCode::OK;
1772
4
    std::string msg;
1773
4
    std::stringstream ss;
1774
1775
4
    TabletIndexPB tablet_idx;
1776
4
    int ret = get_tablet_idx(txn_kv_.get(), instance_id_, rowset_meta.tablet_id(), tablet_idx);
1777
4
    if (ret == 1) {
1778
        // tablet maybe recycled, directly return 0
1779
1
        return 0;
1780
3
    } else if (ret != 0) {
1781
0
        LOG(WARNING) << "failed to get tablet index, tablet_id=" << rowset_meta.tablet_id()
1782
0
                     << " instance_id=" << instance_id_ << " ret=" << ret;
1783
0
        return ret;
1784
0
    }
1785
1786
3
    std::unique_ptr<Transaction> txn;
1787
3
    TxnErrorCode err = txn_kv_->create_txn(&txn);
1788
3
    if (err != TxnErrorCode::TXN_OK) {
1789
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id_ << " err=" << err;
1790
0
        return -1;
1791
0
    }
1792
1793
3
    std::string job_key =
1794
3
            job_tablet_key({instance_id_, tablet_idx.table_id(), tablet_idx.index_id(),
1795
3
                            tablet_idx.partition_id(), tablet_idx.tablet_id()});
1796
3
    std::string job_val;
1797
3
    err = txn->get(job_key, &job_val);
1798
3
    if (err != TxnErrorCode::TXN_OK) {
1799
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
1800
0
            LOG(INFO) << "job not exists, instance_id=" << instance_id_
1801
0
                      << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1802
0
            return 0;
1803
0
        }
1804
0
        LOG(WARNING) << "failed to get job, instance_id=" << instance_id_
1805
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " err=" << err
1806
0
                     << " key=" << hex(job_key);
1807
0
        return -1;
1808
0
    }
1809
1810
3
    TabletJobInfoPB job_pb;
1811
3
    if (!job_pb.ParseFromString(job_val)) {
1812
0
        LOG(WARNING) << "failed to parse job, instance_id=" << instance_id_
1813
0
                     << " tablet_id=" << tablet_idx.tablet_id() << " key=" << hex(job_key);
1814
0
        return -1;
1815
0
    }
1816
1817
3
    std::string job_id {};
1818
3
    if (!job_pb.compaction().empty()) {
1819
2
        for (const auto& c : job_pb.compaction()) {
1820
2
            if (c.id() == rowset_meta.job_id()) {
1821
2
                job_id = c.id();
1822
2
                break;
1823
2
            }
1824
2
        }
1825
2
    } else if (job_pb.has_schema_change()) {
1826
1
        job_id = job_pb.schema_change().id();
1827
1
    }
1828
1829
3
    if (!job_id.empty() && rowset_meta.job_id() == job_id) {
1830
3
        LOG(INFO) << "begin to abort job for related rowset, job_id=" << rowset_meta.job_id()
1831
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id();
1832
3
        req.mutable_job()->CopyFrom(job_pb);
1833
3
        req.set_action(FinishTabletJobRequest::ABORT);
1834
3
        _finish_tablet_job(&req, &res, instance_id_, txn, txn_kv_.get(),
1835
3
                           delete_bitmap_lock_white_list_.get(), resource_mgr_.get(), code, msg,
1836
3
                           ss);
1837
3
        if (code != MetaServiceCode::OK) {
1838
0
            LOG(WARNING) << "failed to abort job, instance_id=" << instance_id_
1839
0
                         << " tablet_id=" << tablet_idx.tablet_id() << " code=" << code
1840
0
                         << " msg=" << msg;
1841
0
            return -1;
1842
0
        }
1843
3
        LOG(INFO) << "finish abort job for related rowset, job_id=" << rowset_meta.job_id()
1844
3
                  << " instance_id=" << instance_id_ << " tablet_id=" << tablet_idx.tablet_id()
1845
3
                  << " code=" << code << " msg=" << msg;
1846
3
    } else {
1847
        // clang-format off
1848
0
        LOG(INFO) << "there is no job for related rowset, directly recycle rowset data"
1849
0
                  << ", instance_id=" << instance_id_ 
1850
0
                  << ", tablet_id=" << tablet_idx.tablet_id() 
1851
0
                  << ", job_id=" << job_id
1852
0
                  << ", rowset_id=" << rowset_meta.rowset_id_v2();
1853
        // clang-format on
1854
0
    }
1855
1856
3
    return 0;
1857
3
}
1858
1859
template <typename T>
1860
57.7k
int InstanceRecycler::abort_txn_or_job_for_recycle(T& rowset_meta_pb) {
1861
57.7k
    RowsetMetaCloudPB* rs_meta;
1862
57.7k
    RecycleRowsetPB::Type rowset_type = RecycleRowsetPB::PREPARE;
1863
1864
57.7k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1865
        // For keys that are not in the RecycleRowsetPB::PREPARE state
1866
        // we do not need to check the job or txn state
1867
        // because tmp_rowset_key already exists when this key is generated.
1868
3.75k
        rowset_type = rowset_meta_pb.type();
1869
3.75k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1870
54.0k
    } else {
1871
54.0k
        rs_meta = &rowset_meta_pb;
1872
54.0k
    }
1873
1874
57.7k
    DCHECK(rs_meta != nullptr);
1875
1876
    // compaction/sc will generate recycle_rowset_key for each input rowset with load_id
1877
    // we need skip them because the related txn has been finished
1878
    // load_rowset1 load_rowset2 => pick for compaction => compact_rowset
1879
    // compact_rowset1 compact_rowset2 => pick for compaction/sc job => new_rowset
1880
57.7k
    if (rowset_type == RecycleRowsetPB::PREPARE) {
1881
54.6k
        if (rs_meta->has_load_id()) {
1882
            // load
1883
2
            return abort_txn_for_related_rowset(rs_meta->txn_id());
1884
54.6k
        } else if (rs_meta->has_job_id()) {
1885
            // compaction / schema change
1886
3
            return abort_job_for_related_rowset(*rs_meta);
1887
3
        }
1888
54.6k
    }
1889
1890
57.7k
    return 0;
1891
57.7k
}
_ZN5doris5cloud16InstanceRecycler28abort_txn_or_job_for_recycleINS0_15RecycleRowsetPBEEEiRT_
Line
Count
Source
1860
3.75k
int InstanceRecycler::abort_txn_or_job_for_recycle(T& rowset_meta_pb) {
1861
3.75k
    RowsetMetaCloudPB* rs_meta;
1862
3.75k
    RecycleRowsetPB::Type rowset_type = RecycleRowsetPB::PREPARE;
1863
1864
3.75k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1865
        // For keys that are not in the RecycleRowsetPB::PREPARE state
1866
        // we do not need to check the job or txn state
1867
        // because tmp_rowset_key already exists when this key is generated.
1868
3.75k
        rowset_type = rowset_meta_pb.type();
1869
3.75k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1870
3.75k
    } else {
1871
3.75k
        rs_meta = &rowset_meta_pb;
1872
3.75k
    }
1873
1874
3.75k
    DCHECK(rs_meta != nullptr);
1875
1876
    // compaction/sc will generate recycle_rowset_key for each input rowset with load_id
1877
    // we need skip them because the related txn has been finished
1878
    // load_rowset1 load_rowset2 => pick for compaction => compact_rowset
1879
    // compact_rowset1 compact_rowset2 => pick for compaction/sc job => new_rowset
1880
3.75k
    if (rowset_type == RecycleRowsetPB::PREPARE) {
1881
652
        if (rs_meta->has_load_id()) {
1882
            // load
1883
1
            return abort_txn_for_related_rowset(rs_meta->txn_id());
1884
651
        } else if (rs_meta->has_job_id()) {
1885
            // compaction / schema change
1886
1
            return abort_job_for_related_rowset(*rs_meta);
1887
1
        }
1888
652
    }
1889
1890
3.75k
    return 0;
1891
3.75k
}
_ZN5doris5cloud16InstanceRecycler28abort_txn_or_job_for_recycleINS_17RowsetMetaCloudPBEEEiRT_
Line
Count
Source
1860
54.0k
int InstanceRecycler::abort_txn_or_job_for_recycle(T& rowset_meta_pb) {
1861
54.0k
    RowsetMetaCloudPB* rs_meta;
1862
54.0k
    RecycleRowsetPB::Type rowset_type = RecycleRowsetPB::PREPARE;
1863
1864
54.0k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1865
        // For keys that are not in the RecycleRowsetPB::PREPARE state
1866
        // we do not need to check the job or txn state
1867
        // because tmp_rowset_key already exists when this key is generated.
1868
54.0k
        rowset_type = rowset_meta_pb.type();
1869
54.0k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1870
54.0k
    } else {
1871
54.0k
        rs_meta = &rowset_meta_pb;
1872
54.0k
    }
1873
1874
54.0k
    DCHECK(rs_meta != nullptr);
1875
1876
    // compaction/sc will generate recycle_rowset_key for each input rowset with load_id
1877
    // we need skip them because the related txn has been finished
1878
    // load_rowset1 load_rowset2 => pick for compaction => compact_rowset
1879
    // compact_rowset1 compact_rowset2 => pick for compaction/sc job => new_rowset
1880
54.0k
    if (rowset_type == RecycleRowsetPB::PREPARE) {
1881
54.0k
        if (rs_meta->has_load_id()) {
1882
            // load
1883
1
            return abort_txn_for_related_rowset(rs_meta->txn_id());
1884
54.0k
        } else if (rs_meta->has_job_id()) {
1885
            // compaction / schema change
1886
2
            return abort_job_for_related_rowset(*rs_meta);
1887
2
        }
1888
54.0k
    }
1889
1890
54.0k
    return 0;
1891
54.0k
}
1892
1893
template <typename T>
1894
int mark_rowset_as_recycled(TxnKv* txn_kv, const std::string& instance_id, std::string_view key,
1895
113k
                            T& rowset_meta_pb) {
1896
113k
    RowsetMetaCloudPB* rs_meta;
1897
1898
113k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1899
106k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1900
106k
    } else {
1901
106k
        rs_meta = &rowset_meta_pb;
1902
106k
    }
1903
1904
113k
    bool need_write_back = false;
1905
113k
    if ((!rs_meta->has_is_recycled() || !rs_meta->is_recycled())) {
1906
55.7k
        need_write_back = true;
1907
55.7k
        rs_meta->set_is_recycled(true);
1908
55.7k
    }
1909
1910
113k
    if (need_write_back) {
1911
55.7k
        std::unique_ptr<Transaction> txn;
1912
55.7k
        TxnErrorCode err = txn_kv->create_txn(&txn);
1913
55.7k
        if (err != TxnErrorCode::TXN_OK) {
1914
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1915
0
            return -1;
1916
0
        }
1917
        // double check becase of new transaction
1918
55.7k
        T rowset_meta;
1919
55.7k
        std::string val;
1920
55.7k
        err = txn->get(key, &val);
1921
55.7k
        if (!rowset_meta.ParseFromString(val)) {
1922
0
            LOG(WARNING) << "failed to parse rs_meta, instance_id=" << instance_id;
1923
0
            return -1;
1924
0
        }
1925
55.7k
        if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1926
52.0k
            rs_meta = rowset_meta.mutable_rowset_meta();
1927
52.0k
        } else {
1928
52.0k
            rs_meta = &rowset_meta;
1929
52.0k
        }
1930
55.7k
        if ((rs_meta->has_is_recycled() && rs_meta->is_recycled())) {
1931
0
            return 0;
1932
0
        }
1933
55.7k
        rs_meta->set_is_recycled(true);
1934
55.7k
        val.clear();
1935
55.7k
        rowset_meta.SerializeToString(&val);
1936
55.7k
        txn->put(key, val);
1937
55.7k
        err = txn->commit();
1938
55.7k
        if (err != TxnErrorCode::TXN_OK) {
1939
0
            LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1940
0
            return -1;
1941
0
        }
1942
55.7k
    }
1943
113k
    return need_write_back ? 1 : 0;
1944
113k
}
_ZN5doris5cloud23mark_rowset_as_recycledINS0_15RecycleRowsetPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS8_ERT_
Line
Count
Source
1895
7.50k
                            T& rowset_meta_pb) {
1896
7.50k
    RowsetMetaCloudPB* rs_meta;
1897
1898
7.50k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1899
7.50k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1900
7.50k
    } else {
1901
7.50k
        rs_meta = &rowset_meta_pb;
1902
7.50k
    }
1903
1904
7.50k
    bool need_write_back = false;
1905
7.50k
    if ((!rs_meta->has_is_recycled() || !rs_meta->is_recycled())) {
1906
3.75k
        need_write_back = true;
1907
3.75k
        rs_meta->set_is_recycled(true);
1908
3.75k
    }
1909
1910
7.50k
    if (need_write_back) {
1911
3.75k
        std::unique_ptr<Transaction> txn;
1912
3.75k
        TxnErrorCode err = txn_kv->create_txn(&txn);
1913
3.75k
        if (err != TxnErrorCode::TXN_OK) {
1914
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1915
0
            return -1;
1916
0
        }
1917
        // double check becase of new transaction
1918
3.75k
        T rowset_meta;
1919
3.75k
        std::string val;
1920
3.75k
        err = txn->get(key, &val);
1921
3.75k
        if (!rowset_meta.ParseFromString(val)) {
1922
0
            LOG(WARNING) << "failed to parse rs_meta, instance_id=" << instance_id;
1923
0
            return -1;
1924
0
        }
1925
3.75k
        if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1926
3.75k
            rs_meta = rowset_meta.mutable_rowset_meta();
1927
3.75k
        } else {
1928
3.75k
            rs_meta = &rowset_meta;
1929
3.75k
        }
1930
3.75k
        if ((rs_meta->has_is_recycled() && rs_meta->is_recycled())) {
1931
0
            return 0;
1932
0
        }
1933
3.75k
        rs_meta->set_is_recycled(true);
1934
3.75k
        val.clear();
1935
3.75k
        rowset_meta.SerializeToString(&val);
1936
3.75k
        txn->put(key, val);
1937
3.75k
        err = txn->commit();
1938
3.75k
        if (err != TxnErrorCode::TXN_OK) {
1939
0
            LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1940
0
            return -1;
1941
0
        }
1942
3.75k
    }
1943
7.50k
    return need_write_back ? 1 : 0;
1944
7.50k
}
_ZN5doris5cloud23mark_rowset_as_recycledINS_17RowsetMetaCloudPBEEEiPNS0_5TxnKvERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS8_ERT_
Line
Count
Source
1895
106k
                            T& rowset_meta_pb) {
1896
106k
    RowsetMetaCloudPB* rs_meta;
1897
1898
106k
    if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1899
106k
        rs_meta = rowset_meta_pb.mutable_rowset_meta();
1900
106k
    } else {
1901
106k
        rs_meta = &rowset_meta_pb;
1902
106k
    }
1903
1904
106k
    bool need_write_back = false;
1905
106k
    if ((!rs_meta->has_is_recycled() || !rs_meta->is_recycled())) {
1906
52.0k
        need_write_back = true;
1907
52.0k
        rs_meta->set_is_recycled(true);
1908
52.0k
    }
1909
1910
106k
    if (need_write_back) {
1911
52.0k
        std::unique_ptr<Transaction> txn;
1912
52.0k
        TxnErrorCode err = txn_kv->create_txn(&txn);
1913
52.0k
        if (err != TxnErrorCode::TXN_OK) {
1914
0
            LOG(WARNING) << "failed to create txn, instance_id=" << instance_id;
1915
0
            return -1;
1916
0
        }
1917
        // double check becase of new transaction
1918
52.0k
        T rowset_meta;
1919
52.0k
        std::string val;
1920
52.0k
        err = txn->get(key, &val);
1921
52.0k
        if (!rowset_meta.ParseFromString(val)) {
1922
0
            LOG(WARNING) << "failed to parse rs_meta, instance_id=" << instance_id;
1923
0
            return -1;
1924
0
        }
1925
52.0k
        if constexpr (std::is_same_v<T, RecycleRowsetPB>) {
1926
52.0k
            rs_meta = rowset_meta.mutable_rowset_meta();
1927
52.0k
        } else {
1928
52.0k
            rs_meta = &rowset_meta;
1929
52.0k
        }
1930
52.0k
        if ((rs_meta->has_is_recycled() && rs_meta->is_recycled())) {
1931
0
            return 0;
1932
0
        }
1933
52.0k
        rs_meta->set_is_recycled(true);
1934
52.0k
        val.clear();
1935
52.0k
        rowset_meta.SerializeToString(&val);
1936
52.0k
        txn->put(key, val);
1937
52.0k
        err = txn->commit();
1938
52.0k
        if (err != TxnErrorCode::TXN_OK) {
1939
0
            LOG(WARNING) << "failed to commit txn, instance_id=" << instance_id;
1940
0
            return -1;
1941
0
        }
1942
52.0k
    }
1943
106k
    return need_write_back ? 1 : 0;
1944
106k
}
1945
1946
1
int InstanceRecycler::recycle_ref_rowsets(bool* has_unrecycled_rowsets) {
1947
1
    const std::string task_name = "recycle_ref_rowsets";
1948
1
    *has_unrecycled_rowsets = false;
1949
1950
1
    std::string data_rowset_ref_count_key_start =
1951
1
            versioned::data_rowset_ref_count_key({instance_id_, 0, ""});
1952
1
    std::string data_rowset_ref_count_key_end =
1953
1
            versioned::data_rowset_ref_count_key({instance_id_, INT64_MAX, ""});
1954
1955
1
    LOG_WARNING("begin to recycle ref rowsets").tag("instance_id", instance_id_);
1956
1957
1
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
1958
1
    register_recycle_task(task_name, start_time);
1959
1960
1
    DORIS_CLOUD_DEFER {
1961
1
        unregister_recycle_task(task_name);
1962
1
        int64_t cost =
1963
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1964
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
1965
1
                .tag("instance_id", instance_id_);
1966
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_0clEv
Line
Count
Source
1960
1
    DORIS_CLOUD_DEFER {
1961
1
        unregister_recycle_task(task_name);
1962
1
        int64_t cost =
1963
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
1964
1
        LOG_WARNING("recycle ref rowsets finished, cost={}s", cost)
1965
1
                .tag("instance_id", instance_id_);
1966
1
    };
1967
1968
    // Phase 1: Scan to collect all tablet_ids that have rowset ref counts
1969
1
    std::set<int64_t> tablets_with_refs;
1970
1
    int64_t num_scanned = 0;
1971
1972
1
    auto scan_func = [&](std::string_view k, std::string_view v) -> int {
1973
0
        ++num_scanned;
1974
0
        int64_t tablet_id;
1975
0
        std::string rowset_id;
1976
0
        std::string_view key(k);
1977
0
        if (!versioned::decode_data_rowset_ref_count_key(&key, &tablet_id, &rowset_id)) {
1978
0
            LOG_WARNING("failed to decode data rowset ref count key").tag("key", hex(k));
1979
0
            return 0; // Continue scanning
1980
0
        }
1981
1982
0
        tablets_with_refs.insert(tablet_id);
1983
0
        return 0;
1984
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_ref_rowsetsEPbENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES7_
1985
1986
1
    if (scan_and_recycle(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end,
1987
1
                         std::move(scan_func)) != 0) {
1988
0
        LOG_WARNING("failed to scan data rowset ref count keys");
1989
0
        return -1;
1990
0
    }
1991
1992
1
    LOG_INFO("collected {} tablets with rowset refs, scanned {} ref count keys",
1993
1
             tablets_with_refs.size(), num_scanned)
1994
1
            .tag("instance_id", instance_id_);
1995
1996
    // Phase 2: Recycle each tablet
1997
1
    int64_t num_recycled_tablets = 0;
1998
1
    for (int64_t tablet_id : tablets_with_refs) {
1999
0
        if (stopped()) {
2000
0
            LOG_INFO("recycler stopped, skip remaining tablets")
2001
0
                    .tag("instance_id", instance_id_)
2002
0
                    .tag("tablets_processed", num_recycled_tablets)
2003
0
                    .tag("tablets_remaining", tablets_with_refs.size() - num_recycled_tablets);
2004
0
            break;
2005
0
        }
2006
2007
0
        RecyclerMetricsContext metrics_context(instance_id_, task_name);
2008
0
        if (recycle_versioned_tablet(tablet_id, metrics_context) != 0) {
2009
0
            LOG_WARNING("failed to recycle tablet")
2010
0
                    .tag("instance_id", instance_id_)
2011
0
                    .tag("tablet_id", tablet_id);
2012
0
            return -1;
2013
0
        }
2014
0
        ++num_recycled_tablets;
2015
0
    }
2016
2017
1
    LOG_INFO("recycled {} tablets", num_recycled_tablets)
2018
1
            .tag("instance_id", instance_id_)
2019
1
            .tag("total_tablets", tablets_with_refs.size());
2020
2021
    // Phase 3: Scan again to check if any ref count keys still exist
2022
1
    std::unique_ptr<Transaction> txn;
2023
1
    TxnErrorCode err = txn_kv_->create_txn(&txn);
2024
1
    if (err != TxnErrorCode::TXN_OK) {
2025
0
        LOG_WARNING("failed to create txn for final check")
2026
0
                .tag("instance_id", instance_id_)
2027
0
                .tag("err", err);
2028
0
        return -1;
2029
0
    }
2030
2031
1
    std::unique_ptr<RangeGetIterator> iter;
2032
1
    err = txn->get(data_rowset_ref_count_key_start, data_rowset_ref_count_key_end, &iter, true);
2033
1
    if (err != TxnErrorCode::TXN_OK) {
2034
0
        LOG_WARNING("failed to create range iterator for final check")
2035
0
                .tag("instance_id", instance_id_)
2036
0
                .tag("err", err);
2037
0
        return -1;
2038
0
    }
2039
2040
1
    *has_unrecycled_rowsets = iter->has_next();
2041
1
    if (*has_unrecycled_rowsets) {
2042
0
        LOG_INFO("still has unrecycled rowsets after recycle_ref_rowsets")
2043
0
                .tag("instance_id", instance_id_);
2044
0
    }
2045
2046
1
    return 0;
2047
1
}
2048
2049
17
int InstanceRecycler::recycle_indexes() {
2050
17
    const std::string task_name = "recycle_indexes";
2051
17
    int64_t num_scanned = 0;
2052
17
    int64_t num_expired = 0;
2053
17
    int64_t num_recycled = 0;
2054
17
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2055
2056
17
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
2057
17
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
2058
17
    std::string index_key0;
2059
17
    std::string index_key1;
2060
17
    recycle_index_key(index_key_info0, &index_key0);
2061
17
    recycle_index_key(index_key_info1, &index_key1);
2062
2063
17
    LOG_WARNING("begin to recycle indexes").tag("instance_id", instance_id_);
2064
2065
17
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2066
17
    register_recycle_task(task_name, start_time);
2067
2068
17
    DORIS_CLOUD_DEFER {
2069
17
        unregister_recycle_task(task_name);
2070
17
        int64_t cost =
2071
17
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2072
17
        metrics_context.finish_report();
2073
17
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2074
17
                .tag("instance_id", instance_id_)
2075
17
                .tag("num_scanned", num_scanned)
2076
17
                .tag("num_expired", num_expired)
2077
17
                .tag("num_recycled", num_recycled);
2078
17
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2068
2
    DORIS_CLOUD_DEFER {
2069
2
        unregister_recycle_task(task_name);
2070
2
        int64_t cost =
2071
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2072
2
        metrics_context.finish_report();
2073
2
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2074
2
                .tag("instance_id", instance_id_)
2075
2
                .tag("num_scanned", num_scanned)
2076
2
                .tag("num_expired", num_expired)
2077
2
                .tag("num_recycled", num_recycled);
2078
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_0clEv
Line
Count
Source
2068
15
    DORIS_CLOUD_DEFER {
2069
15
        unregister_recycle_task(task_name);
2070
15
        int64_t cost =
2071
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2072
15
        metrics_context.finish_report();
2073
15
        LOG_WARNING("recycle indexes finished, cost={}s", cost)
2074
15
                .tag("instance_id", instance_id_)
2075
15
                .tag("num_scanned", num_scanned)
2076
15
                .tag("num_expired", num_expired)
2077
15
                .tag("num_recycled", num_recycled);
2078
15
    };
2079
2080
17
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2081
2082
    // Elements in `index_keys` has the same lifetime as `it` in `scan_and_recycle`
2083
17
    std::vector<std::string_view> index_keys;
2084
17
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2085
10
        ++num_scanned;
2086
10
        RecycleIndexPB index_pb;
2087
10
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2088
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2089
0
            return -1;
2090
0
        }
2091
10
        int64_t current_time = ::time(nullptr);
2092
10
        if (current_time <
2093
10
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2094
0
            return 0;
2095
0
        }
2096
10
        ++num_expired;
2097
        // decode index_id
2098
10
        auto k1 = k;
2099
10
        k1.remove_prefix(1);
2100
10
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2101
10
        decode_key(&k1, &out);
2102
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2103
10
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2104
10
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2105
10
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2106
10
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2107
        // Change state to RECYCLING
2108
10
        std::unique_ptr<Transaction> txn;
2109
10
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2110
10
        if (err != TxnErrorCode::TXN_OK) {
2111
0
            LOG_WARNING("failed to create txn").tag("err", err);
2112
0
            return -1;
2113
0
        }
2114
10
        std::string val;
2115
10
        err = txn->get(k, &val);
2116
10
        if (err ==
2117
10
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2118
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2119
0
            return 0;
2120
0
        }
2121
10
        if (err != TxnErrorCode::TXN_OK) {
2122
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2123
0
            return -1;
2124
0
        }
2125
10
        index_pb.Clear();
2126
10
        if (!index_pb.ParseFromString(val)) {
2127
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2128
0
            return -1;
2129
0
        }
2130
10
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2131
9
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2132
9
            txn->put(k, index_pb.SerializeAsString());
2133
9
            err = txn->commit();
2134
9
            if (err != TxnErrorCode::TXN_OK) {
2135
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2136
0
                return -1;
2137
0
            }
2138
9
        }
2139
10
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2140
1
            LOG_WARNING("failed to recycle tablets under index")
2141
1
                    .tag("table_id", index_pb.table_id())
2142
1
                    .tag("instance_id", instance_id_)
2143
1
                    .tag("index_id", index_id);
2144
1
            return -1;
2145
1
        }
2146
2147
9
        if (index_pb.has_db_id()) {
2148
            // Recycle the versioned keys
2149
3
            std::unique_ptr<Transaction> txn;
2150
3
            err = txn_kv_->create_txn(&txn);
2151
3
            if (err != TxnErrorCode::TXN_OK) {
2152
0
                LOG_WARNING("failed to create txn").tag("err", err);
2153
0
                return -1;
2154
0
            }
2155
3
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2156
3
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2157
3
            std::string index_inverted_key = versioned::index_inverted_key(
2158
3
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2159
3
            versioned_remove_all(txn.get(), meta_key);
2160
3
            txn->remove(index_key);
2161
3
            txn->remove(index_inverted_key);
2162
3
            err = txn->commit();
2163
3
            if (err != TxnErrorCode::TXN_OK) {
2164
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2165
0
                return -1;
2166
0
            }
2167
3
        }
2168
2169
9
        metrics_context.total_recycled_num = ++num_recycled;
2170
9
        metrics_context.report();
2171
9
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2172
9
        index_keys.push_back(k);
2173
9
        return 0;
2174
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2084
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2085
2
        ++num_scanned;
2086
2
        RecycleIndexPB index_pb;
2087
2
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2088
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2089
0
            return -1;
2090
0
        }
2091
2
        int64_t current_time = ::time(nullptr);
2092
2
        if (current_time <
2093
2
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2094
0
            return 0;
2095
0
        }
2096
2
        ++num_expired;
2097
        // decode index_id
2098
2
        auto k1 = k;
2099
2
        k1.remove_prefix(1);
2100
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2101
2
        decode_key(&k1, &out);
2102
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2103
2
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2104
2
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2105
2
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2106
2
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2107
        // Change state to RECYCLING
2108
2
        std::unique_ptr<Transaction> txn;
2109
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2110
2
        if (err != TxnErrorCode::TXN_OK) {
2111
0
            LOG_WARNING("failed to create txn").tag("err", err);
2112
0
            return -1;
2113
0
        }
2114
2
        std::string val;
2115
2
        err = txn->get(k, &val);
2116
2
        if (err ==
2117
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2118
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2119
0
            return 0;
2120
0
        }
2121
2
        if (err != TxnErrorCode::TXN_OK) {
2122
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2123
0
            return -1;
2124
0
        }
2125
2
        index_pb.Clear();
2126
2
        if (!index_pb.ParseFromString(val)) {
2127
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2128
0
            return -1;
2129
0
        }
2130
2
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2131
1
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2132
1
            txn->put(k, index_pb.SerializeAsString());
2133
1
            err = txn->commit();
2134
1
            if (err != TxnErrorCode::TXN_OK) {
2135
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2136
0
                return -1;
2137
0
            }
2138
1
        }
2139
2
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2140
1
            LOG_WARNING("failed to recycle tablets under index")
2141
1
                    .tag("table_id", index_pb.table_id())
2142
1
                    .tag("instance_id", instance_id_)
2143
1
                    .tag("index_id", index_id);
2144
1
            return -1;
2145
1
        }
2146
2147
1
        if (index_pb.has_db_id()) {
2148
            // Recycle the versioned keys
2149
1
            std::unique_ptr<Transaction> txn;
2150
1
            err = txn_kv_->create_txn(&txn);
2151
1
            if (err != TxnErrorCode::TXN_OK) {
2152
0
                LOG_WARNING("failed to create txn").tag("err", err);
2153
0
                return -1;
2154
0
            }
2155
1
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2156
1
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2157
1
            std::string index_inverted_key = versioned::index_inverted_key(
2158
1
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2159
1
            versioned_remove_all(txn.get(), meta_key);
2160
1
            txn->remove(index_key);
2161
1
            txn->remove(index_inverted_key);
2162
1
            err = txn->commit();
2163
1
            if (err != TxnErrorCode::TXN_OK) {
2164
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2165
0
                return -1;
2166
0
            }
2167
1
        }
2168
2169
1
        metrics_context.total_recycled_num = ++num_recycled;
2170
1
        metrics_context.report();
2171
1
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2172
1
        index_keys.push_back(k);
2173
1
        return 0;
2174
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2084
8
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2085
8
        ++num_scanned;
2086
8
        RecycleIndexPB index_pb;
2087
8
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
2088
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2089
0
            return -1;
2090
0
        }
2091
8
        int64_t current_time = ::time(nullptr);
2092
8
        if (current_time <
2093
8
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
2094
0
            return 0;
2095
0
        }
2096
8
        ++num_expired;
2097
        // decode index_id
2098
8
        auto k1 = k;
2099
8
        k1.remove_prefix(1);
2100
8
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2101
8
        decode_key(&k1, &out);
2102
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
2103
8
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
2104
8
        LOG(INFO) << "begin to recycle index, instance_id=" << instance_id_
2105
8
                  << " table_id=" << index_pb.table_id() << " index_id=" << index_id
2106
8
                  << " state=" << RecycleIndexPB::State_Name(index_pb.state());
2107
        // Change state to RECYCLING
2108
8
        std::unique_ptr<Transaction> txn;
2109
8
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2110
8
        if (err != TxnErrorCode::TXN_OK) {
2111
0
            LOG_WARNING("failed to create txn").tag("err", err);
2112
0
            return -1;
2113
0
        }
2114
8
        std::string val;
2115
8
        err = txn->get(k, &val);
2116
8
        if (err ==
2117
8
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2118
0
            LOG_INFO("index {} has been recycled or committed", index_id);
2119
0
            return 0;
2120
0
        }
2121
8
        if (err != TxnErrorCode::TXN_OK) {
2122
0
            LOG_WARNING("failed to get kv").tag("key", hex(k)).tag("err", err);
2123
0
            return -1;
2124
0
        }
2125
8
        index_pb.Clear();
2126
8
        if (!index_pb.ParseFromString(val)) {
2127
0
            LOG_WARNING("malformed recycle index value").tag("key", hex(k));
2128
0
            return -1;
2129
0
        }
2130
8
        if (index_pb.state() != RecycleIndexPB::RECYCLING) {
2131
8
            index_pb.set_state(RecycleIndexPB::RECYCLING);
2132
8
            txn->put(k, index_pb.SerializeAsString());
2133
8
            err = txn->commit();
2134
8
            if (err != TxnErrorCode::TXN_OK) {
2135
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2136
0
                return -1;
2137
0
            }
2138
8
        }
2139
8
        if (recycle_tablets(index_pb.table_id(), index_id, metrics_context) != 0) {
2140
0
            LOG_WARNING("failed to recycle tablets under index")
2141
0
                    .tag("table_id", index_pb.table_id())
2142
0
                    .tag("instance_id", instance_id_)
2143
0
                    .tag("index_id", index_id);
2144
0
            return -1;
2145
0
        }
2146
2147
8
        if (index_pb.has_db_id()) {
2148
            // Recycle the versioned keys
2149
2
            std::unique_ptr<Transaction> txn;
2150
2
            err = txn_kv_->create_txn(&txn);
2151
2
            if (err != TxnErrorCode::TXN_OK) {
2152
0
                LOG_WARNING("failed to create txn").tag("err", err);
2153
0
                return -1;
2154
0
            }
2155
2
            std::string meta_key = versioned::meta_index_key({instance_id_, index_id});
2156
2
            std::string index_key = versioned::index_index_key({instance_id_, index_id});
2157
2
            std::string index_inverted_key = versioned::index_inverted_key(
2158
2
                    {instance_id_, index_pb.db_id(), index_pb.table_id(), index_id});
2159
2
            versioned_remove_all(txn.get(), meta_key);
2160
2
            txn->remove(index_key);
2161
2
            txn->remove(index_inverted_key);
2162
2
            err = txn->commit();
2163
2
            if (err != TxnErrorCode::TXN_OK) {
2164
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2165
0
                return -1;
2166
0
            }
2167
2
        }
2168
2169
8
        metrics_context.total_recycled_num = ++num_recycled;
2170
8
        metrics_context.report();
2171
8
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2172
8
        index_keys.push_back(k);
2173
8
        return 0;
2174
8
    };
2175
2176
17
    auto loop_done = [&index_keys, this]() -> int {
2177
6
        if (index_keys.empty()) return 0;
2178
5
        DORIS_CLOUD_DEFER {
2179
5
            index_keys.clear();
2180
5
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2178
1
        DORIS_CLOUD_DEFER {
2179
1
            index_keys.clear();
2180
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2178
4
        DORIS_CLOUD_DEFER {
2179
4
            index_keys.clear();
2180
4
        };
2181
5
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2182
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2183
0
            return -1;
2184
0
        }
2185
5
        return 0;
2186
5
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2176
2
    auto loop_done = [&index_keys, this]() -> int {
2177
2
        if (index_keys.empty()) return 0;
2178
1
        DORIS_CLOUD_DEFER {
2179
1
            index_keys.clear();
2180
1
        };
2181
1
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2182
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2183
0
            return -1;
2184
0
        }
2185
1
        return 0;
2186
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_indexesEvENK3$_1clEv
Line
Count
Source
2176
4
    auto loop_done = [&index_keys, this]() -> int {
2177
4
        if (index_keys.empty()) return 0;
2178
4
        DORIS_CLOUD_DEFER {
2179
4
            index_keys.clear();
2180
4
        };
2181
4
        if (0 != txn_remove(txn_kv_.get(), index_keys)) {
2182
0
            LOG(WARNING) << "failed to delete recycle index kv, instance_id=" << instance_id_;
2183
0
            return -1;
2184
0
        }
2185
4
        return 0;
2186
4
    };
2187
2188
17
    if (config::enable_recycler_stats_metrics) {
2189
0
        scan_and_statistics_indexes();
2190
0
    }
2191
    // recycle_func and loop_done for scan and recycle
2192
17
    return scan_and_recycle(index_key0, index_key1, std::move(recycle_func), std::move(loop_done));
2193
17
}
2194
2195
bool check_lazy_txn_finished(std::shared_ptr<TxnKv> txn_kv, const std::string instance_id,
2196
8.24k
                             int64_t tablet_id) {
2197
8.24k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("check_lazy_txn_finished::bypass_check", true);
2198
2199
8.24k
    std::unique_ptr<Transaction> txn;
2200
8.24k
    TxnErrorCode err = txn_kv->create_txn(&txn);
2201
8.24k
    if (err != TxnErrorCode::TXN_OK) {
2202
0
        LOG(WARNING) << "failed to create txn, instance_id=" << instance_id
2203
0
                     << " tablet_id=" << tablet_id << " err=" << err;
2204
0
        return false;
2205
0
    }
2206
2207
8.24k
    std::string tablet_idx_key = meta_tablet_idx_key({instance_id, tablet_id});
2208
8.24k
    std::string tablet_idx_val;
2209
8.24k
    err = txn->get(tablet_idx_key, &tablet_idx_val);
2210
8.24k
    if (TxnErrorCode::TXN_OK != err) {
2211
0
        LOG(WARNING) << "failed to get tablet index, instance_id=" << instance_id
2212
0
                     << " tablet_id=" << tablet_id << " err=" << err
2213
0
                     << " key=" << hex(tablet_idx_key);
2214
0
        return false;
2215
0
    }
2216
2217
8.24k
    TabletIndexPB tablet_idx_pb;
2218
8.24k
    if (!tablet_idx_pb.ParseFromString(tablet_idx_val)) {
2219
0
        LOG(WARNING) << "failed to parse tablet_idx_pb, instance_id=" << instance_id
2220
0
                     << " tablet_id=" << tablet_id;
2221
0
        return false;
2222
0
    }
2223
2224
8.24k
    if (!tablet_idx_pb.has_db_id()) {
2225
        // In the previous version, the db_id was not set in the index_pb.
2226
        // If updating to the version which enable txn lazy commit, the db_id will be set.
2227
0
        LOG(INFO) << "txn index has no db_id, tablet_id=" << tablet_id
2228
0
                  << " instance_id=" << instance_id
2229
0
                  << " tablet_idx_pb=" << tablet_idx_pb.ShortDebugString();
2230
0
        return true;
2231
0
    }
2232
2233
8.24k
    std::string ver_val;
2234
8.24k
    std::string ver_key =
2235
8.24k
            partition_version_key({instance_id, tablet_idx_pb.db_id(), tablet_idx_pb.table_id(),
2236
8.24k
                                   tablet_idx_pb.partition_id()});
2237
8.24k
    err = txn->get(ver_key, &ver_val);
2238
2239
8.24k
    if (TxnErrorCode::TXN_KEY_NOT_FOUND == err) {
2240
204
        LOG(INFO) << ""
2241
204
                     "partition version not found, instance_id="
2242
204
                  << instance_id << " db_id=" << tablet_idx_pb.db_id()
2243
204
                  << " table_id=" << tablet_idx_pb.table_id()
2244
204
                  << " partition_id=" << tablet_idx_pb.partition_id() << " tablet_id=" << tablet_id
2245
204
                  << " key=" << hex(ver_key);
2246
204
        return true;
2247
204
    }
2248
2249
8.03k
    if (TxnErrorCode::TXN_OK != err) {
2250
0
        LOG(WARNING) << "failed to get partition version, instance_id=" << instance_id
2251
0
                     << " db_id=" << tablet_idx_pb.db_id()
2252
0
                     << " table_id=" << tablet_idx_pb.table_id()
2253
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2254
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key) << " err=" << err;
2255
0
        return false;
2256
0
    }
2257
2258
8.03k
    VersionPB version_pb;
2259
8.03k
    if (!version_pb.ParseFromString(ver_val)) {
2260
0
        LOG(WARNING) << "failed to parse version_pb, instance_id=" << instance_id
2261
0
                     << " db_id=" << tablet_idx_pb.db_id()
2262
0
                     << " table_id=" << tablet_idx_pb.table_id()
2263
0
                     << " partition_id=" << tablet_idx_pb.partition_id()
2264
0
                     << " tablet_id=" << tablet_id << " key=" << hex(ver_key);
2265
0
        return false;
2266
0
    }
2267
2268
8.03k
    if (version_pb.pending_txn_ids_size() > 0) {
2269
4.00k
        TEST_SYNC_POINT_CALLBACK("check_lazy_txn_finished::txn_not_finished");
2270
4.00k
        DCHECK(version_pb.pending_txn_ids_size() == 1);
2271
4.00k
        LOG(WARNING) << "lazy txn not finished, instance_id=" << instance_id
2272
4.00k
                     << " db_id=" << tablet_idx_pb.db_id()
2273
4.00k
                     << " table_id=" << tablet_idx_pb.table_id()
2274
4.00k
                     << " partition_id=" << tablet_idx_pb.partition_id()
2275
4.00k
                     << " tablet_id=" << tablet_id << " txn_id=" << version_pb.pending_txn_ids(0)
2276
4.00k
                     << " key=" << hex(ver_key);
2277
4.00k
        return false;
2278
4.00k
    }
2279
4.03k
    return true;
2280
8.03k
}
2281
2282
15
int InstanceRecycler::recycle_partitions() {
2283
15
    const std::string task_name = "recycle_partitions";
2284
15
    int64_t num_scanned = 0;
2285
15
    int64_t num_expired = 0;
2286
15
    int64_t num_recycled = 0;
2287
15
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
2288
2289
15
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
2290
15
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
2291
15
    std::string part_key0;
2292
15
    std::string part_key1;
2293
15
    recycle_partition_key(part_key_info0, &part_key0);
2294
15
    recycle_partition_key(part_key_info1, &part_key1);
2295
2296
15
    LOG_WARNING("begin to recycle partitions").tag("instance_id", instance_id_);
2297
2298
15
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
2299
15
    register_recycle_task(task_name, start_time);
2300
2301
15
    DORIS_CLOUD_DEFER {
2302
15
        unregister_recycle_task(task_name);
2303
15
        int64_t cost =
2304
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2305
15
        metrics_context.finish_report();
2306
15
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2307
15
                .tag("instance_id", instance_id_)
2308
15
                .tag("num_scanned", num_scanned)
2309
15
                .tag("num_expired", num_expired)
2310
15
                .tag("num_recycled", num_recycled);
2311
15
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2301
2
    DORIS_CLOUD_DEFER {
2302
2
        unregister_recycle_task(task_name);
2303
2
        int64_t cost =
2304
2
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2305
2
        metrics_context.finish_report();
2306
2
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2307
2
                .tag("instance_id", instance_id_)
2308
2
                .tag("num_scanned", num_scanned)
2309
2
                .tag("num_expired", num_expired)
2310
2
                .tag("num_recycled", num_recycled);
2311
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_0clEv
Line
Count
Source
2301
13
    DORIS_CLOUD_DEFER {
2302
13
        unregister_recycle_task(task_name);
2303
13
        int64_t cost =
2304
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
2305
13
        metrics_context.finish_report();
2306
13
        LOG_WARNING("recycle partitions finished, cost={}s", cost)
2307
13
                .tag("instance_id", instance_id_)
2308
13
                .tag("num_scanned", num_scanned)
2309
13
                .tag("num_expired", num_expired)
2310
13
                .tag("num_recycled", num_recycled);
2311
13
    };
2312
2313
15
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
2314
2315
    // Elements in `partition_keys` has the same lifetime as `it` in `scan_and_recycle`
2316
15
    std::vector<std::string_view> partition_keys;
2317
15
    std::vector<std::string> partition_version_keys;
2318
15
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2319
9
        ++num_scanned;
2320
9
        RecyclePartitionPB part_pb;
2321
9
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2322
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2323
0
            return -1;
2324
0
        }
2325
9
        int64_t current_time = ::time(nullptr);
2326
9
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2327
9
                                                            &earlest_ts)) { // not expired
2328
0
            return 0;
2329
0
        }
2330
9
        ++num_expired;
2331
        // decode partition_id
2332
9
        auto k1 = k;
2333
9
        k1.remove_prefix(1);
2334
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2335
9
        decode_key(&k1, &out);
2336
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2337
9
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2338
9
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2339
9
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2340
9
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2341
        // Change state to RECYCLING
2342
9
        std::unique_ptr<Transaction> txn;
2343
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2344
9
        if (err != TxnErrorCode::TXN_OK) {
2345
0
            LOG_WARNING("failed to create txn").tag("err", err);
2346
0
            return -1;
2347
0
        }
2348
9
        std::string val;
2349
9
        err = txn->get(k, &val);
2350
9
        if (err ==
2351
9
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2352
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2353
0
            return 0;
2354
0
        }
2355
9
        if (err != TxnErrorCode::TXN_OK) {
2356
0
            LOG_WARNING("failed to get kv");
2357
0
            return -1;
2358
0
        }
2359
9
        part_pb.Clear();
2360
9
        if (!part_pb.ParseFromString(val)) {
2361
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2362
0
            return -1;
2363
0
        }
2364
        // Partitions with PREPARED state MUST have no data
2365
9
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2366
8
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2367
8
            txn->put(k, part_pb.SerializeAsString());
2368
8
            err = txn->commit();
2369
8
            if (err != TxnErrorCode::TXN_OK) {
2370
0
                LOG_WARNING("failed to commit txn: {}", err);
2371
0
                return -1;
2372
0
            }
2373
8
        }
2374
2375
9
        int ret = 0;
2376
33
        for (int64_t index_id : part_pb.index_id()) {
2377
33
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2378
1
                LOG_WARNING("failed to recycle tablets under partition")
2379
1
                        .tag("table_id", part_pb.table_id())
2380
1
                        .tag("instance_id", instance_id_)
2381
1
                        .tag("index_id", index_id)
2382
1
                        .tag("partition_id", partition_id);
2383
1
                ret = -1;
2384
1
            }
2385
33
        }
2386
9
        if (ret == 0 && part_pb.has_db_id()) {
2387
            // Recycle the versioned keys
2388
8
            std::unique_ptr<Transaction> txn;
2389
8
            err = txn_kv_->create_txn(&txn);
2390
8
            if (err != TxnErrorCode::TXN_OK) {
2391
0
                LOG_WARNING("failed to create txn").tag("err", err);
2392
0
                return -1;
2393
0
            }
2394
8
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2395
8
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2396
8
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2397
8
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2398
8
            std::string partition_version_key =
2399
8
                    versioned::partition_version_key({instance_id_, partition_id});
2400
8
            versioned_remove_all(txn.get(), meta_key);
2401
8
            txn->remove(index_key);
2402
8
            txn->remove(inverted_index_key);
2403
8
            versioned_remove_all(txn.get(), partition_version_key);
2404
8
            err = txn->commit();
2405
8
            if (err != TxnErrorCode::TXN_OK) {
2406
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2407
0
                return -1;
2408
0
            }
2409
8
        }
2410
2411
9
        if (ret == 0) {
2412
8
            ++num_recycled;
2413
8
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2414
8
            partition_keys.push_back(k);
2415
8
            if (part_pb.db_id() > 0) {
2416
8
                partition_version_keys.push_back(partition_version_key(
2417
8
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2418
8
            }
2419
8
            metrics_context.total_recycled_num = num_recycled;
2420
8
            metrics_context.report();
2421
8
        }
2422
9
        return ret;
2423
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2318
2
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2319
2
        ++num_scanned;
2320
2
        RecyclePartitionPB part_pb;
2321
2
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2322
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2323
0
            return -1;
2324
0
        }
2325
2
        int64_t current_time = ::time(nullptr);
2326
2
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2327
2
                                                            &earlest_ts)) { // not expired
2328
0
            return 0;
2329
0
        }
2330
2
        ++num_expired;
2331
        // decode partition_id
2332
2
        auto k1 = k;
2333
2
        k1.remove_prefix(1);
2334
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2335
2
        decode_key(&k1, &out);
2336
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2337
2
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2338
2
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2339
2
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2340
2
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2341
        // Change state to RECYCLING
2342
2
        std::unique_ptr<Transaction> txn;
2343
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2344
2
        if (err != TxnErrorCode::TXN_OK) {
2345
0
            LOG_WARNING("failed to create txn").tag("err", err);
2346
0
            return -1;
2347
0
        }
2348
2
        std::string val;
2349
2
        err = txn->get(k, &val);
2350
2
        if (err ==
2351
2
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2352
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2353
0
            return 0;
2354
0
        }
2355
2
        if (err != TxnErrorCode::TXN_OK) {
2356
0
            LOG_WARNING("failed to get kv");
2357
0
            return -1;
2358
0
        }
2359
2
        part_pb.Clear();
2360
2
        if (!part_pb.ParseFromString(val)) {
2361
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2362
0
            return -1;
2363
0
        }
2364
        // Partitions with PREPARED state MUST have no data
2365
2
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2366
1
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2367
1
            txn->put(k, part_pb.SerializeAsString());
2368
1
            err = txn->commit();
2369
1
            if (err != TxnErrorCode::TXN_OK) {
2370
0
                LOG_WARNING("failed to commit txn: {}", err);
2371
0
                return -1;
2372
0
            }
2373
1
        }
2374
2375
2
        int ret = 0;
2376
2
        for (int64_t index_id : part_pb.index_id()) {
2377
2
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2378
1
                LOG_WARNING("failed to recycle tablets under partition")
2379
1
                        .tag("table_id", part_pb.table_id())
2380
1
                        .tag("instance_id", instance_id_)
2381
1
                        .tag("index_id", index_id)
2382
1
                        .tag("partition_id", partition_id);
2383
1
                ret = -1;
2384
1
            }
2385
2
        }
2386
2
        if (ret == 0 && part_pb.has_db_id()) {
2387
            // Recycle the versioned keys
2388
1
            std::unique_ptr<Transaction> txn;
2389
1
            err = txn_kv_->create_txn(&txn);
2390
1
            if (err != TxnErrorCode::TXN_OK) {
2391
0
                LOG_WARNING("failed to create txn").tag("err", err);
2392
0
                return -1;
2393
0
            }
2394
1
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2395
1
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2396
1
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2397
1
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2398
1
            std::string partition_version_key =
2399
1
                    versioned::partition_version_key({instance_id_, partition_id});
2400
1
            versioned_remove_all(txn.get(), meta_key);
2401
1
            txn->remove(index_key);
2402
1
            txn->remove(inverted_index_key);
2403
1
            versioned_remove_all(txn.get(), partition_version_key);
2404
1
            err = txn->commit();
2405
1
            if (err != TxnErrorCode::TXN_OK) {
2406
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2407
0
                return -1;
2408
0
            }
2409
1
        }
2410
2411
2
        if (ret == 0) {
2412
1
            ++num_recycled;
2413
1
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2414
1
            partition_keys.push_back(k);
2415
1
            if (part_pb.db_id() > 0) {
2416
1
                partition_version_keys.push_back(partition_version_key(
2417
1
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2418
1
            }
2419
1
            metrics_context.total_recycled_num = num_recycled;
2420
1
            metrics_context.report();
2421
1
        }
2422
2
        return ret;
2423
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2318
7
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2319
7
        ++num_scanned;
2320
7
        RecyclePartitionPB part_pb;
2321
7
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
2322
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2323
0
            return -1;
2324
0
        }
2325
7
        int64_t current_time = ::time(nullptr);
2326
7
        if (current_time < calculate_partition_expired_time(instance_id_, part_pb,
2327
7
                                                            &earlest_ts)) { // not expired
2328
0
            return 0;
2329
0
        }
2330
7
        ++num_expired;
2331
        // decode partition_id
2332
7
        auto k1 = k;
2333
7
        k1.remove_prefix(1);
2334
7
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2335
7
        decode_key(&k1, &out);
2336
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
2337
7
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
2338
7
        LOG(INFO) << "begin to recycle partition, instance_id=" << instance_id_
2339
7
                  << " table_id=" << part_pb.table_id() << " partition_id=" << partition_id
2340
7
                  << " state=" << RecyclePartitionPB::State_Name(part_pb.state());
2341
        // Change state to RECYCLING
2342
7
        std::unique_ptr<Transaction> txn;
2343
7
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2344
7
        if (err != TxnErrorCode::TXN_OK) {
2345
0
            LOG_WARNING("failed to create txn").tag("err", err);
2346
0
            return -1;
2347
0
        }
2348
7
        std::string val;
2349
7
        err = txn->get(k, &val);
2350
7
        if (err ==
2351
7
            TxnErrorCode::TXN_KEY_NOT_FOUND) { // UNKNOWN, maybe recycled or committed, skip it
2352
0
            LOG_INFO("partition {} has been recycled or committed", partition_id);
2353
0
            return 0;
2354
0
        }
2355
7
        if (err != TxnErrorCode::TXN_OK) {
2356
0
            LOG_WARNING("failed to get kv");
2357
0
            return -1;
2358
0
        }
2359
7
        part_pb.Clear();
2360
7
        if (!part_pb.ParseFromString(val)) {
2361
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
2362
0
            return -1;
2363
0
        }
2364
        // Partitions with PREPARED state MUST have no data
2365
7
        if (part_pb.state() != RecyclePartitionPB::RECYCLING) {
2366
7
            part_pb.set_state(RecyclePartitionPB::RECYCLING);
2367
7
            txn->put(k, part_pb.SerializeAsString());
2368
7
            err = txn->commit();
2369
7
            if (err != TxnErrorCode::TXN_OK) {
2370
0
                LOG_WARNING("failed to commit txn: {}", err);
2371
0
                return -1;
2372
0
            }
2373
7
        }
2374
2375
7
        int ret = 0;
2376
31
        for (int64_t index_id : part_pb.index_id()) {
2377
31
            if (recycle_tablets(part_pb.table_id(), index_id, metrics_context, partition_id) != 0) {
2378
0
                LOG_WARNING("failed to recycle tablets under partition")
2379
0
                        .tag("table_id", part_pb.table_id())
2380
0
                        .tag("instance_id", instance_id_)
2381
0
                        .tag("index_id", index_id)
2382
0
                        .tag("partition_id", partition_id);
2383
0
                ret = -1;
2384
0
            }
2385
31
        }
2386
7
        if (ret == 0 && part_pb.has_db_id()) {
2387
            // Recycle the versioned keys
2388
7
            std::unique_ptr<Transaction> txn;
2389
7
            err = txn_kv_->create_txn(&txn);
2390
7
            if (err != TxnErrorCode::TXN_OK) {
2391
0
                LOG_WARNING("failed to create txn").tag("err", err);
2392
0
                return -1;
2393
0
            }
2394
7
            std::string meta_key = versioned::meta_partition_key({instance_id_, partition_id});
2395
7
            std::string index_key = versioned::partition_index_key({instance_id_, partition_id});
2396
7
            std::string inverted_index_key = versioned::partition_inverted_index_key(
2397
7
                    {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id});
2398
7
            std::string partition_version_key =
2399
7
                    versioned::partition_version_key({instance_id_, partition_id});
2400
7
            versioned_remove_all(txn.get(), meta_key);
2401
7
            txn->remove(index_key);
2402
7
            txn->remove(inverted_index_key);
2403
7
            versioned_remove_all(txn.get(), partition_version_key);
2404
7
            err = txn->commit();
2405
7
            if (err != TxnErrorCode::TXN_OK) {
2406
0
                LOG_WARNING("failed to commit txn").tag("err", err);
2407
0
                return -1;
2408
0
            }
2409
7
        }
2410
2411
7
        if (ret == 0) {
2412
7
            ++num_recycled;
2413
7
            check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
2414
7
            partition_keys.push_back(k);
2415
7
            if (part_pb.db_id() > 0) {
2416
7
                partition_version_keys.push_back(partition_version_key(
2417
7
                        {instance_id_, part_pb.db_id(), part_pb.table_id(), partition_id}));
2418
7
            }
2419
7
            metrics_context.total_recycled_num = num_recycled;
2420
7
            metrics_context.report();
2421
7
        }
2422
7
        return ret;
2423
7
    };
2424
2425
15
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2426
5
        if (partition_keys.empty()) return 0;
2427
4
        DORIS_CLOUD_DEFER {
2428
4
            partition_keys.clear();
2429
4
            partition_version_keys.clear();
2430
4
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2427
1
        DORIS_CLOUD_DEFER {
2428
1
            partition_keys.clear();
2429
1
            partition_version_keys.clear();
2430
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2427
3
        DORIS_CLOUD_DEFER {
2428
3
            partition_keys.clear();
2429
3
            partition_version_keys.clear();
2430
3
        };
2431
4
        std::unique_ptr<Transaction> txn;
2432
4
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2433
4
        if (err != TxnErrorCode::TXN_OK) {
2434
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2435
0
            return -1;
2436
0
        }
2437
8
        for (auto& k : partition_keys) {
2438
8
            txn->remove(k);
2439
8
        }
2440
8
        for (auto& k : partition_version_keys) {
2441
8
            txn->remove(k);
2442
8
        }
2443
4
        err = txn->commit();
2444
4
        if (err != TxnErrorCode::TXN_OK) {
2445
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2446
0
                         << " err=" << err;
2447
0
            return -1;
2448
0
        }
2449
4
        return 0;
2450
4
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2425
2
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2426
2
        if (partition_keys.empty()) return 0;
2427
1
        DORIS_CLOUD_DEFER {
2428
1
            partition_keys.clear();
2429
1
            partition_version_keys.clear();
2430
1
        };
2431
1
        std::unique_ptr<Transaction> txn;
2432
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2433
1
        if (err != TxnErrorCode::TXN_OK) {
2434
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2435
0
            return -1;
2436
0
        }
2437
1
        for (auto& k : partition_keys) {
2438
1
            txn->remove(k);
2439
1
        }
2440
1
        for (auto& k : partition_version_keys) {
2441
1
            txn->remove(k);
2442
1
        }
2443
1
        err = txn->commit();
2444
1
        if (err != TxnErrorCode::TXN_OK) {
2445
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2446
0
                         << " err=" << err;
2447
0
            return -1;
2448
0
        }
2449
1
        return 0;
2450
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18recycle_partitionsEvENK3$_1clEv
Line
Count
Source
2425
3
    auto loop_done = [&partition_keys, &partition_version_keys, this]() -> int {
2426
3
        if (partition_keys.empty()) return 0;
2427
3
        DORIS_CLOUD_DEFER {
2428
3
            partition_keys.clear();
2429
3
            partition_version_keys.clear();
2430
3
        };
2431
3
        std::unique_ptr<Transaction> txn;
2432
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2433
3
        if (err != TxnErrorCode::TXN_OK) {
2434
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
2435
0
            return -1;
2436
0
        }
2437
7
        for (auto& k : partition_keys) {
2438
7
            txn->remove(k);
2439
7
        }
2440
7
        for (auto& k : partition_version_keys) {
2441
7
            txn->remove(k);
2442
7
        }
2443
3
        err = txn->commit();
2444
3
        if (err != TxnErrorCode::TXN_OK) {
2445
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_
2446
0
                         << " err=" << err;
2447
0
            return -1;
2448
0
        }
2449
3
        return 0;
2450
3
    };
2451
2452
15
    if (config::enable_recycler_stats_metrics) {
2453
0
        scan_and_statistics_partitions();
2454
0
    }
2455
    // recycle_func and loop_done for scan and recycle
2456
15
    return scan_and_recycle(part_key0, part_key1, std::move(recycle_func), std::move(loop_done));
2457
15
}
2458
2459
14
int InstanceRecycler::recycle_versions() {
2460
14
    if (should_recycle_versioned_keys()) {
2461
2
        return recycle_orphan_partitions();
2462
2
    }
2463
2464
12
    int64_t num_scanned = 0;
2465
12
    int64_t num_recycled = 0;
2466
12
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
2467
2468
12
    LOG_WARNING("begin to recycle table and partition versions").tag("instance_id", instance_id_);
2469
2470
12
    auto start_time = steady_clock::now();
2471
2472
12
    DORIS_CLOUD_DEFER {
2473
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2474
12
        metrics_context.finish_report();
2475
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2476
12
                .tag("instance_id", instance_id_)
2477
12
                .tag("num_scanned", num_scanned)
2478
12
                .tag("num_recycled", num_recycled);
2479
12
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_0clEv
Line
Count
Source
2472
12
    DORIS_CLOUD_DEFER {
2473
12
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2474
12
        metrics_context.finish_report();
2475
12
        LOG_WARNING("recycle table and partition versions finished, cost={}s", cost)
2476
12
                .tag("instance_id", instance_id_)
2477
12
                .tag("num_scanned", num_scanned)
2478
12
                .tag("num_recycled", num_recycled);
2479
12
    };
2480
2481
12
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
2482
12
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
2483
12
    int64_t last_scanned_table_id = 0;
2484
12
    bool is_recycled = false; // Is last scanned kv recycled
2485
12
    auto recycle_func = [&num_scanned, &num_recycled, &last_scanned_table_id, &is_recycled,
2486
12
                         &metrics_context, this](std::string_view k, std::string_view) {
2487
2
        ++num_scanned;
2488
2
        auto k1 = k;
2489
2
        k1.remove_prefix(1);
2490
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2491
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2492
2
        decode_key(&k1, &out);
2493
2
        DCHECK_EQ(out.size(), 6) << k;
2494
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2495
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2496
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2497
0
            return 0;
2498
0
        }
2499
2
        last_scanned_table_id = table_id;
2500
2
        is_recycled = false;
2501
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2502
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2503
2
        std::unique_ptr<Transaction> txn;
2504
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2505
2
        if (err != TxnErrorCode::TXN_OK) {
2506
0
            return -1;
2507
0
        }
2508
2
        std::unique_ptr<RangeGetIterator> iter;
2509
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2510
2
        if (err != TxnErrorCode::TXN_OK) {
2511
0
            return -1;
2512
0
        }
2513
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2514
1
            return 0;
2515
1
        }
2516
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2517
        // 1. Remove all partition version kvs of this table
2518
1
        auto partition_version_key_begin =
2519
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2520
1
        auto partition_version_key_end =
2521
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2522
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2523
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2524
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2525
1
                     << " table_id=" << table_id;
2526
        // 2. Remove the table version kv of this table
2527
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2528
1
        txn->remove(tbl_version_key);
2529
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2530
        // 3. Remove mow delete bitmap update lock and tablet job lock
2531
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2532
1
        txn->remove(lock_key);
2533
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2534
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2535
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2536
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2537
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2538
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2539
1
                     << " table_id=" << table_id;
2540
1
        err = txn->commit();
2541
1
        if (err != TxnErrorCode::TXN_OK) {
2542
0
            return -1;
2543
0
        }
2544
1
        metrics_context.total_recycled_num = ++num_recycled;
2545
1
        metrics_context.report();
2546
1
        is_recycled = true;
2547
1
        return 0;
2548
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16recycle_versionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2486
2
                         &metrics_context, this](std::string_view k, std::string_view) {
2487
2
        ++num_scanned;
2488
2
        auto k1 = k;
2489
2
        k1.remove_prefix(1);
2490
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
2491
2
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
2492
2
        decode_key(&k1, &out);
2493
2
        DCHECK_EQ(out.size(), 6) << k;
2494
2
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
2495
2
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
2496
0
            num_recycled += is_recycled;         // Version kv of this table has been recycled
2497
0
            return 0;
2498
0
        }
2499
2
        last_scanned_table_id = table_id;
2500
2
        is_recycled = false;
2501
2
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
2502
2
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
2503
2
        std::unique_ptr<Transaction> txn;
2504
2
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2505
2
        if (err != TxnErrorCode::TXN_OK) {
2506
0
            return -1;
2507
0
        }
2508
2
        std::unique_ptr<RangeGetIterator> iter;
2509
2
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
2510
2
        if (err != TxnErrorCode::TXN_OK) {
2511
0
            return -1;
2512
0
        }
2513
2
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
2514
1
            return 0;
2515
1
        }
2516
1
        auto db_id = std::get<int64_t>(std::get<0>(out[3]));
2517
        // 1. Remove all partition version kvs of this table
2518
1
        auto partition_version_key_begin =
2519
1
                partition_version_key({instance_id_, db_id, table_id, 0});
2520
1
        auto partition_version_key_end =
2521
1
                partition_version_key({instance_id_, db_id, table_id, INT64_MAX});
2522
1
        txn->remove(partition_version_key_begin, partition_version_key_end);
2523
1
        LOG(WARNING) << "remove partition version kv, begin=" << hex(partition_version_key_begin)
2524
1
                     << " end=" << hex(partition_version_key_end) << " db_id=" << db_id
2525
1
                     << " table_id=" << table_id;
2526
        // 2. Remove the table version kv of this table
2527
1
        auto tbl_version_key = table_version_key({instance_id_, db_id, table_id});
2528
1
        txn->remove(tbl_version_key);
2529
1
        LOG(WARNING) << "remove table version kv " << hex(tbl_version_key);
2530
        // 3. Remove mow delete bitmap update lock and tablet job lock
2531
1
        std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2532
1
        txn->remove(lock_key);
2533
1
        LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2534
1
        std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2535
1
        std::string tablet_job_key_end = mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2536
1
        txn->remove(tablet_job_key_begin, tablet_job_key_end);
2537
1
        LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2538
1
                     << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2539
1
                     << " table_id=" << table_id;
2540
1
        err = txn->commit();
2541
1
        if (err != TxnErrorCode::TXN_OK) {
2542
0
            return -1;
2543
0
        }
2544
1
        metrics_context.total_recycled_num = ++num_recycled;
2545
1
        metrics_context.report();
2546
1
        is_recycled = true;
2547
1
        return 0;
2548
1
    };
2549
2550
12
    if (config::enable_recycler_stats_metrics) {
2551
0
        scan_and_statistics_versions();
2552
0
    }
2553
    // recycle_func and loop_done for scan and recycle
2554
12
    return scan_and_recycle(version_key_begin, version_key_end, std::move(recycle_func));
2555
14
}
2556
2557
3
int InstanceRecycler::recycle_orphan_partitions() {
2558
3
    int64_t num_scanned = 0;
2559
3
    int64_t num_recycled = 0;
2560
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_orphan_partitions");
2561
2562
3
    LOG_WARNING("begin to recycle orphan table and partition versions")
2563
3
            .tag("instance_id", instance_id_);
2564
2565
3
    auto start_time = steady_clock::now();
2566
2567
3
    DORIS_CLOUD_DEFER {
2568
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2569
3
        metrics_context.finish_report();
2570
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2571
3
                .tag("instance_id", instance_id_)
2572
3
                .tag("num_scanned", num_scanned)
2573
3
                .tag("num_recycled", num_recycled);
2574
3
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_0clEv
Line
Count
Source
2567
3
    DORIS_CLOUD_DEFER {
2568
3
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2569
3
        metrics_context.finish_report();
2570
3
        LOG_WARNING("recycle orphan table and partition versions finished, cost={}s", cost)
2571
3
                .tag("instance_id", instance_id_)
2572
3
                .tag("num_scanned", num_scanned)
2573
3
                .tag("num_recycled", num_recycled);
2574
3
    };
2575
2576
3
    bool is_empty_table = false;        // whether the table has no indexes
2577
3
    bool is_table_kvs_recycled = false; // whether the table related kvs have been recycled
2578
3
    int64_t current_table_id = 0;       // current scanning table id
2579
3
    auto recycle_func = [&num_scanned, &num_recycled, &metrics_context, &is_empty_table,
2580
3
                         &current_table_id, &is_table_kvs_recycled,
2581
3
                         this](std::string_view k, std::string_view) {
2582
2
        ++num_scanned;
2583
2584
2
        std::string_view k1(k);
2585
2
        int64_t db_id, table_id, partition_id;
2586
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2587
2
                                                            &partition_id)) {
2588
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2589
0
            return -1;
2590
2
        } else if (table_id != current_table_id) {
2591
2
            current_table_id = table_id;
2592
2
            is_table_kvs_recycled = false;
2593
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2594
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2595
2
            if (err != TxnErrorCode::TXN_OK) {
2596
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2597
0
                             << " table_id=" << table_id << " err=" << err;
2598
0
                return -1;
2599
0
            }
2600
2
        }
2601
2602
2
        if (!is_empty_table) {
2603
            // table is not empty, skip recycle
2604
1
            return 0;
2605
1
        }
2606
2607
1
        std::unique_ptr<Transaction> txn;
2608
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2609
1
        if (err != TxnErrorCode::TXN_OK) {
2610
0
            return -1;
2611
0
        }
2612
2613
        // 1. Remove all partition related kvs
2614
1
        std::string partition_meta_key =
2615
1
                versioned::meta_partition_key({instance_id_, partition_id});
2616
1
        std::string partition_index_key =
2617
1
                versioned::partition_index_key({instance_id_, partition_id});
2618
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2619
1
                {instance_id_, db_id, table_id, partition_id});
2620
1
        std::string partition_version_key =
2621
1
                versioned::partition_version_key({instance_id_, partition_id});
2622
1
        txn->remove(partition_index_key);
2623
1
        txn->remove(partition_inverted_key);
2624
1
        versioned_remove_all(txn.get(), partition_meta_key);
2625
1
        versioned_remove_all(txn.get(), partition_version_key);
2626
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2627
1
                     << " table_id=" << table_id << " db_id=" << db_id
2628
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2629
1
                     << " partition_version_key=" << hex(partition_version_key);
2630
2631
1
        if (!is_table_kvs_recycled) {
2632
1
            is_table_kvs_recycled = true;
2633
2634
            // 2. Remove the table version kv of this table
2635
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2636
1
            versioned_remove_all(txn.get(), table_version_key);
2637
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2638
            // 3. Remove mow delete bitmap update lock and tablet job lock
2639
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2640
1
            txn->remove(lock_key);
2641
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2642
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2643
1
            std::string tablet_job_key_end =
2644
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2645
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2646
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2647
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2648
1
                         << " table_id=" << table_id;
2649
1
        }
2650
2651
1
        err = txn->commit();
2652
1
        if (err != TxnErrorCode::TXN_OK) {
2653
0
            return -1;
2654
0
        }
2655
1
        metrics_context.total_recycled_num = ++num_recycled;
2656
1
        metrics_context.report();
2657
1
        return 0;
2658
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_orphan_partitionsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
2581
2
                         this](std::string_view k, std::string_view) {
2582
2
        ++num_scanned;
2583
2584
2
        std::string_view k1(k);
2585
2
        int64_t db_id, table_id, partition_id;
2586
2
        if (!versioned::decode_partition_inverted_index_key(&k1, &db_id, &table_id,
2587
2
                                                            &partition_id)) {
2588
0
            LOG(WARNING) << "malformed partition inverted index key " << hex(k);
2589
0
            return -1;
2590
2
        } else if (table_id != current_table_id) {
2591
2
            current_table_id = table_id;
2592
2
            is_table_kvs_recycled = false;
2593
2
            MetaReader meta_reader(instance_id_, txn_kv_.get());
2594
2
            TxnErrorCode err = meta_reader.has_no_indexes(db_id, table_id, &is_empty_table);
2595
2
            if (err != TxnErrorCode::TXN_OK) {
2596
0
                LOG(WARNING) << "failed to check whether table has no indexes, db_id=" << db_id
2597
0
                             << " table_id=" << table_id << " err=" << err;
2598
0
                return -1;
2599
0
            }
2600
2
        }
2601
2602
2
        if (!is_empty_table) {
2603
            // table is not empty, skip recycle
2604
1
            return 0;
2605
1
        }
2606
2607
1
        std::unique_ptr<Transaction> txn;
2608
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
2609
1
        if (err != TxnErrorCode::TXN_OK) {
2610
0
            return -1;
2611
0
        }
2612
2613
        // 1. Remove all partition related kvs
2614
1
        std::string partition_meta_key =
2615
1
                versioned::meta_partition_key({instance_id_, partition_id});
2616
1
        std::string partition_index_key =
2617
1
                versioned::partition_index_key({instance_id_, partition_id});
2618
1
        std::string partition_inverted_key = versioned::partition_inverted_index_key(
2619
1
                {instance_id_, db_id, table_id, partition_id});
2620
1
        std::string partition_version_key =
2621
1
                versioned::partition_version_key({instance_id_, partition_id});
2622
1
        txn->remove(partition_index_key);
2623
1
        txn->remove(partition_inverted_key);
2624
1
        versioned_remove_all(txn.get(), partition_meta_key);
2625
1
        versioned_remove_all(txn.get(), partition_version_key);
2626
1
        LOG(WARNING) << "remove partition related kvs, partition_id=" << partition_id
2627
1
                     << " table_id=" << table_id << " db_id=" << db_id
2628
1
                     << " partition_meta_key=" << hex(partition_meta_key)
2629
1
                     << " partition_version_key=" << hex(partition_version_key);
2630
2631
1
        if (!is_table_kvs_recycled) {
2632
1
            is_table_kvs_recycled = true;
2633
2634
            // 2. Remove the table version kv of this table
2635
1
            std::string table_version_key = versioned::table_version_key({instance_id_, table_id});
2636
1
            versioned_remove_all(txn.get(), table_version_key);
2637
1
            LOG(WARNING) << "remove table version kv " << hex(table_version_key);
2638
            // 3. Remove mow delete bitmap update lock and tablet job lock
2639
1
            std::string lock_key = meta_delete_bitmap_update_lock_key({instance_id_, table_id, -1});
2640
1
            txn->remove(lock_key);
2641
1
            LOG(WARNING) << "remove delete bitmap update lock kv " << hex(lock_key);
2642
1
            std::string tablet_job_key_begin = mow_tablet_job_key({instance_id_, table_id, 0});
2643
1
            std::string tablet_job_key_end =
2644
1
                    mow_tablet_job_key({instance_id_, table_id, INT64_MAX});
2645
1
            txn->remove(tablet_job_key_begin, tablet_job_key_end);
2646
1
            LOG(WARNING) << "remove mow tablet job kv, begin=" << hex(tablet_job_key_begin)
2647
1
                         << " end=" << hex(tablet_job_key_end) << " db_id=" << db_id
2648
1
                         << " table_id=" << table_id;
2649
1
        }
2650
2651
1
        err = txn->commit();
2652
1
        if (err != TxnErrorCode::TXN_OK) {
2653
0
            return -1;
2654
0
        }
2655
1
        metrics_context.total_recycled_num = ++num_recycled;
2656
1
        metrics_context.report();
2657
1
        return 0;
2658
1
    };
2659
2660
    // recycle_func and loop_done for scan and recycle
2661
3
    return scan_and_recycle(
2662
3
            versioned::partition_inverted_index_key({instance_id_, 0, 0, 0}),
2663
3
            versioned::partition_inverted_index_key({instance_id_, INT64_MAX, 0, 0}),
2664
3
            std::move(recycle_func));
2665
3
}
2666
2667
int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id,
2668
                                      RecyclerMetricsContext& metrics_context,
2669
49
                                      int64_t partition_id) {
2670
49
    bool is_multi_version =
2671
49
            instance_info_.has_multi_version_status() &&
2672
49
            instance_info_.multi_version_status() != MultiVersionStatus::MULTI_VERSION_DISABLED;
2673
49
    int64_t num_scanned = 0;
2674
49
    std::atomic_long num_recycled = 0;
2675
2676
49
    std::string tablet_key_begin, tablet_key_end;
2677
49
    std::string stats_key_begin, stats_key_end;
2678
49
    std::string job_key_begin, job_key_end;
2679
2680
49
    std::string tablet_belongs;
2681
49
    if (partition_id > 0) {
2682
        // recycle tablets in a partition belonging to the index
2683
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
2684
33
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
2685
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &stats_key_begin);
2686
33
        stats_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &stats_key_end);
2687
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &job_key_begin);
2688
33
        job_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &job_key_end);
2689
33
        tablet_belongs = "partition";
2690
33
    } else {
2691
        // recycle tablets in the index
2692
16
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
2693
16
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
2694
16
        stats_tablet_key({instance_id_, table_id, index_id, 0, 0}, &stats_key_begin);
2695
16
        stats_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &stats_key_end);
2696
16
        job_tablet_key({instance_id_, table_id, index_id, 0, 0}, &job_key_begin);
2697
16
        job_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &job_key_end);
2698
16
        tablet_belongs = "index";
2699
16
    }
2700
2701
49
    LOG_INFO("begin to recycle tablets of the " + tablet_belongs)
2702
49
            .tag("table_id", table_id)
2703
49
            .tag("index_id", index_id)
2704
49
            .tag("partition_id", partition_id);
2705
2706
49
    auto start_time = steady_clock::now();
2707
2708
49
    DORIS_CLOUD_DEFER {
2709
49
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2710
49
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2711
49
                .tag("instance_id", instance_id_)
2712
49
                .tag("table_id", table_id)
2713
49
                .tag("index_id", index_id)
2714
49
                .tag("partition_id", partition_id)
2715
49
                .tag("num_scanned", num_scanned)
2716
49
                .tag("num_recycled", num_recycled);
2717
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2708
4
    DORIS_CLOUD_DEFER {
2709
4
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2710
4
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2711
4
                .tag("instance_id", instance_id_)
2712
4
                .tag("table_id", table_id)
2713
4
                .tag("index_id", index_id)
2714
4
                .tag("partition_id", partition_id)
2715
4
                .tag("num_scanned", num_scanned)
2716
4
                .tag("num_recycled", num_recycled);
2717
4
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_0clEv
Line
Count
Source
2708
45
    DORIS_CLOUD_DEFER {
2709
45
        auto cost = duration<float>(steady_clock::now() - start_time).count();
2710
45
        LOG_INFO("recycle tablets of " + tablet_belongs + " finished, cost={}s", cost)
2711
45
                .tag("instance_id", instance_id_)
2712
45
                .tag("table_id", table_id)
2713
45
                .tag("index_id", index_id)
2714
45
                .tag("partition_id", partition_id)
2715
45
                .tag("num_scanned", num_scanned)
2716
45
                .tag("num_recycled", num_recycled);
2717
45
    };
2718
2719
    // The first string_view represents the tablet key which has been recycled
2720
    // The second bool represents whether the following fdb's tablet key deletion could be done using range move or not
2721
49
    using TabletKeyPair = std::pair<std::string_view, bool>;
2722
49
    SyncExecutor<TabletKeyPair> sync_executor(
2723
49
            _thread_pool_group.recycle_tablet_pool,
2724
49
            fmt::format("recycle tablets, tablet id {}, index id {}, partition id {}", table_id,
2725
49
                        index_id, partition_id),
2726
4.23k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2726
4.00k
            [](const TabletKeyPair& k) { return k.first.empty(); });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_2clERKSt4pairISt17basic_string_viewIcSt11char_traitsIcEEbE
Line
Count
Source
2726
237
            [](const TabletKeyPair& k) { return k.first.empty(); });
2727
2728
    // Elements in `tablet_keys` has the same lifetime as `it` in `scan_and_recycle`
2729
49
    std::vector<std::string> tablet_idx_keys;
2730
49
    std::vector<std::string> restore_job_keys;
2731
49
    std::vector<std::string> init_rs_keys;
2732
49
    std::vector<std::string> tablet_compact_stats_keys;
2733
49
    std::vector<std::string> tablet_load_stats_keys;
2734
49
    std::vector<std::string> versioned_meta_tablet_keys;
2735
8.24k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2736
8.24k
        bool use_range_remove = true;
2737
8.24k
        ++num_scanned;
2738
8.24k
        doris::TabletMetaCloudPB tablet_meta_pb;
2739
8.24k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2740
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2741
0
            use_range_remove = false;
2742
0
            return -1;
2743
0
        }
2744
8.24k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2745
2746
8.24k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2747
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2748
4.00k
            return -1;
2749
4.00k
        }
2750
2751
4.24k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2752
4.24k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2753
4.24k
        if (is_multi_version) {
2754
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2755
6
            tablet_compact_stats_keys.push_back(
2756
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2757
6
            tablet_load_stats_keys.push_back(
2758
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2759
6
            versioned_meta_tablet_keys.push_back(
2760
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2761
6
        }
2762
4.24k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2763
4.23k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2764
4.23k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2765
4.23k
            if (recycle_tablet(tid, metrics_context) != 0) {
2766
1
                LOG_WARNING("failed to recycle tablet")
2767
1
                        .tag("instance_id", instance_id_)
2768
1
                        .tag("tablet_id", tid);
2769
1
                range_move = false;
2770
1
                return {std::string_view(), range_move};
2771
1
            }
2772
4.23k
            ++num_recycled;
2773
4.23k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2774
4.23k
            return {k, range_move};
2775
4.23k
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2764
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2765
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2766
0
                LOG_WARNING("failed to recycle tablet")
2767
0
                        .tag("instance_id", instance_id_)
2768
0
                        .tag("tablet_id", tid);
2769
0
                range_move = false;
2770
0
                return {std::string_view(), range_move};
2771
0
            }
2772
4.00k
            ++num_recycled;
2773
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2774
4.00k
            return {k, range_move};
2775
4.00k
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENUlvE_clEv
Line
Count
Source
2764
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2765
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2766
1
                LOG_WARNING("failed to recycle tablet")
2767
1
                        .tag("instance_id", instance_id_)
2768
1
                        .tag("tablet_id", tid);
2769
1
                range_move = false;
2770
1
                return {std::string_view(), range_move};
2771
1
            }
2772
236
            ++num_recycled;
2773
236
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2774
236
            return {k, range_move};
2775
237
        });
2776
4.23k
        return 0;
2777
4.24k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2735
8.00k
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2736
8.00k
        bool use_range_remove = true;
2737
8.00k
        ++num_scanned;
2738
8.00k
        doris::TabletMetaCloudPB tablet_meta_pb;
2739
8.00k
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2740
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2741
0
            use_range_remove = false;
2742
0
            return -1;
2743
0
        }
2744
8.00k
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2745
2746
8.00k
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2747
4.00k
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2748
4.00k
            return -1;
2749
4.00k
        }
2750
2751
4.00k
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2752
4.00k
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2753
4.00k
        if (is_multi_version) {
2754
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2755
0
            tablet_compact_stats_keys.push_back(
2756
0
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2757
0
            tablet_load_stats_keys.push_back(
2758
0
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2759
0
            versioned_meta_tablet_keys.push_back(
2760
0
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2761
0
        }
2762
4.00k
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2763
4.00k
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2764
4.00k
                           &metrics_context, k]() mutable -> TabletKeyPair {
2765
4.00k
            if (recycle_tablet(tid, metrics_context) != 0) {
2766
4.00k
                LOG_WARNING("failed to recycle tablet")
2767
4.00k
                        .tag("instance_id", instance_id_)
2768
4.00k
                        .tag("tablet_id", tid);
2769
4.00k
                range_move = false;
2770
4.00k
                return {std::string_view(), range_move};
2771
4.00k
            }
2772
4.00k
            ++num_recycled;
2773
4.00k
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2774
4.00k
            return {k, range_move};
2775
4.00k
        });
2776
4.00k
        return 0;
2777
4.00k
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Line
Count
Source
2735
240
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
2736
240
        bool use_range_remove = true;
2737
240
        ++num_scanned;
2738
240
        doris::TabletMetaCloudPB tablet_meta_pb;
2739
240
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
2740
0
            LOG_WARNING("malformed tablet meta").tag("key", hex(k));
2741
0
            use_range_remove = false;
2742
0
            return -1;
2743
0
        }
2744
240
        int64_t tablet_id = tablet_meta_pb.tablet_id();
2745
2746
240
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
2747
0
            LOG(WARNING) << "lazy txn not finished tablet_id=" << tablet_meta_pb.tablet_id();
2748
0
            return -1;
2749
0
        }
2750
2751
240
        tablet_idx_keys.push_back(meta_tablet_idx_key({instance_id_, tablet_id}));
2752
240
        restore_job_keys.push_back(job_restore_tablet_key({instance_id_, tablet_id}));
2753
240
        if (is_multi_version) {
2754
            // The tablet index/inverted index are recycled in recycle_versioned_tablet.
2755
6
            tablet_compact_stats_keys.push_back(
2756
6
                    versioned::tablet_compact_stats_key({instance_id_, tablet_id}));
2757
6
            tablet_load_stats_keys.push_back(
2758
6
                    versioned::tablet_load_stats_key({instance_id_, tablet_id}));
2759
6
            versioned_meta_tablet_keys.push_back(
2760
6
                    versioned::meta_tablet_key({instance_id_, tablet_id}));
2761
6
        }
2762
240
        TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::bypass_check", false);
2763
237
        sync_executor.add([this, &num_recycled, tid = tablet_id, range_move = use_range_remove,
2764
237
                           &metrics_context, k]() mutable -> TabletKeyPair {
2765
237
            if (recycle_tablet(tid, metrics_context) != 0) {
2766
237
                LOG_WARNING("failed to recycle tablet")
2767
237
                        .tag("instance_id", instance_id_)
2768
237
                        .tag("tablet_id", tid);
2769
237
                range_move = false;
2770
237
                return {std::string_view(), range_move};
2771
237
            }
2772
237
            ++num_recycled;
2773
237
            LOG(INFO) << "recycle_tablets scan, key=" << (k.empty() ? "(empty)" : hex(k));
2774
237
            return {k, range_move};
2775
237
        });
2776
237
        return 0;
2777
240
    };
2778
2779
    // TODO(AlexYue): Add one ut to cover use_range_remove = false
2780
49
    auto loop_done = [&, this]() -> int {
2781
49
        bool finished = true;
2782
49
        auto tablet_keys = sync_executor.when_all(&finished);
2783
49
        if (!finished) {
2784
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2785
1
            return -1;
2786
1
        }
2787
48
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2788
46
        if (!tablet_keys.empty() &&
2789
46
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2789
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbEEEDaS7_
Line
Count
Source
2789
42
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2790
0
            return -1;
2791
0
        }
2792
        // sort the vector using key's order
2793
46
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2794
49.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2794
48.4k
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlRKT_RKT0_E_clISt4pairISt17basic_string_viewIcSt11char_traitsIcEEbESI_EEDaS7_SA_
Line
Count
Source
2794
944
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2795
46
        bool use_range_remove = true;
2796
4.23k
        for (auto& [_, remove] : tablet_keys) {
2797
4.23k
            if (!remove) {
2798
0
                use_range_remove = remove;
2799
0
                break;
2800
0
            }
2801
4.23k
        }
2802
46
        DORIS_CLOUD_DEFER {
2803
46
            tablet_idx_keys.clear();
2804
46
            restore_job_keys.clear();
2805
46
            init_rs_keys.clear();
2806
46
            tablet_compact_stats_keys.clear();
2807
46
            tablet_load_stats_keys.clear();
2808
46
            versioned_meta_tablet_keys.clear();
2809
46
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2802
2
        DORIS_CLOUD_DEFER {
2803
2
            tablet_idx_keys.clear();
2804
2
            restore_job_keys.clear();
2805
2
            init_rs_keys.clear();
2806
2
            tablet_compact_stats_keys.clear();
2807
2
            tablet_load_stats_keys.clear();
2808
2
            versioned_meta_tablet_keys.clear();
2809
2
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEvENKUlvE_clEv
Line
Count
Source
2802
44
        DORIS_CLOUD_DEFER {
2803
44
            tablet_idx_keys.clear();
2804
44
            restore_job_keys.clear();
2805
44
            init_rs_keys.clear();
2806
44
            tablet_compact_stats_keys.clear();
2807
44
            tablet_load_stats_keys.clear();
2808
44
            versioned_meta_tablet_keys.clear();
2809
44
        };
2810
46
        std::unique_ptr<Transaction> txn;
2811
46
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2812
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2813
0
            return -1;
2814
0
        }
2815
46
        std::string tablet_key_end;
2816
46
        if (!tablet_keys.empty()) {
2817
44
            if (use_range_remove) {
2818
44
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2819
44
                txn->remove(tablet_keys.front().first, tablet_key_end);
2820
44
            } else {
2821
0
                for (auto& [k, _] : tablet_keys) {
2822
0
                    txn->remove(k);
2823
0
                }
2824
0
            }
2825
44
        }
2826
46
        if (is_multi_version) {
2827
6
            for (auto& k : tablet_compact_stats_keys) {
2828
                // Remove all versions of tablet compact stats for recycled tablet
2829
6
                LOG_INFO("remove versioned tablet compact stats key")
2830
6
                        .tag("compact_stats_key", hex(k));
2831
6
                versioned_remove_all(txn.get(), k);
2832
6
            }
2833
6
            for (auto& k : tablet_load_stats_keys) {
2834
                // Remove all versions of tablet load stats for recycled tablet
2835
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2836
6
                versioned_remove_all(txn.get(), k);
2837
6
            }
2838
6
            for (auto& k : versioned_meta_tablet_keys) {
2839
                // Remove all versions of meta tablet for recycled tablet
2840
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2841
6
                versioned_remove_all(txn.get(), k);
2842
6
            }
2843
5
        }
2844
4.24k
        for (auto& k : tablet_idx_keys) {
2845
4.24k
            txn->remove(k);
2846
4.24k
        }
2847
4.24k
        for (auto& k : restore_job_keys) {
2848
4.24k
            txn->remove(k);
2849
4.24k
        }
2850
46
        for (auto& k : init_rs_keys) {
2851
0
            txn->remove(k);
2852
0
        }
2853
46
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2854
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2855
0
                         << ", err=" << err;
2856
0
            return -1;
2857
0
        }
2858
46
        return 0;
2859
46
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2780
4
    auto loop_done = [&, this]() -> int {
2781
4
        bool finished = true;
2782
4
        auto tablet_keys = sync_executor.when_all(&finished);
2783
4
        if (!finished) {
2784
0
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2785
0
            return -1;
2786
0
        }
2787
4
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2788
2
        if (!tablet_keys.empty() &&
2789
2
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2790
0
            return -1;
2791
0
        }
2792
        // sort the vector using key's order
2793
2
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2794
2
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2795
2
        bool use_range_remove = true;
2796
4.00k
        for (auto& [_, remove] : tablet_keys) {
2797
4.00k
            if (!remove) {
2798
0
                use_range_remove = remove;
2799
0
                break;
2800
0
            }
2801
4.00k
        }
2802
2
        DORIS_CLOUD_DEFER {
2803
2
            tablet_idx_keys.clear();
2804
2
            restore_job_keys.clear();
2805
2
            init_rs_keys.clear();
2806
2
            tablet_compact_stats_keys.clear();
2807
2
            tablet_load_stats_keys.clear();
2808
2
            versioned_meta_tablet_keys.clear();
2809
2
        };
2810
2
        std::unique_ptr<Transaction> txn;
2811
2
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2812
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2813
0
            return -1;
2814
0
        }
2815
2
        std::string tablet_key_end;
2816
2
        if (!tablet_keys.empty()) {
2817
2
            if (use_range_remove) {
2818
2
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2819
2
                txn->remove(tablet_keys.front().first, tablet_key_end);
2820
2
            } else {
2821
0
                for (auto& [k, _] : tablet_keys) {
2822
0
                    txn->remove(k);
2823
0
                }
2824
0
            }
2825
2
        }
2826
2
        if (is_multi_version) {
2827
0
            for (auto& k : tablet_compact_stats_keys) {
2828
                // Remove all versions of tablet compact stats for recycled tablet
2829
0
                LOG_INFO("remove versioned tablet compact stats key")
2830
0
                        .tag("compact_stats_key", hex(k));
2831
0
                versioned_remove_all(txn.get(), k);
2832
0
            }
2833
0
            for (auto& k : tablet_load_stats_keys) {
2834
                // Remove all versions of tablet load stats for recycled tablet
2835
0
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2836
0
                versioned_remove_all(txn.get(), k);
2837
0
            }
2838
0
            for (auto& k : versioned_meta_tablet_keys) {
2839
                // Remove all versions of meta tablet for recycled tablet
2840
0
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2841
0
                versioned_remove_all(txn.get(), k);
2842
0
            }
2843
0
        }
2844
4.00k
        for (auto& k : tablet_idx_keys) {
2845
4.00k
            txn->remove(k);
2846
4.00k
        }
2847
4.00k
        for (auto& k : restore_job_keys) {
2848
4.00k
            txn->remove(k);
2849
4.00k
        }
2850
2
        for (auto& k : init_rs_keys) {
2851
0
            txn->remove(k);
2852
0
        }
2853
2
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2854
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2855
0
                         << ", err=" << err;
2856
0
            return -1;
2857
0
        }
2858
2
        return 0;
2859
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_tabletsEllRNS0_22RecyclerMetricsContextElENK3$_1clEv
Line
Count
Source
2780
45
    auto loop_done = [&, this]() -> int {
2781
45
        bool finished = true;
2782
45
        auto tablet_keys = sync_executor.when_all(&finished);
2783
45
        if (!finished) {
2784
1
            LOG_WARNING("failed to recycle tablet").tag("instance_id", instance_id_);
2785
1
            return -1;
2786
1
        }
2787
44
        if (tablet_keys.empty() && tablet_idx_keys.empty()) return 0;
2788
44
        if (!tablet_keys.empty() &&
2789
44
            std::ranges::all_of(tablet_keys, [](const auto& k) { return k.first.empty(); })) {
2790
0
            return -1;
2791
0
        }
2792
        // sort the vector using key's order
2793
44
        std::sort(tablet_keys.begin(), tablet_keys.end(),
2794
44
                  [](const auto& prev, const auto& last) { return prev.first < last.first; });
2795
44
        bool use_range_remove = true;
2796
236
        for (auto& [_, remove] : tablet_keys) {
2797
236
            if (!remove) {
2798
0
                use_range_remove = remove;
2799
0
                break;
2800
0
            }
2801
236
        }
2802
44
        DORIS_CLOUD_DEFER {
2803
44
            tablet_idx_keys.clear();
2804
44
            restore_job_keys.clear();
2805
44
            init_rs_keys.clear();
2806
44
            tablet_compact_stats_keys.clear();
2807
44
            tablet_load_stats_keys.clear();
2808
44
            versioned_meta_tablet_keys.clear();
2809
44
        };
2810
44
        std::unique_ptr<Transaction> txn;
2811
44
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2812
0
            LOG(WARNING) << "failed to delete tablet meta kv, instance_id=" << instance_id_;
2813
0
            return -1;
2814
0
        }
2815
44
        std::string tablet_key_end;
2816
44
        if (!tablet_keys.empty()) {
2817
42
            if (use_range_remove) {
2818
42
                tablet_key_end = std::string(tablet_keys.back().first) + '\x00';
2819
42
                txn->remove(tablet_keys.front().first, tablet_key_end);
2820
42
            } else {
2821
0
                for (auto& [k, _] : tablet_keys) {
2822
0
                    txn->remove(k);
2823
0
                }
2824
0
            }
2825
42
        }
2826
44
        if (is_multi_version) {
2827
6
            for (auto& k : tablet_compact_stats_keys) {
2828
                // Remove all versions of tablet compact stats for recycled tablet
2829
6
                LOG_INFO("remove versioned tablet compact stats key")
2830
6
                        .tag("compact_stats_key", hex(k));
2831
6
                versioned_remove_all(txn.get(), k);
2832
6
            }
2833
6
            for (auto& k : tablet_load_stats_keys) {
2834
                // Remove all versions of tablet load stats for recycled tablet
2835
6
                LOG_INFO("remove versioned tablet load stats key").tag("load_stats_key", hex(k));
2836
6
                versioned_remove_all(txn.get(), k);
2837
6
            }
2838
6
            for (auto& k : versioned_meta_tablet_keys) {
2839
                // Remove all versions of meta tablet for recycled tablet
2840
6
                LOG_INFO("remove versioned meta tablet key").tag("meta_tablet_key", hex(k));
2841
6
                versioned_remove_all(txn.get(), k);
2842
6
            }
2843
5
        }
2844
239
        for (auto& k : tablet_idx_keys) {
2845
239
            txn->remove(k);
2846
239
        }
2847
239
        for (auto& k : restore_job_keys) {
2848
239
            txn->remove(k);
2849
239
        }
2850
44
        for (auto& k : init_rs_keys) {
2851
0
            txn->remove(k);
2852
0
        }
2853
44
        if (TxnErrorCode err = txn->commit(); err != TxnErrorCode::TXN_OK) {
2854
0
            LOG(WARNING) << "failed to delete kvs related to tablets, instance_id=" << instance_id_
2855
0
                         << ", err=" << err;
2856
0
            return -1;
2857
0
        }
2858
44
        return 0;
2859
44
    };
2860
2861
49
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(recycle_func),
2862
49
                               std::move(loop_done));
2863
49
    if (ret != 0) {
2864
3
        LOG(WARNING) << "failed to scan_and_recycle, instance_id=" << instance_id_;
2865
3
        return ret;
2866
3
    }
2867
2868
    // directly remove tablet stats and tablet jobs of these dropped index or partition
2869
46
    std::unique_ptr<Transaction> txn;
2870
46
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
2871
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_;
2872
0
        return -1;
2873
0
    }
2874
46
    txn->remove(stats_key_begin, stats_key_end);
2875
46
    LOG(WARNING) << "remove stats kv, begin=" << hex(stats_key_begin)
2876
46
                 << " end=" << hex(stats_key_end);
2877
46
    txn->remove(job_key_begin, job_key_end);
2878
46
    LOG(WARNING) << "remove job kv, begin=" << hex(job_key_begin) << " end=" << hex(job_key_end);
2879
46
    std::string schema_key_begin, schema_key_end;
2880
46
    std::string schema_dict_key;
2881
46
    std::string versioned_schema_key_begin, versioned_schema_key_end;
2882
46
    if (partition_id <= 0) {
2883
        // Delete schema kv of this index
2884
14
        meta_schema_key({instance_id_, index_id, 0}, &schema_key_begin);
2885
14
        meta_schema_key({instance_id_, index_id + 1, 0}, &schema_key_end);
2886
14
        txn->remove(schema_key_begin, schema_key_end);
2887
14
        LOG(WARNING) << "remove schema kv, begin=" << hex(schema_key_begin)
2888
14
                     << " end=" << hex(schema_key_end);
2889
14
        meta_schema_pb_dictionary_key({instance_id_, index_id}, &schema_dict_key);
2890
14
        txn->remove(schema_dict_key);
2891
14
        LOG(WARNING) << "remove schema dict kv, key=" << hex(schema_dict_key);
2892
14
        versioned::meta_schema_key({instance_id_, index_id, 0}, &versioned_schema_key_begin);
2893
14
        versioned::meta_schema_key({instance_id_, index_id + 1, 0}, &versioned_schema_key_end);
2894
14
        txn->remove(versioned_schema_key_begin, versioned_schema_key_end);
2895
14
        LOG(WARNING) << "remove versioned schema kv, begin=" << hex(versioned_schema_key_begin)
2896
14
                     << " end=" << hex(versioned_schema_key_end);
2897
14
    }
2898
2899
46
    TxnErrorCode err = txn->commit();
2900
46
    if (err != TxnErrorCode::TXN_OK) {
2901
0
        LOG(WARNING) << "failed to delete tablet job or stats key, instance_id=" << instance_id_
2902
0
                     << " err=" << err;
2903
0
        return -1;
2904
0
    }
2905
2906
46
    return ret;
2907
46
}
2908
2909
5.61k
int InstanceRecycler::delete_rowset_data(const RowsetMetaCloudPB& rs_meta_pb) {
2910
5.61k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("delete_rowset_data::bypass_check", true);
2911
5.61k
    int64_t num_segments = rs_meta_pb.num_segments();
2912
5.61k
    if (num_segments <= 0) return 0;
2913
2914
5.61k
    std::vector<std::string> file_paths;
2915
5.61k
    if (decrement_packed_file_ref_counts(rs_meta_pb) != 0) {
2916
0
        return -1;
2917
0
    }
2918
2919
    // Process inverted indexes
2920
5.61k
    std::vector<std::pair<int64_t, std::string>> index_ids;
2921
    // default format as v1.
2922
5.61k
    InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
2923
5.61k
    bool delete_rowset_data_by_prefix = false;
2924
5.61k
    if (rs_meta_pb.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
2925
        // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
2926
        // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
2927
0
        delete_rowset_data_by_prefix = true;
2928
5.61k
    } else if (rs_meta_pb.has_tablet_schema()) {
2929
10.0k
        for (const auto& index : rs_meta_pb.tablet_schema().index()) {
2930
10.0k
            if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
2931
10.0k
                index_ids.emplace_back(index.index_id(), index.index_suffix_name());
2932
10.0k
            }
2933
10.0k
        }
2934
4.80k
        if (rs_meta_pb.tablet_schema().has_inverted_index_storage_format()) {
2935
2.00k
            index_format = rs_meta_pb.tablet_schema().inverted_index_storage_format();
2936
2.00k
        }
2937
4.80k
    } else if (!rs_meta_pb.has_index_id() || !rs_meta_pb.has_schema_version()) {
2938
        // schema version and index id are not found, delete rowset data by prefix directly.
2939
0
        delete_rowset_data_by_prefix = true;
2940
809
    } else {
2941
        // otherwise, try to get schema kv
2942
809
        InvertedIndexInfo index_info;
2943
809
        int inverted_index_get_ret = inverted_index_id_cache_->get(
2944
809
                rs_meta_pb.index_id(), rs_meta_pb.schema_version(), index_info);
2945
809
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
2946
809
                                 &inverted_index_get_ret);
2947
809
        if (inverted_index_get_ret == 0) {
2948
809
            index_format = index_info.first;
2949
809
            index_ids = index_info.second;
2950
809
        } else if (inverted_index_get_ret == 1) {
2951
            // 1. Schema kv not found means tablet has been recycled
2952
            // Maybe some tablet recycle failed by some bugs
2953
            // We need to delete again to double check
2954
            // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
2955
            // because we are uncertain about the inverted index information.
2956
            // If there are inverted indexes, some data might not be deleted,
2957
            // but this is acceptable as we have made our best effort to delete the data.
2958
0
            LOG_INFO(
2959
0
                    "delete rowset data schema kv not found, need to delete again to double "
2960
0
                    "check")
2961
0
                    .tag("instance_id", instance_id_)
2962
0
                    .tag("tablet_id", rs_meta_pb.tablet_id())
2963
0
                    .tag("rowset", rs_meta_pb.ShortDebugString());
2964
            // Currently index_ids is guaranteed to be empty,
2965
            // but we clear it again here as a safeguard against future code changes
2966
            // that might cause index_ids to no longer be empty
2967
0
            index_format = InvertedIndexStorageFormatPB::V2;
2968
0
            index_ids.clear();
2969
0
        } else {
2970
            // failed to get schema kv, delete rowset data by prefix directly.
2971
0
            delete_rowset_data_by_prefix = true;
2972
0
        }
2973
809
    }
2974
2975
5.61k
    if (delete_rowset_data_by_prefix) {
2976
0
        return delete_rowset_data(rs_meta_pb.resource_id(), rs_meta_pb.tablet_id(),
2977
0
                                  rs_meta_pb.rowset_id_v2());
2978
0
    }
2979
2980
5.61k
    auto it = accessor_map_.find(rs_meta_pb.resource_id());
2981
5.61k
    if (it == accessor_map_.end()) {
2982
1.59k
        LOG_WARNING("instance has no such resource id")
2983
1.59k
                .tag("instance_id", instance_id_)
2984
1.59k
                .tag("resource_id", rs_meta_pb.resource_id());
2985
1.59k
        return -1;
2986
1.59k
    }
2987
4.01k
    auto& accessor = it->second;
2988
2989
4.01k
    int64_t tablet_id = rs_meta_pb.tablet_id();
2990
4.01k
    const auto& rowset_id = rs_meta_pb.rowset_id_v2();
2991
24.0k
    for (int64_t i = 0; i < num_segments; ++i) {
2992
20.0k
        file_paths.push_back(segment_path(tablet_id, rowset_id, i));
2993
20.0k
        if (index_format == InvertedIndexStorageFormatPB::V1) {
2994
40.0k
            for (const auto& index_id : index_ids) {
2995
40.0k
                file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i, index_id.first,
2996
40.0k
                                                            index_id.second));
2997
40.0k
            }
2998
20.0k
        } else if (!index_ids.empty()) {
2999
0
            file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3000
0
        }
3001
20.0k
    }
3002
3003
    // Process delete bitmap - check where it's stored.
3004
4.01k
    DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3005
4.01k
    if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
3006
4.01k
                                                       &delete_bitmap_storage_type) != 0) {
3007
0
        LOG_WARNING("failed to decrement delete bitmap packed file ref count")
3008
0
                .tag("instance_id", instance_id_)
3009
0
                .tag("tablet_id", tablet_id)
3010
0
                .tag("rowset_id", rowset_id);
3011
0
        return -1;
3012
0
    }
3013
4.01k
    if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) {
3014
2.00k
        file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3015
2.00k
    }
3016
    // TODO(AlexYue): seems could do do batch
3017
4.01k
    return accessor->delete_files(file_paths);
3018
4.01k
}
3019
3020
62.3k
// Marks the packed-file slices referenced by `rs_meta_pb` as deleted and refreshes the
// bookkeeping (ref count, remaining bytes, state) of every packed file involved.
//
// The rowset's `packed_slice_locations` map (small file path -> location) is grouped by
// packed file path. For each packed file a read-modify-write transaction is run on its
// PackedFileInfoPB:
//   * a missing packed file kv, or an update that changes nothing, counts as success;
//   * a commit conflict is retried up to
//     max(1, config::decrement_packed_file_ref_counts_retry_times) attempts;
//   * when no live slice remains the state is set to RECYCLING and, once the commit
//     succeeds, delete_packed_file_and_kv() is called right away.
//
// A failure on one packed file does not stop processing of the remaining ones.
// Returns 0 when every packed file was handled successfully, -1 otherwise.
int InstanceRecycler::decrement_packed_file_ref_counts(const doris::RowsetMetaCloudPB& rs_meta_pb) {
    LOG_INFO("begin process_packed_file_location_index")
            .tag("instance_id", instance_id_)
            .tag("tablet_id", rs_meta_pb.tablet_id())
            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
            .tag("index_map_size", rs_meta_pb.packed_slice_locations_size());
    const auto& index_map = rs_meta_pb.packed_slice_locations();
    if (index_map.empty()) {
        LOG_INFO("skip merge file update: empty merge_file_segment_index")
                .tag("instance_id", instance_id_)
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("rowset_id", rs_meta_pb.rowset_id_v2());
        return 0;
    }

    // packed file path -> paths of this rowset's small files stored inside that packed file.
    // Every value holds at least one path because entries are only created by push_back.
    std::unordered_map<std::string, std::vector<std::string>> packed_file_updates;
    packed_file_updates.reserve(index_map.size());
    for (const auto& [small_path, index_pb] : index_map) {
        if (!index_pb.has_packed_file_path() || index_pb.packed_file_path().empty()) {
            continue;
        }
        packed_file_updates[index_pb.packed_file_path()].push_back(small_path);
    }
    if (packed_file_updates.empty()) {
        LOG_INFO("skip packed file update: no valid merge_file_path in merge_file_segment_index")
                .tag("instance_id", instance_id_)
                .tag("tablet_id", rs_meta_pb.tablet_id())
                .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                .tag("index_map_size", index_map.size());
        return 0;
    }

    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
    int ret = 0;
    for (const auto& [packed_file_path, small_files] : packed_file_updates) {
        // Stays false on every failing exit from the attempt loop; the failure is recorded
        // once, right after the loop.
        bool success = false;
        for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
            std::unique_ptr<Transaction> txn;
            TxnErrorCode err = txn_kv_->create_txn(&txn);
            if (err != TxnErrorCode::TXN_OK) {
                LOG_WARNING("failed to create txn when updating packed file ref count")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("err", err);
                break;
            }

            std::string packed_key = packed_file_key({instance_id_, packed_file_path});
            std::string packed_val;
            err = txn->get(packed_key, &packed_val);
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
                LOG_WARNING("packed file info not found when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("key", hex(packed_key));
                // Skip this packed file entry and continue with others
                success = true;
                break;
            }
            if (err != TxnErrorCode::TXN_OK) {
                LOG_WARNING("failed to get packed file info when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("err", err);
                break;
            }

            cloud::PackedFileInfoPB packed_info;
            if (!packed_info.ParseFromString(packed_val)) {
                LOG_WARNING("failed to parse packed file info when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id());
                break;
            }

            LOG_INFO("packed file update check")
                    .tag("instance_id", instance_id_)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("merged_file_path", packed_file_path)
                    .tag("requested_small_files", small_files.size())
                    .tag("merge_entries", packed_info.slices_size());

            // Flag the first slice whose path matches each requested small file.
            auto* small_file_entries = packed_info.mutable_slices();
            int64_t changed_files = 0;
            int64_t missing_entries = 0;
            int64_t already_deleted = 0;
            for (const auto& small_file_path : small_files) {
                bool found = false;
                for (auto& small_file_entry : *small_file_entries) {
                    if (small_file_entry.path() != small_file_path) {
                        continue;
                    }
                    if (!small_file_entry.deleted()) {
                        small_file_entry.set_deleted(true);
                        if (!small_file_entry.corrected()) {
                            small_file_entry.set_corrected(true);
                        }
                        ++changed_files;
                    } else {
                        ++already_deleted;
                    }
                    found = true;
                    break;
                }
                if (!found) {
                    ++missing_entries;
                    LOG_WARNING("packed file info missing small file entry")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path)
                            .tag("small_file_path", small_file_path)
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                            .tag("tablet_id", rs_meta_pb.tablet_id());
                }
            }

            if (changed_files == 0) {
                // Nothing to write back: every slice was already deleted or is unknown.
                LOG_INFO("skip merge file update: no merge entries changed")
                        .tag("instance_id", instance_id_)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("merged_file_path", packed_file_path)
                        .tag("missing_entries", missing_entries)
                        .tag("already_deleted", already_deleted)
                        .tag("requested_small_files", small_files.size())
                        .tag("merge_entries", packed_info.slices_size());
                success = true;
                break;
            }

            // Calculate remaining files
            int64_t left_file_count = 0;
            int64_t left_file_bytes = 0;
            for (const auto& small_file_entry : packed_info.slices()) {
                if (!small_file_entry.deleted()) {
                    ++left_file_count;
                    left_file_bytes += small_file_entry.size();
                }
            }
            packed_info.set_remaining_slice_bytes(left_file_bytes);
            packed_info.set_ref_cnt(left_file_count);
            LOG_INFO("updated packed file reference info")
                    .tag("instance_id", instance_id_)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("packed_file_path", packed_file_path)
                    .tag("ref_cnt", left_file_count)
                    .tag("left_file_bytes", left_file_bytes);

            if (left_file_count == 0) {
                packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
            }

            std::string updated_val;
            if (!packed_info.SerializeToString(&updated_val)) {
                LOG_WARNING("failed to serialize packed file info when recycling rowset")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id());
                break;
            }

            txn->put(packed_key, updated_val);
            err = txn->commit();
            if (err == TxnErrorCode::TXN_OK) {
                success = true;
                if (left_file_count == 0) {
                    LOG_INFO("packed file ready to delete, deleting immediately")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path);
                    // The kv update itself succeeded; only the follow-up deletion failed.
                    if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
                        ret = -1;
                    }
                }
                break;
            }
            if (err == TxnErrorCode::TXN_CONFLICT) {
                if (attempt >= max_retry_times) {
                    LOG_WARNING("packed file info update conflict after max retry")
                            .tag("instance_id", instance_id_)
                            .tag("packed_file_path", packed_file_path)
                            .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                            .tag("tablet_id", rs_meta_pb.tablet_id())
                            .tag("changed_files", changed_files)
                            .tag("attempt", attempt);
                    break;
                }
                LOG_WARNING("packed file info update conflict, retrying")
                        .tag("instance_id", instance_id_)
                        .tag("packed_file_path", packed_file_path)
                        .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                        .tag("tablet_id", rs_meta_pb.tablet_id())
                        .tag("changed_files", changed_files)
                        .tag("attempt", attempt);
                sleep_for_packed_file_retry();
                continue;
            }

            LOG_WARNING("failed to commit packed file info update")
                    .tag("instance_id", instance_id_)
                    .tag("packed_file_path", packed_file_path)
                    .tag("rowset_id", rs_meta_pb.rowset_id_v2())
                    .tag("tablet_id", rs_meta_pb.tablet_id())
                    .tag("err", err)
                    .tag("changed_files", changed_files);
            break;
        }

        if (!success) {
            ret = -1;
        }
    }

    return ret;
}
3258
3259
int InstanceRecycler::decrement_delete_bitmap_packed_file_ref_counts(
3260
        int64_t tablet_id, const std::string& rowset_id,
3261
58.2k
        DeleteBitmapStorageType* out_storage_type) {
3262
58.2k
    if (out_storage_type) {
3263
58.2k
        *out_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3264
58.2k
    }
3265
3266
    // Get delete bitmap storage info from FDB
3267
58.2k
    std::string dbm_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
3268
58.2k
    std::unique_ptr<Transaction> txn;
3269
58.2k
    TxnErrorCode err = txn_kv_->create_txn(&txn);
3270
58.2k
    if (err != TxnErrorCode::TXN_OK) {
3271
0
        LOG_WARNING("failed to create txn when getting delete bitmap storage")
3272
0
                .tag("instance_id", instance_id_)
3273
0
                .tag("tablet_id", tablet_id)
3274
0
                .tag("rowset_id", rowset_id)
3275
0
                .tag("err", err);
3276
0
        return -1;
3277
0
    }
3278
3279
58.2k
    std::string dbm_val;
3280
58.2k
    err = txn->get(dbm_key, &dbm_val);
3281
58.2k
    if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3282
        // No delete bitmap for this rowset, nothing to do
3283
4.63k
        LOG_INFO("delete bitmap not found, skip packed file ref count decrement")
3284
4.63k
                .tag("instance_id", instance_id_)
3285
4.63k
                .tag("tablet_id", tablet_id)
3286
4.63k
                .tag("rowset_id", rowset_id);
3287
4.63k
        return 0;
3288
4.63k
    }
3289
53.5k
    if (err != TxnErrorCode::TXN_OK) {
3290
0
        LOG_WARNING("failed to get delete bitmap storage")
3291
0
                .tag("instance_id", instance_id_)
3292
0
                .tag("tablet_id", tablet_id)
3293
0
                .tag("rowset_id", rowset_id)
3294
0
                .tag("err", err);
3295
0
        return -1;
3296
0
    }
3297
3298
53.5k
    DeleteBitmapStoragePB storage;
3299
53.5k
    if (!storage.ParseFromString(dbm_val)) {
3300
0
        LOG_WARNING("failed to parse delete bitmap storage")
3301
0
                .tag("instance_id", instance_id_)
3302
0
                .tag("tablet_id", tablet_id)
3303
0
                .tag("rowset_id", rowset_id);
3304
0
        return -1;
3305
0
    }
3306
3307
53.5k
    if (storage.store_in_fdb()) {
3308
0
        if (out_storage_type) {
3309
0
            *out_storage_type = DeleteBitmapStorageType::IN_FDB;
3310
0
        }
3311
0
        return 0;
3312
0
    }
3313
3314
    // Check if delete bitmap is stored in standalone file.
3315
53.5k
    if (!storage.has_packed_slice_location() ||
3316
53.5k
        storage.packed_slice_location().packed_file_path().empty()) {
3317
53.5k
        if (out_storage_type) {
3318
53.5k
            *out_storage_type = DeleteBitmapStorageType::STANDALONE_FILE;
3319
53.5k
        }
3320
53.5k
        return 0;
3321
53.5k
    }
3322
3323
0
    if (out_storage_type) {
3324
0
        *out_storage_type = DeleteBitmapStorageType::PACKED_FILE;
3325
0
    }
3326
3327
0
    const auto& packed_loc = storage.packed_slice_location();
3328
0
    const std::string& packed_file_path = packed_loc.packed_file_path();
3329
3330
0
    LOG_INFO("decrementing delete bitmap packed file ref count")
3331
0
            .tag("instance_id", instance_id_)
3332
0
            .tag("tablet_id", tablet_id)
3333
0
            .tag("rowset_id", rowset_id)
3334
0
            .tag("packed_file_path", packed_file_path);
3335
3336
0
    const int max_retry_times = std::max(1, config::decrement_packed_file_ref_counts_retry_times);
3337
0
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3338
0
        std::unique_ptr<Transaction> update_txn;
3339
0
        err = txn_kv_->create_txn(&update_txn);
3340
0
        if (err != TxnErrorCode::TXN_OK) {
3341
0
            LOG_WARNING("failed to create txn for delete bitmap packed file update")
3342
0
                    .tag("instance_id", instance_id_)
3343
0
                    .tag("tablet_id", tablet_id)
3344
0
                    .tag("rowset_id", rowset_id)
3345
0
                    .tag("err", err);
3346
0
            return -1;
3347
0
        }
3348
3349
0
        std::string packed_key = packed_file_key({instance_id_, packed_file_path});
3350
0
        std::string packed_val;
3351
0
        err = update_txn->get(packed_key, &packed_val);
3352
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3353
0
            LOG_WARNING("packed file info not found for delete bitmap")
3354
0
                    .tag("instance_id", instance_id_)
3355
0
                    .tag("tablet_id", tablet_id)
3356
0
                    .tag("rowset_id", rowset_id)
3357
0
                    .tag("packed_file_path", packed_file_path);
3358
0
            return 0;
3359
0
        }
3360
0
        if (err != TxnErrorCode::TXN_OK) {
3361
0
            LOG_WARNING("failed to get packed file info for delete bitmap")
3362
0
                    .tag("instance_id", instance_id_)
3363
0
                    .tag("tablet_id", tablet_id)
3364
0
                    .tag("rowset_id", rowset_id)
3365
0
                    .tag("packed_file_path", packed_file_path)
3366
0
                    .tag("err", err);
3367
0
            return -1;
3368
0
        }
3369
3370
0
        cloud::PackedFileInfoPB packed_info;
3371
0
        if (!packed_info.ParseFromString(packed_val)) {
3372
0
            LOG_WARNING("failed to parse packed file info for delete bitmap")
3373
0
                    .tag("instance_id", instance_id_)
3374
0
                    .tag("tablet_id", tablet_id)
3375
0
                    .tag("rowset_id", rowset_id)
3376
0
                    .tag("packed_file_path", packed_file_path);
3377
0
            return -1;
3378
0
        }
3379
3380
        // Find and mark the small file entry as deleted
3381
        // Use tablet_id and rowset_id to match entry instead of path,
3382
        // because path format may vary with path_version (with or without shard prefix)
3383
0
        auto* entries = packed_info.mutable_slices();
3384
0
        bool found = false;
3385
0
        bool already_deleted = false;
3386
0
        for (auto& entry : *entries) {
3387
0
            if (entry.tablet_id() == tablet_id && entry.rowset_id() == rowset_id) {
3388
0
                if (!entry.deleted()) {
3389
0
                    entry.set_deleted(true);
3390
0
                    if (!entry.corrected()) {
3391
0
                        entry.set_corrected(true);
3392
0
                    }
3393
0
                } else {
3394
0
                    already_deleted = true;
3395
0
                }
3396
0
                found = true;
3397
0
                break;
3398
0
            }
3399
0
        }
3400
3401
0
        if (!found) {
3402
0
            LOG_WARNING("delete bitmap entry not found in packed file")
3403
0
                    .tag("instance_id", instance_id_)
3404
0
                    .tag("tablet_id", tablet_id)
3405
0
                    .tag("rowset_id", rowset_id)
3406
0
                    .tag("packed_file_path", packed_file_path);
3407
0
            return 0;
3408
0
        }
3409
3410
0
        if (already_deleted) {
3411
0
            LOG_INFO("delete bitmap entry already deleted in packed file")
3412
0
                    .tag("instance_id", instance_id_)
3413
0
                    .tag("tablet_id", tablet_id)
3414
0
                    .tag("rowset_id", rowset_id)
3415
0
                    .tag("packed_file_path", packed_file_path);
3416
0
            return 0;
3417
0
        }
3418
3419
        // Calculate remaining files
3420
0
        int64_t left_file_count = 0;
3421
0
        int64_t left_file_bytes = 0;
3422
0
        for (const auto& entry : packed_info.slices()) {
3423
0
            if (!entry.deleted()) {
3424
0
                ++left_file_count;
3425
0
                left_file_bytes += entry.size();
3426
0
            }
3427
0
        }
3428
0
        packed_info.set_remaining_slice_bytes(left_file_bytes);
3429
0
        packed_info.set_ref_cnt(left_file_count);
3430
3431
0
        if (left_file_count == 0) {
3432
0
            packed_info.set_state(cloud::PackedFileInfoPB::RECYCLING);
3433
0
        }
3434
3435
0
        std::string updated_val;
3436
0
        if (!packed_info.SerializeToString(&updated_val)) {
3437
0
            LOG_WARNING("failed to serialize packed file info for delete bitmap")
3438
0
                    .tag("instance_id", instance_id_)
3439
0
                    .tag("tablet_id", tablet_id)
3440
0
                    .tag("rowset_id", rowset_id)
3441
0
                    .tag("packed_file_path", packed_file_path);
3442
0
            return -1;
3443
0
        }
3444
3445
0
        update_txn->put(packed_key, updated_val);
3446
0
        err = update_txn->commit();
3447
0
        if (err == TxnErrorCode::TXN_OK) {
3448
0
            LOG_INFO("delete bitmap packed file ref count decremented")
3449
0
                    .tag("instance_id", instance_id_)
3450
0
                    .tag("tablet_id", tablet_id)
3451
0
                    .tag("rowset_id", rowset_id)
3452
0
                    .tag("packed_file_path", packed_file_path)
3453
0
                    .tag("left_file_count", left_file_count);
3454
0
            if (left_file_count == 0) {
3455
0
                if (delete_packed_file_and_kv(packed_file_path, packed_key, packed_info) != 0) {
3456
0
                    return -1;
3457
0
                }
3458
0
            }
3459
0
            return 0;
3460
0
        }
3461
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3462
0
            if (attempt >= max_retry_times) {
3463
0
                LOG_WARNING("delete bitmap packed file update conflict after max retry")
3464
0
                        .tag("instance_id", instance_id_)
3465
0
                        .tag("tablet_id", tablet_id)
3466
0
                        .tag("rowset_id", rowset_id)
3467
0
                        .tag("packed_file_path", packed_file_path)
3468
0
                        .tag("attempt", attempt);
3469
0
                return -1;
3470
0
            }
3471
0
            sleep_for_packed_file_retry();
3472
0
            continue;
3473
0
        }
3474
3475
0
        LOG_WARNING("failed to commit delete bitmap packed file update")
3476
0
                .tag("instance_id", instance_id_)
3477
0
                .tag("tablet_id", tablet_id)
3478
0
                .tag("rowset_id", rowset_id)
3479
0
                .tag("packed_file_path", packed_file_path)
3480
0
                .tag("err", err);
3481
0
        return -1;
3482
0
    }
3483
3484
0
    return -1;
3485
0
}
3486
3487
int InstanceRecycler::delete_packed_file_and_kv(const std::string& packed_file_path,
3488
                                                const std::string& packed_key,
3489
7
                                                const cloud::PackedFileInfoPB& packed_info) {
3490
7
    if (!packed_info.has_resource_id() || packed_info.resource_id().empty()) {
3491
0
        LOG_WARNING("packed file missing resource id when recycling")
3492
0
                .tag("instance_id", instance_id_)
3493
0
                .tag("packed_file_path", packed_file_path);
3494
0
        return -1;
3495
0
    }
3496
3497
7
    auto [resource_id, accessor] = resolve_packed_file_accessor(packed_info.resource_id());
3498
7
    if (!accessor) {
3499
0
        LOG_WARNING("no accessor available to delete packed file")
3500
0
                .tag("instance_id", instance_id_)
3501
0
                .tag("packed_file_path", packed_file_path)
3502
0
                .tag("resource_id", packed_info.resource_id());
3503
0
        return -1;
3504
0
    }
3505
3506
7
    int del_ret = accessor->delete_file(packed_file_path);
3507
7
    if (del_ret != 0 && del_ret != 1) {
3508
0
        LOG_WARNING("failed to delete packed file")
3509
0
                .tag("instance_id", instance_id_)
3510
0
                .tag("packed_file_path", packed_file_path)
3511
0
                .tag("resource_id", resource_id)
3512
0
                .tag("ret", del_ret);
3513
0
        return -1;
3514
0
    }
3515
7
    if (del_ret == 1) {
3516
0
        LOG_INFO("packed file already removed")
3517
0
                .tag("instance_id", instance_id_)
3518
0
                .tag("packed_file_path", packed_file_path)
3519
0
                .tag("resource_id", resource_id);
3520
7
    } else {
3521
7
        LOG_INFO("deleted packed file")
3522
7
                .tag("instance_id", instance_id_)
3523
7
                .tag("packed_file_path", packed_file_path)
3524
7
                .tag("resource_id", resource_id);
3525
7
    }
3526
3527
7
    const int max_retry_times = std::max(1, config::packed_file_txn_retry_times);
3528
7
    for (int attempt = 1; attempt <= max_retry_times; ++attempt) {
3529
7
        std::unique_ptr<Transaction> del_txn;
3530
7
        TxnErrorCode err = txn_kv_->create_txn(&del_txn);
3531
7
        if (err != TxnErrorCode::TXN_OK) {
3532
0
            LOG_WARNING("failed to create txn when removing packed file kv")
3533
0
                    .tag("instance_id", instance_id_)
3534
0
                    .tag("packed_file_path", packed_file_path)
3535
0
                    .tag("attempt", attempt)
3536
0
                    .tag("err", err);
3537
0
            return -1;
3538
0
        }
3539
3540
7
        std::string latest_val;
3541
7
        err = del_txn->get(packed_key, &latest_val);
3542
7
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
3543
0
            return 0;
3544
0
        }
3545
7
        if (err != TxnErrorCode::TXN_OK) {
3546
0
            LOG_WARNING("failed to re-read packed file kv before removal")
3547
0
                    .tag("instance_id", instance_id_)
3548
0
                    .tag("packed_file_path", packed_file_path)
3549
0
                    .tag("attempt", attempt)
3550
0
                    .tag("err", err);
3551
0
            return -1;
3552
0
        }
3553
3554
7
        cloud::PackedFileInfoPB latest_info;
3555
7
        if (!latest_info.ParseFromString(latest_val)) {
3556
0
            LOG_WARNING("failed to parse packed file info before removal")
3557
0
                    .tag("instance_id", instance_id_)
3558
0
                    .tag("packed_file_path", packed_file_path)
3559
0
                    .tag("attempt", attempt);
3560
0
            return -1;
3561
0
        }
3562
3563
7
        if (!(latest_info.state() == cloud::PackedFileInfoPB::RECYCLING &&
3564
7
              latest_info.ref_cnt() == 0)) {
3565
0
            LOG_INFO("packed file state changed before removal, skip deleting kv")
3566
0
                    .tag("instance_id", instance_id_)
3567
0
                    .tag("packed_file_path", packed_file_path)
3568
0
                    .tag("attempt", attempt);
3569
0
            return 0;
3570
0
        }
3571
3572
7
        del_txn->remove(packed_key);
3573
7
        err = del_txn->commit();
3574
7
        if (err == TxnErrorCode::TXN_OK) {
3575
7
            LOG_INFO("removed packed file metadata")
3576
7
                    .tag("instance_id", instance_id_)
3577
7
                    .tag("packed_file_path", packed_file_path);
3578
7
            return 0;
3579
7
        }
3580
0
        if (err == TxnErrorCode::TXN_CONFLICT) {
3581
0
            if (attempt >= max_retry_times) {
3582
0
                LOG_WARNING("failed to remove packed file kv due to conflict after max retry")
3583
0
                        .tag("instance_id", instance_id_)
3584
0
                        .tag("packed_file_path", packed_file_path)
3585
0
                        .tag("attempt", attempt);
3586
0
                return -1;
3587
0
            }
3588
0
            LOG_WARNING("failed to remove packed file kv due to conflict, retrying")
3589
0
                    .tag("instance_id", instance_id_)
3590
0
                    .tag("packed_file_path", packed_file_path)
3591
0
                    .tag("attempt", attempt);
3592
0
            sleep_for_packed_file_retry();
3593
0
            continue;
3594
0
        }
3595
0
        LOG_WARNING("failed to remove packed file kv")
3596
0
                .tag("instance_id", instance_id_)
3597
0
                .tag("packed_file_path", packed_file_path)
3598
0
                .tag("attempt", attempt)
3599
0
                .tag("err", err);
3600
0
        return -1;
3601
0
    }
3602
0
    return -1;
3603
7
}
3604
3605
int InstanceRecycler::delete_rowset_data(
3606
        const std::map<std::string, doris::RowsetMetaCloudPB>& rowsets, RowsetRecyclingState type,
3607
98
        RecyclerMetricsContext& metrics_context) {
3608
98
    int ret = 0;
3609
    // resource_id -> file_paths
3610
98
    std::map<std::string, std::vector<std::string>> resource_file_paths;
3611
    // (resource_id, tablet_id, rowset_id)
3612
98
    std::vector<std::tuple<std::string, int64_t, std::string>> rowsets_delete_by_prefix;
3613
98
    bool is_formal_rowset = (type == RowsetRecyclingState::FORMAL_ROWSET);
3614
3615
57.1k
    for (const auto& [_, rs] : rowsets) {
3616
        // we have to treat tmp rowset as "orphans" that may not related to any existing tablets
3617
        // due to aborted schema change.
3618
57.1k
        if (is_formal_rowset) {
3619
3.16k
            std::lock_guard lock(recycled_tablets_mtx_);
3620
3.16k
            if (recycled_tablets_.count(rs.tablet_id()) && rs.packed_slice_locations_size() == 0) {
3621
                // Tablet has been recycled and this rowset has no packed slices, so file data
3622
                // should already be gone; skip to avoid redundant deletes. Rowsets with packed
3623
                // slice info must still run to decrement packed file ref counts.
3624
0
                continue;
3625
0
            }
3626
3.16k
        }
3627
3628
57.1k
        int64_t num_segments = rs.num_segments();
3629
        // Check num_segments before accessor lookup, because empty rowsets
3630
        // (e.g. base compaction output of empty rowsets) may have no resource_id
3631
        // set. Skipping them early avoids a spurious "no such resource id" error
3632
        // that marks the entire batch as failed and prevents txn_remove from
3633
        // cleaning up recycle KV keys.
3634
57.1k
        if (num_segments <= 0) {
3635
0
            metrics_context.total_recycled_num++;
3636
0
            metrics_context.total_recycled_data_size += rs.total_disk_size();
3637
0
            continue;
3638
0
        }
3639
3640
57.1k
        auto it = accessor_map_.find(rs.resource_id());
3641
        // possible if the accessor is not initilized correctly
3642
57.1k
        if (it == accessor_map_.end()) [[unlikely]] {
3643
3.00k
            LOG_WARNING("instance has no such resource id")
3644
3.00k
                    .tag("instance_id", instance_id_)
3645
3.00k
                    .tag("resource_id", rs.resource_id());
3646
3.00k
            ret = -1;
3647
3.00k
            continue;
3648
3.00k
        }
3649
3650
54.1k
        auto& file_paths = resource_file_paths[rs.resource_id()];
3651
54.1k
        const auto& rowset_id = rs.rowset_id_v2();
3652
54.1k
        int64_t tablet_id = rs.tablet_id();
3653
54.1k
        LOG_INFO("recycle rowset merge index size")
3654
54.1k
                .tag("instance_id", instance_id_)
3655
54.1k
                .tag("tablet_id", tablet_id)
3656
54.1k
                .tag("rowset_id", rowset_id)
3657
54.1k
                .tag("merge_index_size", rs.packed_slice_locations_size());
3658
54.1k
        if (decrement_packed_file_ref_counts(rs) != 0) {
3659
0
            ret = -1;
3660
0
            continue;
3661
0
        }
3662
3663
        // Process delete bitmap - check where it's stored.
3664
54.1k
        DeleteBitmapStorageType delete_bitmap_storage_type = DeleteBitmapStorageType::NOT_FOUND;
3665
54.1k
        if (decrement_delete_bitmap_packed_file_ref_counts(tablet_id, rowset_id,
3666
54.1k
                                                           &delete_bitmap_storage_type) != 0) {
3667
0
            LOG_WARNING("failed to decrement delete bitmap packed file ref count")
3668
0
                    .tag("instance_id", instance_id_)
3669
0
                    .tag("tablet_id", tablet_id)
3670
0
                    .tag("rowset_id", rowset_id);
3671
0
            ret = -1;
3672
0
            continue;
3673
0
        }
3674
54.1k
        if (delete_bitmap_storage_type == DeleteBitmapStorageType::STANDALONE_FILE) {
3675
51.5k
            file_paths.push_back(delete_bitmap_path(tablet_id, rowset_id));
3676
51.5k
        }
3677
3678
        // Process inverted indexes
3679
54.1k
        std::vector<std::pair<int64_t, std::string>> index_ids;
3680
        // default format as v1.
3681
54.1k
        InvertedIndexStorageFormatPB index_format = InvertedIndexStorageFormatPB::V1;
3682
54.1k
        int inverted_index_get_ret = 0;
3683
54.1k
        if (rs.has_tablet_schema()) {
3684
53.5k
            for (const auto& index : rs.tablet_schema().index()) {
3685
53.5k
                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
3686
53.5k
                    index_ids.emplace_back(index.index_id(), index.index_suffix_name());
3687
53.5k
                }
3688
53.5k
            }
3689
26.6k
            if (rs.tablet_schema().has_inverted_index_storage_format()) {
3690
26.5k
                index_format = rs.tablet_schema().inverted_index_storage_format();
3691
26.5k
            }
3692
27.5k
        } else {
3693
27.5k
            if (!rs.has_index_id() || !rs.has_schema_version()) {
3694
0
                LOG(WARNING) << "rowset must have either schema or schema_version and index_id, "
3695
0
                                "instance_id="
3696
0
                             << instance_id_ << " tablet_id=" << tablet_id
3697
0
                             << " rowset_id=" << rowset_id;
3698
0
                ret = -1;
3699
0
                continue;
3700
0
            }
3701
27.5k
            InvertedIndexInfo index_info;
3702
27.5k
            inverted_index_get_ret =
3703
27.5k
                    inverted_index_id_cache_->get(rs.index_id(), rs.schema_version(), index_info);
3704
27.5k
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.tmp_rowset",
3705
27.5k
                                     &inverted_index_get_ret);
3706
27.5k
            if (inverted_index_get_ret == 0) {
3707
27.0k
                index_format = index_info.first;
3708
27.0k
                index_ids = index_info.second;
3709
27.0k
            } else if (inverted_index_get_ret == 1) {
3710
                // 1. Schema kv not found means tablet has been recycled
3711
                // Maybe some tablet recycle failed by some bugs
3712
                // We need to delete again to double check
3713
                // 2. Ensure this operation only deletes tablets and does not perform any operations on indexes,
3714
                // because we are uncertain about the inverted index information.
3715
                // If there are inverted indexes, some data might not be deleted,
3716
                // but this is acceptable as we have made our best effort to delete the data.
3717
507
                LOG_INFO(
3718
507
                        "delete rowset data schema kv not found, need to delete again to "
3719
507
                        "double "
3720
507
                        "check")
3721
507
                        .tag("instance_id", instance_id_)
3722
507
                        .tag("tablet_id", tablet_id)
3723
507
                        .tag("rowset", rs.ShortDebugString());
3724
                // Currently index_ids is guaranteed to be empty,
3725
                // but we clear it again here as a safeguard against future code changes
3726
                // that might cause index_ids to no longer be empty
3727
507
                index_format = InvertedIndexStorageFormatPB::V2;
3728
507
                index_ids.clear();
3729
18.4E
            } else {
3730
18.4E
                LOG(WARNING) << "failed to get schema kv for rowset, instance_id=" << instance_id_
3731
18.4E
                             << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id;
3732
18.4E
                ret = -1;
3733
18.4E
                continue;
3734
18.4E
            }
3735
27.5k
        }
3736
54.2k
        if (rs.rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) {
3737
            // if rowset state is RowsetStatePB::BEGIN_PARTIAL_UPDATE, the number of segments data
3738
            // may be larger than num_segments field in RowsetMeta, so we need to delete the rowset's data by prefix
3739
5
            rowsets_delete_by_prefix.emplace_back(rs.resource_id(), tablet_id, rs.rowset_id_v2());
3740
5
            continue;
3741
5
        }
3742
323k
        for (int64_t i = 0; i < num_segments; ++i) {
3743
269k
            file_paths.push_back(segment_path(tablet_id, rowset_id, i));
3744
269k
            if (index_format == InvertedIndexStorageFormatPB::V1) {
3745
535k
                for (const auto& index_id : index_ids) {
3746
535k
                    file_paths.push_back(inverted_index_path_v1(tablet_id, rowset_id, i,
3747
535k
                                                                index_id.first, index_id.second));
3748
535k
                }
3749
267k
            } else if (!index_ids.empty() || inverted_index_get_ret == 1) {
3750
                // try to recycle inverted index v2 when get_ret == 1
3751
                // we treat schema not found as if it has a v2 format inverted index
3752
                // to reduce chance of data leakage
3753
2.50k
                if (inverted_index_get_ret == 1) {
3754
2.50k
                    LOG_INFO("delete rowset data schema kv not found, try to delete index file")
3755
2.50k
                            .tag("instance_id", instance_id_)
3756
2.50k
                            .tag("inverted index v2 path",
3757
2.50k
                                 inverted_index_path_v2(tablet_id, rowset_id, i));
3758
2.50k
                }
3759
2.50k
                file_paths.push_back(inverted_index_path_v2(tablet_id, rowset_id, i));
3760
2.50k
            }
3761
269k
        }
3762
54.1k
    }
3763
3764
98
    SyncExecutor<int> concurrent_delete_executor(_thread_pool_group.s3_producer_pool,
3765
98
                                                 "delete_rowset_data",
3766
98
                                                 [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3766
5
                                                 [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_1clERKi
Line
Count
Source
3766
51
                                                 [](const int& ret) { return ret != 0; });
3767
98
    for (auto& [resource_id, file_paths] : resource_file_paths) {
3768
51
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3769
51
            DCHECK(accessor_map_.count(*rid))
3770
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3771
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3772
51
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3773
51
                                     &accessor_map_);
3774
51
            if (!accessor_map_.contains(*rid)) {
3775
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3776
0
                        .tag("resource_id", resource_id)
3777
0
                        .tag("instance_id", instance_id_);
3778
0
                return -1;
3779
0
            }
3780
51
            auto& accessor = accessor_map_[*rid];
3781
51
            int ret = accessor->delete_files(*paths);
3782
51
            if (!ret) {
3783
                // deduplication of different files with the same rowset id
3784
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3785
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3786
51
                std::set<std::string> deleted_rowset_id;
3787
3788
51
                std::for_each(paths->begin(), paths->end(),
3789
51
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3790
859k
                               this](const std::string& path) {
3791
859k
                                  std::vector<std::string> str;
3792
859k
                                  butil::SplitString(path, '/', &str);
3793
859k
                                  std::string rowset_id;
3794
859k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3795
855k
                                      rowset_id = str.back().substr(0, pos);
3796
855k
                                  } else {
3797
3.71k
                                      if (path.find("packed_file/") != std::string::npos) {
3798
0
                                          return; // packed files do not have rowset_id encoded
3799
0
                                      }
3800
3.71k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3801
3.71k
                                      return;
3802
3.71k
                                  }
3803
855k
                                  auto rs_meta = rowsets.find(rowset_id);
3804
855k
                                  if (rs_meta != rowsets.end() &&
3805
859k
                                      !deleted_rowset_id.contains(rowset_id)) {
3806
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3807
54.1k
                                      metrics_context.total_recycled_data_size +=
3808
54.1k
                                              rs_meta->second.total_disk_size();
3809
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3810
54.1k
                                              rs_meta->second.num_segments();
3811
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3812
54.1k
                                              rs_meta->second.total_disk_size();
3813
54.1k
                                      metrics_context.total_recycled_num++;
3814
54.1k
                                  }
3815
855k
                              });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3790
7
                               this](const std::string& path) {
3791
7
                                  std::vector<std::string> str;
3792
7
                                  butil::SplitString(path, '/', &str);
3793
7
                                  std::string rowset_id;
3794
7
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3795
7
                                      rowset_id = str.back().substr(0, pos);
3796
7
                                  } else {
3797
0
                                      if (path.find("packed_file/") != std::string::npos) {
3798
0
                                          return; // packed files do not have rowset_id encoded
3799
0
                                      }
3800
0
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3801
0
                                      return;
3802
0
                                  }
3803
7
                                  auto rs_meta = rowsets.find(rowset_id);
3804
7
                                  if (rs_meta != rowsets.end() &&
3805
7
                                      !deleted_rowset_id.contains(rowset_id)) {
3806
7
                                      deleted_rowset_id.emplace(rowset_id);
3807
7
                                      metrics_context.total_recycled_data_size +=
3808
7
                                              rs_meta->second.total_disk_size();
3809
7
                                      segment_metrics_context_.total_recycled_num +=
3810
7
                                              rs_meta->second.num_segments();
3811
7
                                      segment_metrics_context_.total_recycled_data_size +=
3812
7
                                              rs_meta->second.total_disk_size();
3813
7
                                      metrics_context.total_recycled_num++;
3814
7
                                  }
3815
7
                              });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEvENKUlRSD_E_clESN_
Line
Count
Source
3790
859k
                               this](const std::string& path) {
3791
859k
                                  std::vector<std::string> str;
3792
859k
                                  butil::SplitString(path, '/', &str);
3793
859k
                                  std::string rowset_id;
3794
859k
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3795
855k
                                      rowset_id = str.back().substr(0, pos);
3796
855k
                                  } else {
3797
3.71k
                                      if (path.find("packed_file/") != std::string::npos) {
3798
0
                                          return; // packed files do not have rowset_id encoded
3799
0
                                      }
3800
3.71k
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3801
3.71k
                                      return;
3802
3.71k
                                  }
3803
855k
                                  auto rs_meta = rowsets.find(rowset_id);
3804
855k
                                  if (rs_meta != rowsets.end() &&
3805
859k
                                      !deleted_rowset_id.contains(rowset_id)) {
3806
54.1k
                                      deleted_rowset_id.emplace(rowset_id);
3807
54.1k
                                      metrics_context.total_recycled_data_size +=
3808
54.1k
                                              rs_meta->second.total_disk_size();
3809
54.1k
                                      segment_metrics_context_.total_recycled_num +=
3810
54.1k
                                              rs_meta->second.num_segments();
3811
54.1k
                                      segment_metrics_context_.total_recycled_data_size +=
3812
54.1k
                                              rs_meta->second.total_disk_size();
3813
54.1k
                                      metrics_context.total_recycled_num++;
3814
54.1k
                                  }
3815
855k
                              });
3816
51
            }
3817
51
            return ret;
3818
51
        });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3768
5
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3769
5
            DCHECK(accessor_map_.count(*rid))
3770
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3771
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3772
5
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3773
5
                                     &accessor_map_);
3774
5
            if (!accessor_map_.contains(*rid)) {
3775
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3776
0
                        .tag("resource_id", resource_id)
3777
0
                        .tag("instance_id", instance_id_);
3778
0
                return -1;
3779
0
            }
3780
5
            auto& accessor = accessor_map_[*rid];
3781
5
            int ret = accessor->delete_files(*paths);
3782
5
            if (!ret) {
3783
                // deduplication of different files with the same rowset id
3784
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3785
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3786
5
                std::set<std::string> deleted_rowset_id;
3787
3788
5
                std::for_each(paths->begin(), paths->end(),
3789
5
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3790
5
                               this](const std::string& path) {
3791
5
                                  std::vector<std::string> str;
3792
5
                                  butil::SplitString(path, '/', &str);
3793
5
                                  std::string rowset_id;
3794
5
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3795
5
                                      rowset_id = str.back().substr(0, pos);
3796
5
                                  } else {
3797
5
                                      if (path.find("packed_file/") != std::string::npos) {
3798
5
                                          return; // packed files do not have rowset_id encoded
3799
5
                                      }
3800
5
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3801
5
                                      return;
3802
5
                                  }
3803
5
                                  auto rs_meta = rowsets.find(rowset_id);
3804
5
                                  if (rs_meta != rowsets.end() &&
3805
5
                                      !deleted_rowset_id.contains(rowset_id)) {
3806
5
                                      deleted_rowset_id.emplace(rowset_id);
3807
5
                                      metrics_context.total_recycled_data_size +=
3808
5
                                              rs_meta->second.total_disk_size();
3809
5
                                      segment_metrics_context_.total_recycled_num +=
3810
5
                                              rs_meta->second.num_segments();
3811
5
                                      segment_metrics_context_.total_recycled_data_size +=
3812
5
                                              rs_meta->second.total_disk_size();
3813
5
                                      metrics_context.total_recycled_num++;
3814
5
                                  }
3815
5
                              });
3816
5
            }
3817
5
            return ret;
3818
5
        });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
3768
46
        concurrent_delete_executor.add([&, rid = &resource_id, paths = &file_paths]() -> int {
3769
46
            DCHECK(accessor_map_.count(*rid))
3770
0
                    << "uninitilized accessor, instance_id=" << instance_id_
3771
0
                    << " resource_id=" << resource_id << " path[0]=" << (*paths)[0];
3772
46
            TEST_SYNC_POINT_CALLBACK("InstanceRecycler::delete_rowset_data.no_resource_id",
3773
46
                                     &accessor_map_);
3774
46
            if (!accessor_map_.contains(*rid)) {
3775
0
                LOG_WARNING("delete rowset data accessor_map_ does not contains resouce id")
3776
0
                        .tag("resource_id", resource_id)
3777
0
                        .tag("instance_id", instance_id_);
3778
0
                return -1;
3779
0
            }
3780
46
            auto& accessor = accessor_map_[*rid];
3781
46
            int ret = accessor->delete_files(*paths);
3782
46
            if (!ret) {
3783
                // deduplication of different files with the same rowset id
3784
                // 020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.dat
3785
                //020000000000007fd045a62bc87a6587dd7ac274aa36e5a9_0.idx
3786
46
                std::set<std::string> deleted_rowset_id;
3787
3788
46
                std::for_each(paths->begin(), paths->end(),
3789
46
                              [&metrics_context, &rowsets, &deleted_rowset_id,
3790
46
                               this](const std::string& path) {
3791
46
                                  std::vector<std::string> str;
3792
46
                                  butil::SplitString(path, '/', &str);
3793
46
                                  std::string rowset_id;
3794
46
                                  if (auto pos = str.back().find('_'); pos != std::string::npos) {
3795
46
                                      rowset_id = str.back().substr(0, pos);
3796
46
                                  } else {
3797
46
                                      if (path.find("packed_file/") != std::string::npos) {
3798
46
                                          return; // packed files do not have rowset_id encoded
3799
46
                                      }
3800
46
                                      LOG(WARNING) << "failed to parse rowset_id, path=" << path;
3801
46
                                      return;
3802
46
                                  }
3803
46
                                  auto rs_meta = rowsets.find(rowset_id);
3804
46
                                  if (rs_meta != rowsets.end() &&
3805
46
                                      !deleted_rowset_id.contains(rowset_id)) {
3806
46
                                      deleted_rowset_id.emplace(rowset_id);
3807
46
                                      metrics_context.total_recycled_data_size +=
3808
46
                                              rs_meta->second.total_disk_size();
3809
46
                                      segment_metrics_context_.total_recycled_num +=
3810
46
                                              rs_meta->second.num_segments();
3811
46
                                      segment_metrics_context_.total_recycled_data_size +=
3812
46
                                              rs_meta->second.total_disk_size();
3813
46
                                      metrics_context.total_recycled_num++;
3814
46
                                  }
3815
46
                              });
3816
46
            }
3817
46
            return ret;
3818
46
        });
3819
51
    }
3820
98
    for (const auto& [resource_id, tablet_id, rowset_id] : rowsets_delete_by_prefix) {
3821
5
        LOG_INFO(
3822
5
                "delete rowset {} by prefix because it's in BEGIN_PARTIAL_UPDATE state, "
3823
5
                "resource_id={}, tablet_id={}, instance_id={}, task_type={}",
3824
5
                rowset_id, resource_id, tablet_id, instance_id_, metrics_context.operation_type);
3825
5
        concurrent_delete_executor.add([&]() -> int {
3826
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3827
5
            if (!ret) {
3828
5
                auto rs = rowsets.at(rowset_id);
3829
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3830
5
                metrics_context.total_recycled_num++;
3831
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3832
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3833
5
            }
3834
5
            return ret;
3835
5
        });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler18delete_rowset_dataERKSt3mapINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEENS_17RowsetMetaCloudPBESt4lessIS8_ESaISt4pairIKS8_S9_EEENS0_20RowsetRecyclingStateERNS0_22RecyclerMetricsContextEENK3$_2clEv
Line
Count
Source
3825
5
        concurrent_delete_executor.add([&]() -> int {
3826
5
            int ret = delete_rowset_data(resource_id, tablet_id, rowset_id);
3827
5
            if (!ret) {
3828
5
                auto rs = rowsets.at(rowset_id);
3829
5
                metrics_context.total_recycled_data_size += rs.total_disk_size();
3830
5
                metrics_context.total_recycled_num++;
3831
5
                segment_metrics_context_.total_recycled_data_size += rs.total_disk_size();
3832
5
                segment_metrics_context_.total_recycled_num += rs.num_segments();
3833
5
            }
3834
5
            return ret;
3835
5
        });
3836
5
    }
3837
3838
98
    bool finished = true;
3839
98
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
3840
98
    for (int r : rets) {
3841
56
        if (r != 0) {
3842
0
            ret = -1;
3843
0
            break;
3844
0
        }
3845
56
    }
3846
98
    ret = finished ? ret : -1;
3847
98
    return ret;
3848
98
}
3849
3850
int InstanceRecycler::delete_rowset_data(const std::string& resource_id, int64_t tablet_id,
3851
3.30k
                                         const std::string& rowset_id) {
3852
3.30k
    auto it = accessor_map_.find(resource_id);
3853
3.30k
    if (it == accessor_map_.end()) {
3854
400
        LOG_WARNING("instance has no such resource id")
3855
400
                .tag("instance_id", instance_id_)
3856
400
                .tag("resource_id", resource_id)
3857
400
                .tag("tablet_id", tablet_id)
3858
400
                .tag("rowset_id", rowset_id);
3859
400
        return -1;
3860
400
    }
3861
2.90k
    auto& accessor = it->second;
3862
2.90k
    return accessor->delete_prefix(rowset_path_prefix(tablet_id, rowset_id));
3863
3.30k
}
3864
3865
4
bool InstanceRecycler::decode_packed_file_key(std::string_view key, std::string* packed_path) {
3866
4
    if (key.empty()) {
3867
0
        return false;
3868
0
    }
3869
4
    std::string_view key_view = key;
3870
4
    key_view.remove_prefix(1); // remove keyspace prefix
3871
4
    std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> decoded;
3872
4
    if (decode_key(&key_view, &decoded) != 0) {
3873
0
        return false;
3874
0
    }
3875
4
    if (decoded.size() < 4) {
3876
0
        return false;
3877
0
    }
3878
4
    try {
3879
4
        *packed_path = std::get<std::string>(std::get<0>(decoded.back()));
3880
4
    } catch (const std::bad_variant_access&) {
3881
0
        return false;
3882
0
    }
3883
4
    return true;
3884
4
}
3885
3886
14
int InstanceRecycler::recycle_packed_files() {
3887
14
    const std::string task_name = "recycle_packed_files";
3888
14
    auto start_tp = steady_clock::now();
3889
14
    int64_t start_time = duration_cast<seconds>(start_tp.time_since_epoch()).count();
3890
14
    int ret = 0;
3891
14
    PackedFileRecycleStats stats;
3892
3893
14
    register_recycle_task(task_name, start_time);
3894
14
    DORIS_CLOUD_DEFER {
3895
14
        unregister_recycle_task(task_name);
3896
14
        int64_t cost =
3897
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3898
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3899
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3900
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3901
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3902
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3903
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3904
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3905
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3906
14
                                                             stats.bytes_object_deleted);
3907
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3908
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3909
14
                .tag("instance_id", instance_id_)
3910
14
                .tag("num_scanned", stats.num_scanned)
3911
14
                .tag("num_corrected", stats.num_corrected)
3912
14
                .tag("num_deleted", stats.num_deleted)
3913
14
                .tag("num_failed", stats.num_failed)
3914
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3915
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3916
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3917
14
                .tag("bytes_deleted", stats.bytes_deleted)
3918
14
                .tag("ret", ret);
3919
14
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_0clEv
Line
Count
Source
3894
14
    DORIS_CLOUD_DEFER {
3895
14
        unregister_recycle_task(task_name);
3896
14
        int64_t cost =
3897
14
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
3898
14
        int64_t cost_ms = duration_cast<milliseconds>(steady_clock::now() - start_tp).count();
3899
14
        g_bvar_recycler_packed_file_recycled_kv_num.put(instance_id_, stats.num_deleted);
3900
14
        g_bvar_recycler_packed_file_recycled_kv_bytes.put(instance_id_, stats.bytes_deleted);
3901
14
        g_bvar_recycler_packed_file_recycle_cost_ms.put(instance_id_, cost_ms);
3902
14
        g_bvar_recycler_packed_file_scanned_kv_num.put(instance_id_, stats.num_scanned);
3903
14
        g_bvar_recycler_packed_file_corrected_kv_num.put(instance_id_, stats.num_corrected);
3904
14
        g_bvar_recycler_packed_file_recycled_object_num.put(instance_id_, stats.num_object_deleted);
3905
14
        g_bvar_recycler_packed_file_bytes_object_deleted.put(instance_id_,
3906
14
                                                             stats.bytes_object_deleted);
3907
14
        g_bvar_recycler_packed_file_rowset_scanned_num.put(instance_id_, stats.rowset_scan_count);
3908
14
        LOG_INFO("recycle packed files finished, cost={}s", cost)
3909
14
                .tag("instance_id", instance_id_)
3910
14
                .tag("num_scanned", stats.num_scanned)
3911
14
                .tag("num_corrected", stats.num_corrected)
3912
14
                .tag("num_deleted", stats.num_deleted)
3913
14
                .tag("num_failed", stats.num_failed)
3914
14
                .tag("num_objects_deleted", stats.num_object_deleted)
3915
14
                .tag("bytes_object_deleted", stats.bytes_object_deleted)
3916
14
                .tag("rowset_scan_count", stats.rowset_scan_count)
3917
14
                .tag("bytes_deleted", stats.bytes_deleted)
3918
14
                .tag("ret", ret);
3919
14
    };
3920
3921
14
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3922
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3923
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3924
4
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_packed_filesEvENK3$_1clISt17basic_string_viewIcSt11char_traitsIcEES7_EEDaOT_OT0_
Line
Count
Source
3921
4
    auto recycle_func = [this, &stats, &ret](auto&& key, auto&& value) {
3922
4
        return handle_packed_file_kv(std::forward<decltype(key)>(key),
3923
4
                                     std::forward<decltype(value)>(value), &stats, &ret);
3924
4
    };
3925
3926
14
    LOG_INFO("begin to recycle packed file").tag("instance_id", instance_id_);
3927
3928
14
    std::string begin = packed_file_key({instance_id_, ""});
3929
14
    std::string end = packed_file_key({instance_id_, "\xff"});
3930
14
    if (scan_and_recycle(begin, end, recycle_func) != 0) {
3931
0
        ret = -1;
3932
0
    }
3933
3934
14
    return ret;
3935
14
}
3936
3937
int InstanceRecycler::scan_tablets_and_statistics(int64_t table_id, int64_t index_id,
3938
                                                  RecyclerMetricsContext& metrics_context,
3939
0
                                                  int64_t partition_id, bool is_empty_tablet) {
3940
0
    std::string tablet_key_begin, tablet_key_end;
3941
3942
0
    if (partition_id > 0) {
3943
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id, 0}, &tablet_key_begin);
3944
0
        meta_tablet_key({instance_id_, table_id, index_id, partition_id + 1, 0}, &tablet_key_end);
3945
0
    } else {
3946
0
        meta_tablet_key({instance_id_, table_id, index_id, 0, 0}, &tablet_key_begin);
3947
0
        meta_tablet_key({instance_id_, table_id, index_id + 1, 0, 0}, &tablet_key_end);
3948
0
    }
3949
    // for calculate the total num or bytes of recyled objects
3950
0
    auto scan_and_statistics = [&, is_empty_tablet, this](std::string_view k,
3951
0
                                                          std::string_view v) -> int {
3952
0
        doris::TabletMetaCloudPB tablet_meta_pb;
3953
0
        if (!tablet_meta_pb.ParseFromArray(v.data(), v.size())) {
3954
0
            return 0;
3955
0
        }
3956
0
        int64_t tablet_id = tablet_meta_pb.tablet_id();
3957
3958
0
        if (!check_lazy_txn_finished(txn_kv_, instance_id_, tablet_meta_pb.tablet_id())) {
3959
0
            return 0;
3960
0
        }
3961
3962
0
        if (!is_empty_tablet) {
3963
0
            if (scan_tablet_and_statistics(tablet_id, metrics_context) != 0) {
3964
0
                return 0;
3965
0
            }
3966
0
            tablet_metrics_context_.total_need_recycle_num++;
3967
0
        }
3968
0
        return 0;
3969
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_tablets_and_statisticsEllRNS0_22RecyclerMetricsContextElbENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES8_
3970
0
    int ret = scan_and_recycle(tablet_key_begin, tablet_key_end, std::move(scan_and_statistics));
3971
0
    metrics_context.report(true);
3972
0
    tablet_metrics_context_.report(true);
3973
0
    segment_metrics_context_.report(true);
3974
0
    return ret;
3975
0
}
3976
3977
int InstanceRecycler::scan_tablet_and_statistics(int64_t tablet_id,
3978
0
                                                 RecyclerMetricsContext& metrics_context) {
3979
0
    int ret = 0;
3980
0
    std::map<std::string, RowsetMetaCloudPB> rowset_meta_map;
3981
0
    std::unique_ptr<Transaction> txn;
3982
0
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
3983
0
        LOG_WARNING("failed to recycle tablet ")
3984
0
                .tag("tablet id", tablet_id)
3985
0
                .tag("instance_id", instance_id_)
3986
0
                .tag("reason", "failed to create txn");
3987
0
        ret = -1;
3988
0
    }
3989
0
    GetRowsetResponse resp;
3990
0
    std::string msg;
3991
0
    MetaServiceCode code = MetaServiceCode::OK;
3992
    // get rowsets in tablet
3993
0
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
3994
0
                        tablet_id, code, msg, &resp);
3995
0
    if (code != MetaServiceCode::OK) {
3996
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
3997
0
                .tag("tablet id", tablet_id)
3998
0
                .tag("msg", msg)
3999
0
                .tag("code", code)
4000
0
                .tag("instance id", instance_id_);
4001
0
        ret = -1;
4002
0
    }
4003
0
    for (const auto& rs_meta : resp.rowset_meta()) {
4004
        /*
4005
        * For compatibility, we skip the loop for [0-1] here.
4006
        * The purpose of this loop is to delete object files,
4007
        * and since [0-1] only has meta and doesn't have object files,
4008
        * skipping it doesn't affect system correctness.
4009
        *
4010
        * If not skipped, the check "if (!rs_meta.has_resource_id())" below
4011
        * would return error -1 directly, causing the recycle operation to fail.
4012
        *
4013
        * [0-1] doesn't have resource id is a bug.
4014
        * In the future, we will fix this problem, after that,
4015
        * we can remove this if statement.
4016
        *
4017
        * TODO(Yukang-Lian): remove this if statement when [0-1] has resource id in the future.
4018
        */
4019
4020
0
        if (rs_meta.end_version() == 1) {
4021
            // Assert that [0-1] has no resource_id to make sure
4022
            // this if statement will not be forgetted to remove
4023
            // when the resource id bug is fixed
4024
0
            DCHECK(!rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4025
0
            continue;
4026
0
        }
4027
0
        if (!rs_meta.has_resource_id()) {
4028
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4029
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4030
0
                    .tag("instance_id", instance_id_)
4031
0
                    .tag("tablet_id", tablet_id);
4032
0
            continue;
4033
0
        }
4034
0
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4035
0
        auto it = accessor_map_.find(rs_meta.resource_id());
4036
        // possible if the accessor is not initilized correctly
4037
0
        if (it == accessor_map_.end()) [[unlikely]] {
4038
0
            LOG_WARNING(
4039
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4040
0
                    "recycle process")
4041
0
                    .tag("tablet id", tablet_id)
4042
0
                    .tag("instance_id", instance_id_)
4043
0
                    .tag("resource_id", rs_meta.resource_id())
4044
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4045
0
            continue;
4046
0
        }
4047
4048
0
        metrics_context.total_need_recycle_data_size += rs_meta.total_disk_size();
4049
0
        tablet_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4050
0
        segment_metrics_context_.total_need_recycle_data_size += rs_meta.total_disk_size();
4051
0
        segment_metrics_context_.total_need_recycle_num += rs_meta.num_segments();
4052
0
    }
4053
0
    return ret;
4054
0
}
4055
4056
4.25k
int InstanceRecycler::recycle_tablet(int64_t tablet_id, RecyclerMetricsContext& metrics_context) {
4057
4.25k
    LOG_INFO("begin to recycle rowsets in a dropped tablet")
4058
4.25k
            .tag("instance_id", instance_id_)
4059
4.25k
            .tag("tablet_id", tablet_id);
4060
4061
4.25k
    if (should_recycle_versioned_keys()) {
4062
11
        int ret = recycle_versioned_tablet(tablet_id, metrics_context);
4063
11
        if (ret != 0) {
4064
0
            return ret;
4065
0
        }
4066
        // Continue to recycle non-versioned rowsets, if multi-version is set to DISABLED
4067
        // during the recycle_versioned_tablet process.
4068
        //
4069
        // .. And remove restore job rowsets of this tablet too
4070
11
    }
4071
4072
4.25k
    int ret = 0;
4073
4.25k
    auto start_time = steady_clock::now();
4074
4075
4.25k
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4076
4077
    // collect resource ids
4078
248
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4079
248
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4080
248
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4081
248
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4082
248
    std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
4083
248
    std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
4084
4085
248
    std::set<std::string> resource_ids;
4086
248
    int64_t recycle_rowsets_number = 0;
4087
248
    int64_t recycle_segments_number = 0;
4088
248
    int64_t recycle_rowsets_data_size = 0;
4089
248
    int64_t recycle_rowsets_index_size = 0;
4090
248
    int64_t recycle_restore_job_rowsets_number = 0;
4091
248
    int64_t recycle_restore_job_segments_number = 0;
4092
248
    int64_t recycle_restore_job_rowsets_data_size = 0;
4093
248
    int64_t recycle_restore_job_rowsets_index_size = 0;
4094
248
    int64_t max_rowset_version = 0;
4095
248
    int64_t min_rowset_creation_time = INT64_MAX;
4096
248
    int64_t max_rowset_creation_time = 0;
4097
248
    int64_t min_rowset_expiration_time = INT64_MAX;
4098
248
    int64_t max_rowset_expiration_time = 0;
4099
4100
248
    DORIS_CLOUD_DEFER {
4101
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4102
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4103
248
                .tag("instance_id", instance_id_)
4104
248
                .tag("tablet_id", tablet_id)
4105
248
                .tag("recycle rowsets number", recycle_rowsets_number)
4106
248
                .tag("recycle segments number", recycle_segments_number)
4107
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4108
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4109
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4110
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4111
248
                .tag("all restore job rowsets recycle data size",
4112
248
                     recycle_restore_job_rowsets_data_size)
4113
248
                .tag("all restore job rowsets recycle index size",
4114
248
                     recycle_restore_job_rowsets_index_size)
4115
248
                .tag("max rowset version", max_rowset_version)
4116
248
                .tag("min rowset creation time", min_rowset_creation_time)
4117
248
                .tag("max rowset creation time", max_rowset_creation_time)
4118
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
4119
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
4120
248
                .tag("task type", metrics_context.operation_type)
4121
248
                .tag("ret", ret);
4122
248
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4100
248
    DORIS_CLOUD_DEFER {
4101
248
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4102
248
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4103
248
                .tag("instance_id", instance_id_)
4104
248
                .tag("tablet_id", tablet_id)
4105
248
                .tag("recycle rowsets number", recycle_rowsets_number)
4106
248
                .tag("recycle segments number", recycle_segments_number)
4107
248
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4108
248
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4109
248
                .tag("recycle restore job rowsets number", recycle_restore_job_rowsets_number)
4110
248
                .tag("recycle restore job segments number", recycle_restore_job_segments_number)
4111
248
                .tag("all restore job rowsets recycle data size",
4112
248
                     recycle_restore_job_rowsets_data_size)
4113
248
                .tag("all restore job rowsets recycle index size",
4114
248
                     recycle_restore_job_rowsets_index_size)
4115
248
                .tag("max rowset version", max_rowset_version)
4116
248
                .tag("min rowset creation time", min_rowset_creation_time)
4117
248
                .tag("max rowset creation time", max_rowset_creation_time)
4118
248
                .tag("min rowset expiration time", min_rowset_expiration_time)
4119
248
                .tag("max rowset expiration time", max_rowset_expiration_time)
4120
248
                .tag("task type", metrics_context.operation_type)
4121
248
                .tag("ret", ret);
4122
248
    };
4123
4124
248
    std::unique_ptr<Transaction> txn;
4125
248
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4126
0
        LOG_WARNING("failed to recycle tablet ")
4127
0
                .tag("tablet id", tablet_id)
4128
0
                .tag("instance_id", instance_id_)
4129
0
                .tag("reason", "failed to create txn");
4130
0
        ret = -1;
4131
0
    }
4132
248
    GetRowsetResponse resp;
4133
248
    std::string msg;
4134
248
    MetaServiceCode code = MetaServiceCode::OK;
4135
    // get rowsets in tablet
4136
248
    internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id_,
4137
248
                        tablet_id, code, msg, &resp);
4138
248
    if (code != MetaServiceCode::OK) {
4139
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4140
0
                .tag("tablet id", tablet_id)
4141
0
                .tag("msg", msg)
4142
0
                .tag("code", code)
4143
0
                .tag("instance id", instance_id_);
4144
0
        ret = -1;
4145
0
    }
4146
248
    TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_tablet.create_rowset_meta", &resp);
4147
4148
2.51k
    for (const auto& rs_meta : resp.rowset_meta()) {
4149
        // The rowset has no resource id and segments when it was generated by compaction
4150
        // with multiple hole rowsets or it's version is [0-1], so we can skip it.
4151
2.51k
        if (!rs_meta.has_resource_id() && rs_meta.num_segments() == 0) {
4152
0
            LOG_INFO("rowset meta does not have a resource id and no segments, skip this rowset")
4153
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4154
0
                    .tag("instance_id", instance_id_)
4155
0
                    .tag("tablet_id", tablet_id);
4156
0
            recycle_rowsets_number += 1;
4157
0
            continue;
4158
0
        }
4159
2.51k
        if (!rs_meta.has_resource_id()) {
4160
1
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4161
1
                    .tag("rs_meta", rs_meta.ShortDebugString())
4162
1
                    .tag("instance_id", instance_id_)
4163
1
                    .tag("tablet_id", tablet_id);
4164
1
            return -1;
4165
1
        }
4166
2.51k
        DCHECK(rs_meta.has_resource_id()) << "rs_meta" << rs_meta.ShortDebugString();
4167
2.51k
        auto it = accessor_map_.find(rs_meta.resource_id());
4168
        // possible if the accessor is not initilized correctly
4169
2.51k
        if (it == accessor_map_.end()) [[unlikely]] {
4170
1
            LOG_WARNING(
4171
1
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4172
1
                    "recycle process")
4173
1
                    .tag("tablet id", tablet_id)
4174
1
                    .tag("instance_id", instance_id_)
4175
1
                    .tag("resource_id", rs_meta.resource_id())
4176
1
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4177
1
            return -1;
4178
1
        }
4179
2.51k
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4180
0
            LOG_WARNING("failed to update packed file info when recycling tablet")
4181
0
                    .tag("instance_id", instance_id_)
4182
0
                    .tag("tablet_id", tablet_id)
4183
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4184
0
            return -1;
4185
0
        }
4186
2.51k
        recycle_rowsets_number += 1;
4187
2.51k
        recycle_segments_number += rs_meta.num_segments();
4188
2.51k
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4189
2.51k
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4190
2.51k
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4191
2.51k
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4192
2.51k
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4193
2.51k
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4194
2.51k
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4195
2.51k
        resource_ids.emplace(rs_meta.resource_id());
4196
2.51k
    }
4197
4198
    // get restore job rowset in tablet
4199
246
    std::vector<std::pair<std::string, doris::RowsetMetaCloudPB>> restore_job_rs_metas;
4200
246
    scan_restore_job_rowset(txn.get(), instance_id_, tablet_id, code, msg, &restore_job_rs_metas);
4201
246
    if (code != MetaServiceCode::OK) {
4202
0
        LOG_WARNING("scan restore job rowsets failed when recycle tablet")
4203
0
                .tag("tablet id", tablet_id)
4204
0
                .tag("msg", msg)
4205
0
                .tag("code", code)
4206
0
                .tag("instance id", instance_id_);
4207
0
        return -1;
4208
0
    }
4209
4210
246
    for (auto& [_, rs_meta] : restore_job_rs_metas) {
4211
0
        if (!rs_meta.has_resource_id()) {
4212
0
            LOG_WARNING("rowset meta does not have a resource id, impossible!")
4213
0
                    .tag("rs_meta", rs_meta.ShortDebugString())
4214
0
                    .tag("instance_id", instance_id_)
4215
0
                    .tag("tablet_id", tablet_id);
4216
0
            return -1;
4217
0
        }
4218
4219
0
        auto it = accessor_map_.find(rs_meta.resource_id());
4220
        // possible if the accessor is not initilized correctly
4221
0
        if (it == accessor_map_.end()) [[unlikely]] {
4222
0
            LOG_WARNING(
4223
0
                    "failed to find resource id when recycle tablet, skip this vault accessor "
4224
0
                    "recycle process")
4225
0
                    .tag("tablet id", tablet_id)
4226
0
                    .tag("instance_id", instance_id_)
4227
0
                    .tag("resource_id", rs_meta.resource_id())
4228
0
                    .tag("rowset meta pb", rs_meta.ShortDebugString());
4229
0
            return -1;
4230
0
        }
4231
0
        if (decrement_packed_file_ref_counts(rs_meta) != 0) {
4232
0
            LOG_WARNING("failed to update packed file info when recycling restore job rowset")
4233
0
                    .tag("instance_id", instance_id_)
4234
0
                    .tag("tablet_id", tablet_id)
4235
0
                    .tag("rowset_id", rs_meta.rowset_id_v2());
4236
0
            return -1;
4237
0
        }
4238
0
        recycle_restore_job_rowsets_number += 1;
4239
0
        recycle_restore_job_segments_number += rs_meta.num_segments();
4240
0
        recycle_restore_job_rowsets_data_size += rs_meta.data_disk_size();
4241
0
        recycle_restore_job_rowsets_index_size += rs_meta.index_disk_size();
4242
0
        resource_ids.emplace(rs_meta.resource_id());
4243
0
    }
4244
4245
246
    LOG_INFO("recycle tablet start to delete object")
4246
246
            .tag("instance id", instance_id_)
4247
246
            .tag("tablet id", tablet_id)
4248
246
            .tag("recycle tablet resource ids are",
4249
246
                 std::accumulate(resource_ids.begin(), resource_ids.end(), std::string(),
4250
246
                                 [](std::string rs_id, const auto& it) {
4251
206
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4252
206
                                 }));
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEEEDaSB_RKT_
Line
Count
Source
4250
206
                                 [](std::string rs_id, const auto& it) {
4251
206
                                     return rs_id.empty() ? it : rs_id + ", " + it;
4252
206
                                 }));
4253
4254
246
    SyncExecutor<std::pair<int, std::string>> concurrent_delete_executor(
4255
246
            _thread_pool_group.s3_producer_pool,
4256
246
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4257
246
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKSt4pairIiNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE
Line
Count
Source
4257
206
            [](const std::pair<int, std::string>& ret) { return ret.first != 0; });
4258
4259
    // delete all rowset data in this tablet
4260
    // ATTN: there may be data leak if not all accessor initilized successfully
4261
    //       partial data deleted if the tablet is stored cross-storage vault
4262
    //       vault id is not attached to TabletMeta...
4263
246
    for (const auto& resource_id : resource_ids) {
4264
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, resource_id, "submitted"}, 1);
4265
206
        concurrent_delete_executor.add(
4266
206
                [&, rs_id = resource_id,
4267
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4268
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4269
206
                    if (res != 0) {
4270
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4271
2
                                     << " path=" << accessor_ptr->uri()
4272
2
                                     << " task type=" << metrics_context.operation_type;
4273
2
                        return std::make_pair(-1, rs_id);
4274
2
                    }
4275
204
                    return std::make_pair(0, rs_id);
4276
206
                });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler14recycle_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clB5cxx11Ev
Line
Count
Source
4267
206
                 accessor_ptr = accessor_map_[resource_id]]() -> decltype(auto) {
4268
206
                    int res = accessor_ptr->delete_directory(tablet_path_prefix(tablet_id));
4269
206
                    if (res != 0) {
4270
2
                        LOG(WARNING) << "failed to delete rowset data of tablet " << tablet_id
4271
2
                                     << " path=" << accessor_ptr->uri()
4272
2
                                     << " task type=" << metrics_context.operation_type;
4273
2
                        return std::make_pair(-1, rs_id);
4274
2
                    }
4275
204
                    return std::make_pair(0, rs_id);
4276
206
                });
4277
206
    }
4278
4279
246
    bool finished = true;
4280
246
    std::vector<std::pair<int, std::string>> rets = concurrent_delete_executor.when_all(&finished);
4281
246
    for (auto& r : rets) {
4282
206
        if (r.first != 0) {
4283
2
            g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "error"}, 1);
4284
2
            ret = -1;
4285
2
        }
4286
206
        g_bvar_recycler_vault_recycle_task_status.put({instance_id_, r.second, "completed"}, 1);
4287
206
    }
4288
246
    ret = finished ? ret : -1;
4289
4290
246
    if (ret != 0) { // failed recycle tablet data
4291
2
        LOG_WARNING("ret!=0")
4292
2
                .tag("finished", finished)
4293
2
                .tag("ret", ret)
4294
2
                .tag("instance_id", instance_id_)
4295
2
                .tag("tablet_id", tablet_id);
4296
2
        return ret;
4297
2
    }
4298
4299
244
    tablet_metrics_context_.total_recycled_data_size +=
4300
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4301
244
    tablet_metrics_context_.total_recycled_num += 1;
4302
244
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4303
244
    segment_metrics_context_.total_recycled_data_size +=
4304
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4305
244
    metrics_context.total_recycled_data_size +=
4306
244
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4307
244
    tablet_metrics_context_.report();
4308
244
    segment_metrics_context_.report();
4309
244
    metrics_context.report();
4310
4311
244
    txn.reset();
4312
244
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4313
0
        LOG_WARNING("failed to recycle tablet ")
4314
0
                .tag("tablet id", tablet_id)
4315
0
                .tag("instance_id", instance_id_)
4316
0
                .tag("reason", "failed to create txn");
4317
0
        ret = -1;
4318
0
    }
4319
    // delete all rowset kv in this tablet
4320
244
    txn->remove(rs_key0, rs_key1);
4321
244
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4322
244
    txn->remove(restore_job_rs_key0, restore_job_rs_key1);
4323
4324
    // remove delete bitmap for MoW table
4325
244
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4326
244
    txn->remove(pending_key);
4327
244
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4328
244
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4329
244
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4330
4331
244
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4332
244
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4333
244
    txn->remove(dbm_start_key, dbm_end_key);
4334
244
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4335
244
              << " end=" << hex(dbm_end_key);
4336
4337
244
    TxnErrorCode err = txn->commit();
4338
244
    if (err != TxnErrorCode::TXN_OK) {
4339
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4340
0
        ret = -1;
4341
0
    }
4342
4343
244
    if (ret == 0) {
4344
        // All object files under tablet have been deleted
4345
244
        std::lock_guard lock(recycled_tablets_mtx_);
4346
244
        recycled_tablets_.insert(tablet_id);
4347
244
    }
4348
4349
244
    return ret;
4350
246
}
4351
4352
int InstanceRecycler::recycle_versioned_tablet(int64_t tablet_id,
4353
11
                                               RecyclerMetricsContext& metrics_context) {
4354
11
    int ret = 0;
4355
11
    auto start_time = steady_clock::now();
4356
4357
11
    TEST_SYNC_POINT_RETURN_WITH_VALUE("recycle_tablet::begin", (int)0);
4358
4359
    // collect resource ids
4360
11
    std::string rs_key0 = meta_rowset_key({instance_id_, tablet_id, 0});
4361
11
    std::string rs_key1 = meta_rowset_key({instance_id_, tablet_id + 1, 0});
4362
11
    std::string recyc_rs_key0 = recycle_rowset_key({instance_id_, tablet_id, ""});
4363
11
    std::string recyc_rs_key1 = recycle_rowset_key({instance_id_, tablet_id + 1, ""});
4364
4365
11
    int64_t recycle_rowsets_number = 0;
4366
11
    int64_t recycle_segments_number = 0;
4367
11
    int64_t recycle_rowsets_data_size = 0;
4368
11
    int64_t recycle_rowsets_index_size = 0;
4369
11
    int64_t max_rowset_version = 0;
4370
11
    int64_t min_rowset_creation_time = INT64_MAX;
4371
11
    int64_t max_rowset_creation_time = 0;
4372
11
    int64_t min_rowset_expiration_time = INT64_MAX;
4373
11
    int64_t max_rowset_expiration_time = 0;
4374
4375
11
    DORIS_CLOUD_DEFER {
4376
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4377
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4378
11
                .tag("instance_id", instance_id_)
4379
11
                .tag("tablet_id", tablet_id)
4380
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4381
11
                .tag("recycle segments number", recycle_segments_number)
4382
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4383
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4384
11
                .tag("max rowset version", max_rowset_version)
4385
11
                .tag("min rowset creation time", min_rowset_creation_time)
4386
11
                .tag("max rowset creation time", max_rowset_creation_time)
4387
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4388
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4389
11
                .tag("ret", ret);
4390
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_0clEv
Line
Count
Source
4375
11
    DORIS_CLOUD_DEFER {
4376
11
        auto cost = duration<float>(steady_clock::now() - start_time).count();
4377
11
        LOG_INFO("recycle the rowsets of dropped tablet finished, cost={}s", cost)
4378
11
                .tag("instance_id", instance_id_)
4379
11
                .tag("tablet_id", tablet_id)
4380
11
                .tag("recycle rowsets number", recycle_rowsets_number)
4381
11
                .tag("recycle segments number", recycle_segments_number)
4382
11
                .tag("all rowsets recycle data size", recycle_rowsets_data_size)
4383
11
                .tag("all rowsets recycle index size", recycle_rowsets_index_size)
4384
11
                .tag("max rowset version", max_rowset_version)
4385
11
                .tag("min rowset creation time", min_rowset_creation_time)
4386
11
                .tag("max rowset creation time", max_rowset_creation_time)
4387
11
                .tag("min rowset expiration time", min_rowset_expiration_time)
4388
11
                .tag("max rowset expiration time", max_rowset_expiration_time)
4389
11
                .tag("ret", ret);
4390
11
    };
4391
4392
11
    std::unique_ptr<Transaction> txn;
4393
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4394
0
        LOG_WARNING("failed to recycle tablet ")
4395
0
                .tag("tablet id", tablet_id)
4396
0
                .tag("instance_id", instance_id_)
4397
0
                .tag("reason", "failed to create txn");
4398
0
        ret = -1;
4399
0
    }
4400
4401
    // Read the last version of load and compact rowsets, the previous rowsets will be recycled
4402
    // by the related operation logs.
4403
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> load_rowset_metas;
4404
11
    std::vector<std::pair<RowsetMetaCloudPB, Versionstamp>> compact_rowset_metas;
4405
11
    MetaReader meta_reader(instance_id_);
4406
11
    TxnErrorCode err = meta_reader.get_load_rowset_metas(txn.get(), tablet_id, &load_rowset_metas);
4407
11
    if (err == TxnErrorCode::TXN_OK) {
4408
11
        err = meta_reader.get_compact_rowset_metas(txn.get(), tablet_id, &compact_rowset_metas);
4409
11
    }
4410
11
    if (err != TxnErrorCode::TXN_OK) {
4411
0
        LOG_WARNING("failed to get rowsets of tablet when recycle tablet")
4412
0
                .tag("tablet id", tablet_id)
4413
0
                .tag("err", err)
4414
0
                .tag("instance id", instance_id_);
4415
0
        ret = -1;
4416
0
    }
4417
4418
11
    LOG_INFO("recycle versioned tablet get {} load rowsets and {} compact rowsets",
4419
11
             load_rowset_metas.size(), compact_rowset_metas.size())
4420
11
            .tag("instance_id", instance_id_)
4421
11
            .tag("tablet_id", tablet_id);
4422
4423
11
    SyncExecutor<int> concurrent_delete_executor(
4424
11
            _thread_pool_group.s3_producer_pool,
4425
11
            fmt::format("delete tablet {} s3 rowset", tablet_id),
4426
11
            [](const int& ret) { return ret != 0; });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_1clERKi
4427
4428
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4429
60
        recycle_rowsets_number += 1;
4430
60
        recycle_segments_number += rs_meta.num_segments();
4431
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4432
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4433
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4434
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4435
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4436
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4437
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4438
60
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_2clERKNS_17RowsetMetaCloudPBE
Line
Count
Source
4428
60
    auto update_rowset_stats = [&](const RowsetMetaCloudPB& rs_meta) {
4429
60
        recycle_rowsets_number += 1;
4430
60
        recycle_segments_number += rs_meta.num_segments();
4431
60
        recycle_rowsets_data_size += rs_meta.data_disk_size();
4432
60
        recycle_rowsets_index_size += rs_meta.index_disk_size();
4433
60
        max_rowset_version = std::max(max_rowset_version, rs_meta.end_version());
4434
60
        min_rowset_creation_time = std::min(min_rowset_creation_time, rs_meta.creation_time());
4435
60
        max_rowset_creation_time = std::max(max_rowset_creation_time, rs_meta.creation_time());
4436
60
        min_rowset_expiration_time = std::min(min_rowset_expiration_time, rs_meta.txn_expiration());
4437
60
        max_rowset_expiration_time = std::max(max_rowset_expiration_time, rs_meta.txn_expiration());
4438
60
    };
4439
4440
11
    std::vector<RowsetDeleteTask> all_tasks;
4441
60
    for (const auto& [rs_meta, versionstamp] : load_rowset_metas) {
4442
60
        update_rowset_stats(rs_meta);
4443
        // Version 0-1 rowset has no resource_id and no actual data files,
4444
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4445
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4446
60
        RowsetDeleteTask task;
4447
60
        task.rowset_meta = rs_meta;
4448
60
        task.versioned_rowset_key =
4449
60
                versioned::meta_rowset_load_key({instance_id_, tablet_id, rs_meta.end_version()});
4450
60
        task.non_versioned_rowset_key =
4451
60
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4452
60
        task.versionstamp = versionstamp;
4453
60
        all_tasks.push_back(std::move(task));
4454
60
    }
4455
4456
11
    for (const auto& [rs_meta, versionstamp] : compact_rowset_metas) {
4457
0
        update_rowset_stats(rs_meta);
4458
        // Version 0-1 rowset has no resource_id and no actual data files,
4459
        // but still needs ref_count key cleanup, so we add it to all_tasks.
4460
        // It will be filtered out in Phase 2 when building rowsets_to_delete.
4461
0
        RowsetDeleteTask task;
4462
0
        task.rowset_meta = rs_meta;
4463
0
        task.versioned_rowset_key = versioned::meta_rowset_compact_key(
4464
0
                {instance_id_, tablet_id, rs_meta.end_version()});
4465
0
        task.non_versioned_rowset_key =
4466
0
                meta_rowset_key({instance_id_, tablet_id, rs_meta.end_version()});
4467
0
        task.versionstamp = versionstamp;
4468
0
        all_tasks.push_back(std::move(task));
4469
0
    }
4470
4471
11
    auto handle_recycle_rowset_kv = [&](std::string_view k, std::string_view v) {
4472
0
        RecycleRowsetPB recycle_rowset;
4473
0
        if (!recycle_rowset.ParseFromArray(v.data(), v.size())) {
4474
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4475
0
            return -1;
4476
0
        }
4477
0
        if (!recycle_rowset.has_type()) { // compatible with old version `RecycleRowsetPB`
4478
0
            if (!recycle_rowset.has_resource_id()) [[unlikely]] { // impossible
4479
                // in old version, keep this key-value pair and it needs to be checked manually
4480
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4481
0
                return -1;
4482
0
            }
4483
0
            if (recycle_rowset.resource_id().empty()) [[unlikely]] {
4484
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4485
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4486
0
                          << hex(k) << " value=" << proto_to_json(recycle_rowset);
4487
0
                return -1;
4488
0
            }
4489
            // decode rowset_id
4490
0
            auto k1 = k;
4491
0
            k1.remove_prefix(1);
4492
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4493
0
            decode_key(&k1, &out);
4494
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4495
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4496
0
            LOG_INFO("delete old-version rowset data")
4497
0
                    .tag("instance_id", instance_id_)
4498
0
                    .tag("tablet_id", tablet_id)
4499
0
                    .tag("rowset_id", rowset_id);
4500
4501
            // Old version RecycleRowsetPB lacks full rowset_meta info (num_segments, schema, etc.),
4502
            // so we must use prefix deletion directly instead of batch delete.
4503
0
            concurrent_delete_executor.add(
4504
0
                    [tablet_id, resource_id = recycle_rowset.resource_id(), rowset_id, this]() {
4505
                        // delete by prefix, the recycle rowset key will be deleted by range later.
4506
0
                        return delete_rowset_data(resource_id, tablet_id, rowset_id);
4507
0
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
Unexecuted instantiation: recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_ENKUlvE_clEv
4508
0
        } else {
4509
0
            const auto& rowset_meta = recycle_rowset.rowset_meta();
4510
            // Version 0-1 rowset has no resource_id and no actual data files,
4511
            // but still needs ref_count key cleanup, so we add it to all_tasks.
4512
            // It will be filtered out in Phase 2 when building rowsets_to_delete.
4513
0
            RowsetDeleteTask task;
4514
0
            task.rowset_meta = rowset_meta;
4515
0
            task.recycle_rowset_key = k;
4516
0
            all_tasks.push_back(std::move(task));
4517
0
        }
4518
0
        return 0;
4519
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES8_
4520
4521
11
    if (scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_recycle_rowset_kv))) {
4522
0
        LOG_WARNING("failed to recycle rowset kv of tablet")
4523
0
                .tag("tablet id", tablet_id)
4524
0
                .tag("instance_id", instance_id_)
4525
0
                .tag("reason", "failed to scan and recycle RecycleRowsetPB");
4526
0
        ret = -1;
4527
0
    }
4528
4529
    // Phase 1: Classify tasks by ref_count
4530
11
    std::vector<RowsetDeleteTask> batch_delete_tasks;
4531
60
    for (auto& task : all_tasks) {
4532
60
        int classify_ret = classify_rowset_task_by_ref_count(task, batch_delete_tasks);
4533
60
        if (classify_ret < 0) {
4534
0
            LOG_WARNING("failed to classify rowset task, fallback to old logic")
4535
0
                    .tag("instance_id", instance_id_)
4536
0
                    .tag("tablet_id", tablet_id)
4537
0
                    .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4538
0
            concurrent_delete_executor.add([this, t = std::move(task)]() mutable {
4539
0
                return recycle_rowset_meta_and_data(t);
4540
0
            });
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler24recycle_versioned_tabletElRNS0_22RecyclerMetricsContextEEN3$_4clEv
4541
0
        }
4542
60
    }
4543
4544
11
    g_bvar_recycler_batch_delete_rowset_plan_count.put(instance_id_, batch_delete_tasks.size());
4545
4546
11
    LOG_INFO("batch delete plan created")
4547
11
            .tag("instance_id", instance_id_)
4548
11
            .tag("tablet_id", tablet_id)
4549
11
            .tag("plan_count", batch_delete_tasks.size());
4550
4551
    // Phase 2: Execute batch delete using existing delete_rowset_data
4552
11
    if (!batch_delete_tasks.empty()) {
4553
10
        std::map<std::string, RowsetMetaCloudPB> rowsets_to_delete;
4554
49
        for (const auto& task : batch_delete_tasks) {
4555
            // Version 0-1 rowset has no resource_id and no actual data files, skip it
4556
49
            if (task.rowset_meta.resource_id().empty()) {
4557
10
                LOG_INFO("skip rowset with empty resource_id in batch delete")
4558
10
                        .tag("instance_id", instance_id_)
4559
10
                        .tag("tablet_id", tablet_id)
4560
10
                        .tag("rowset_id", task.rowset_meta.rowset_id_v2());
4561
10
                continue;
4562
10
            }
4563
39
            rowsets_to_delete[task.rowset_meta.rowset_id_v2()] = task.rowset_meta;
4564
39
        }
4565
4566
        // Only call delete_rowset_data if there are rowsets with actual data to delete
4567
10
        bool delete_success = true;
4568
10
        if (!rowsets_to_delete.empty()) {
4569
9
            RecyclerMetricsContext batch_metrics_context(instance_id_,
4570
9
                                                         "batch_delete_versioned_tablet");
4571
9
            int delete_ret = delete_rowset_data(
4572
9
                    rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET, batch_metrics_context);
4573
9
            if (delete_ret != 0) {
4574
0
                LOG_WARNING("batch delete execution failed")
4575
0
                        .tag("instance_id", instance_id_)
4576
0
                        .tag("tablet_id", tablet_id);
4577
0
                g_bvar_recycler_batch_delete_failures.put(instance_id_, 1);
4578
0
                ret = -1;
4579
0
                delete_success = false;
4580
0
            }
4581
9
        }
4582
4583
        // Phase 3: Only cleanup metadata if data deletion succeeded.
4584
        // If deletion failed, keep recycle_rowset_key so next round will retry.
4585
10
        if (delete_success) {
4586
10
            int cleanup_ret = cleanup_rowset_metadata(batch_delete_tasks);
4587
10
            if (cleanup_ret != 0) {
4588
0
                LOG_WARNING("batch delete cleanup failed")
4589
0
                        .tag("instance_id", instance_id_)
4590
0
                        .tag("tablet_id", tablet_id);
4591
0
                ret = -1;
4592
0
            }
4593
10
        }
4594
10
    }
4595
4596
    // Always wait for fallback tasks to complete before returning
4597
11
    bool finished = true;
4598
11
    std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
4599
11
    for (int r : rets) {
4600
0
        if (r != 0) {
4601
0
            ret = -1;
4602
0
        }
4603
0
    }
4604
4605
11
    ret = finished ? ret : -1;
4606
4607
11
    if (ret != 0) { // failed recycle tablet data
4608
0
        LOG_WARNING("recycle versioned tablet failed")
4609
0
                .tag("finished", finished)
4610
0
                .tag("ret", ret)
4611
0
                .tag("instance_id", instance_id_)
4612
0
                .tag("tablet_id", tablet_id);
4613
0
        return ret;
4614
0
    }
4615
4616
11
    tablet_metrics_context_.total_recycled_data_size +=
4617
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4618
11
    tablet_metrics_context_.total_recycled_num += 1;
4619
11
    segment_metrics_context_.total_recycled_num += recycle_segments_number;
4620
11
    segment_metrics_context_.total_recycled_data_size +=
4621
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4622
11
    metrics_context.total_recycled_data_size +=
4623
11
            recycle_rowsets_data_size + recycle_rowsets_index_size;
4624
11
    tablet_metrics_context_.report();
4625
11
    segment_metrics_context_.report();
4626
11
    metrics_context.report();
4627
4628
11
    txn.reset();
4629
11
    if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
4630
0
        LOG_WARNING("failed to recycle tablet ")
4631
0
                .tag("tablet id", tablet_id)
4632
0
                .tag("instance_id", instance_id_)
4633
0
                .tag("reason", "failed to create txn");
4634
0
        ret = -1;
4635
0
    }
4636
    // delete all rowset kv in this tablet
4637
11
    txn->remove(rs_key0, rs_key1);
4638
11
    txn->remove(recyc_rs_key0, recyc_rs_key1);
4639
4640
    // remove delete bitmap for MoW table
4641
11
    std::string pending_key = meta_pending_delete_bitmap_key({instance_id_, tablet_id});
4642
11
    txn->remove(pending_key);
4643
11
    std::string delete_bitmap_start = meta_delete_bitmap_key({instance_id_, tablet_id, "", 0, 0});
4644
11
    std::string delete_bitmap_end = meta_delete_bitmap_key({instance_id_, tablet_id + 1, "", 0, 0});
4645
11
    txn->remove(delete_bitmap_start, delete_bitmap_end);
4646
4647
11
    std::string dbm_start_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id, ""});
4648
11
    std::string dbm_end_key = versioned::meta_delete_bitmap_key({instance_id_, tablet_id + 1, ""});
4649
11
    txn->remove(dbm_start_key, dbm_end_key);
4650
11
    LOG(INFO) << "remove delete bitmap kv, tablet=" << tablet_id << ", begin=" << hex(dbm_start_key)
4651
11
              << " end=" << hex(dbm_end_key);
4652
4653
11
    std::string versioned_idx_key = versioned::tablet_index_key({instance_id_, tablet_id});
4654
11
    std::string tablet_index_val;
4655
11
    err = txn->get(versioned_idx_key, &tablet_index_val);
4656
11
    if (err != TxnErrorCode::TXN_KEY_NOT_FOUND && err != TxnErrorCode::TXN_OK) {
4657
0
        LOG_WARNING("failed to get tablet index kv")
4658
0
                .tag("instance_id", instance_id_)
4659
0
                .tag("tablet_id", tablet_id)
4660
0
                .tag("err", err);
4661
0
        ret = -1;
4662
11
    } else if (err == TxnErrorCode::TXN_OK) {
4663
        // If the tablet index kv exists, we need to delete it
4664
10
        TabletIndexPB tablet_index_pb;
4665
10
        if (!tablet_index_pb.ParseFromString(tablet_index_val)) {
4666
0
            LOG_WARNING("failed to parse tablet index pb")
4667
0
                    .tag("instance_id", instance_id_)
4668
0
                    .tag("tablet_id", tablet_id);
4669
0
            ret = -1;
4670
10
        } else {
4671
10
            std::string versioned_inverted_idx_key = versioned::tablet_inverted_index_key(
4672
10
                    {instance_id_, tablet_index_pb.db_id(), tablet_index_pb.table_id(),
4673
10
                     tablet_index_pb.index_id(), tablet_index_pb.partition_id(), tablet_id});
4674
10
            txn->remove(versioned_inverted_idx_key);
4675
10
            txn->remove(versioned_idx_key);
4676
10
        }
4677
10
    }
4678
4679
11
    err = txn->commit();
4680
11
    if (err != TxnErrorCode::TXN_OK) {
4681
0
        LOG(WARNING) << "failed to delete rowset kv of tablet " << tablet_id << ", err=" << err;
4682
0
        ret = -1;
4683
0
    }
4684
4685
11
    if (ret == 0) {
4686
        // All object files under tablet have been deleted
4687
11
        std::lock_guard lock(recycled_tablets_mtx_);
4688
11
        recycled_tablets_.insert(tablet_id);
4689
11
    }
4690
4691
11
    return ret;
4692
11
}
4693
4694
27
int InstanceRecycler::recycle_rowsets() {
4695
27
    if (should_recycle_versioned_keys()) {
4696
5
        return recycle_versioned_rowsets();
4697
5
    }
4698
4699
22
    const std::string task_name = "recycle_rowsets";
4700
22
    int64_t num_scanned = 0;
4701
22
    int64_t num_expired = 0;
4702
22
    int64_t num_prepare = 0;
4703
22
    int64_t num_compacted = 0;
4704
22
    int64_t num_empty_rowset = 0;
4705
22
    size_t total_rowset_key_size = 0;
4706
22
    size_t total_rowset_value_size = 0;
4707
22
    size_t expired_rowset_size = 0;
4708
22
    std::atomic_long num_recycled = 0;
4709
22
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
4710
4711
22
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
4712
22
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
4713
22
    std::string recyc_rs_key0;
4714
22
    std::string recyc_rs_key1;
4715
22
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
4716
22
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
4717
4718
22
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
4719
4720
22
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
4721
22
    register_recycle_task(task_name, start_time);
4722
4723
22
    DORIS_CLOUD_DEFER {
4724
22
        unregister_recycle_task(task_name);
4725
22
        int64_t cost =
4726
22
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4727
22
        metrics_context.finish_report();
4728
22
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4729
22
                .tag("instance_id", instance_id_)
4730
22
                .tag("num_scanned", num_scanned)
4731
22
                .tag("num_expired", num_expired)
4732
22
                .tag("num_recycled", num_recycled)
4733
22
                .tag("num_recycled.prepare", num_prepare)
4734
22
                .tag("num_recycled.compacted", num_compacted)
4735
22
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4736
22
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4737
22
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4738
22
                .tag("expired_rowset_meta_size", expired_rowset_size);
4739
22
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4723
7
    DORIS_CLOUD_DEFER {
4724
7
        unregister_recycle_task(task_name);
4725
7
        int64_t cost =
4726
7
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4727
7
        metrics_context.finish_report();
4728
7
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4729
7
                .tag("instance_id", instance_id_)
4730
7
                .tag("num_scanned", num_scanned)
4731
7
                .tag("num_expired", num_expired)
4732
7
                .tag("num_recycled", num_recycled)
4733
7
                .tag("num_recycled.prepare", num_prepare)
4734
7
                .tag("num_recycled.compacted", num_compacted)
4735
7
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4736
7
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4737
7
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4738
7
                .tag("expired_rowset_meta_size", expired_rowset_size);
4739
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_0clEv
Line
Count
Source
4723
15
    DORIS_CLOUD_DEFER {
4724
15
        unregister_recycle_task(task_name);
4725
15
        int64_t cost =
4726
15
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
4727
15
        metrics_context.finish_report();
4728
15
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
4729
15
                .tag("instance_id", instance_id_)
4730
15
                .tag("num_scanned", num_scanned)
4731
15
                .tag("num_expired", num_expired)
4732
15
                .tag("num_recycled", num_recycled)
4733
15
                .tag("num_recycled.prepare", num_prepare)
4734
15
                .tag("num_recycled.compacted", num_compacted)
4735
15
                .tag("num_recycled.empty_rowset", num_empty_rowset)
4736
15
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
4737
15
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
4738
15
                .tag("expired_rowset_meta_size", expired_rowset_size);
4739
15
    };
4740
4741
22
    std::vector<std::string> rowset_keys;
4742
    // rowset_id -> rowset_meta
4743
    // store rowset id and meta for statistics rs size when delete
4744
22
    std::map<std::string, doris::RowsetMetaCloudPB> rowsets;
4745
4746
    // Store keys of rowset recycled by background workers
4747
22
    std::mutex async_recycled_rowset_keys_mutex;
4748
22
    std::vector<std::string> async_recycled_rowset_keys;
4749
22
    auto worker_pool = std::make_unique<SimpleThreadPool>(
4750
22
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
4751
22
    worker_pool->start();
4752
    // TODO bacth delete
4753
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4754
4.00k
        std::string dbm_start_key =
4755
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4756
4.00k
        std::string dbm_end_key = dbm_start_key;
4757
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4758
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4759
4.00k
        if (ret != 0) {
4760
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4761
0
                         << instance_id_;
4762
0
        }
4763
4.00k
        return ret;
4764
4.00k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4753
2
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4754
2
        std::string dbm_start_key =
4755
2
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4756
2
        std::string dbm_end_key = dbm_start_key;
4757
2
        encode_int64(INT64_MAX, &dbm_end_key);
4758
2
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4759
2
        if (ret != 0) {
4760
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4761
0
                         << instance_id_;
4762
0
        }
4763
2
        return ret;
4764
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
4753
4.00k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
4754
4.00k
        std::string dbm_start_key =
4755
4.00k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
4756
4.00k
        std::string dbm_end_key = dbm_start_key;
4757
4.00k
        encode_int64(INT64_MAX, &dbm_end_key);
4758
4.00k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
4759
4.00k
        if (ret != 0) {
4760
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
4761
0
                         << instance_id_;
4762
0
        }
4763
4.00k
        return ret;
4764
4.00k
    };
4765
22
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
4766
902
                                            int64_t tablet_id, const std::string& rowset_id) {
4767
        // Try to delete rowset data in background thread
4768
902
        int ret = worker_pool->submit_with_timeout(
4769
902
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4770
810
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4771
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4772
0
                        return;
4773
0
                    }
4774
810
                    std::vector<std::string> keys;
4775
810
                    {
4776
810
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4777
810
                        async_recycled_rowset_keys.push_back(std::move(key));
4778
810
                        if (async_recycled_rowset_keys.size() > 100) {
4779
7
                            keys.swap(async_recycled_rowset_keys);
4780
7
                        }
4781
810
                    }
4782
810
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4783
810
                    if (keys.empty()) return;
4784
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4785
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4786
0
                                     << instance_id_;
4787
7
                    } else {
4788
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4789
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4790
7
                                           num_recycled, start_time);
4791
7
                    }
4792
7
                },
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4769
2
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4770
2
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4771
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4772
0
                        return;
4773
0
                    }
4774
2
                    std::vector<std::string> keys;
4775
2
                    {
4776
2
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4777
2
                        async_recycled_rowset_keys.push_back(std::move(key));
4778
2
                        if (async_recycled_rowset_keys.size() > 100) {
4779
0
                            keys.swap(async_recycled_rowset_keys);
4780
0
                        }
4781
2
                    }
4782
2
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4783
2
                    if (keys.empty()) return;
4784
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4785
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4786
0
                                     << instance_id_;
4787
0
                    } else {
4788
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4789
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4790
0
                                           num_recycled, start_time);
4791
0
                    }
4792
0
                },
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
4769
808
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4770
808
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4771
0
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4772
0
                        return;
4773
0
                    }
4774
808
                    std::vector<std::string> keys;
4775
808
                    {
4776
808
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4777
808
                        async_recycled_rowset_keys.push_back(std::move(key));
4778
808
                        if (async_recycled_rowset_keys.size() > 100) {
4779
7
                            keys.swap(async_recycled_rowset_keys);
4780
7
                        }
4781
808
                    }
4782
808
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4783
808
                    if (keys.empty()) return;
4784
7
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4785
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4786
0
                                     << instance_id_;
4787
7
                    } else {
4788
7
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4789
7
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4790
7
                                           num_recycled, start_time);
4791
7
                    }
4792
7
                },
4793
902
                0);
4794
902
        if (ret == 0) return 0;
4795
        // Submit task failed, delete rowset data in current thread
4796
92
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4797
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4798
0
            return -1;
4799
0
        }
4800
92
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4801
0
            return -1;
4802
0
        }
4803
92
        rowset_keys.push_back(std::move(key));
4804
92
        return 0;
4805
92
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4766
2
                                            int64_t tablet_id, const std::string& rowset_id) {
4767
        // Try to delete rowset data in background thread
4768
2
        int ret = worker_pool->submit_with_timeout(
4769
2
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4770
2
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4771
2
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4772
2
                        return;
4773
2
                    }
4774
2
                    std::vector<std::string> keys;
4775
2
                    {
4776
2
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4777
2
                        async_recycled_rowset_keys.push_back(std::move(key));
4778
2
                        if (async_recycled_rowset_keys.size() > 100) {
4779
2
                            keys.swap(async_recycled_rowset_keys);
4780
2
                        }
4781
2
                    }
4782
2
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4783
2
                    if (keys.empty()) return;
4784
2
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4785
2
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4786
2
                                     << instance_id_;
4787
2
                    } else {
4788
2
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4789
2
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4790
2
                                           num_recycled, start_time);
4791
2
                    }
4792
2
                },
4793
2
                0);
4794
2
        if (ret == 0) return 0;
4795
        // Submit task failed, delete rowset data in current thread
4796
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4797
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4798
0
            return -1;
4799
0
        }
4800
0
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4801
0
            return -1;
4802
0
        }
4803
0
        rowset_keys.push_back(std::move(key));
4804
0
        return 0;
4805
0
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
4766
900
                                            int64_t tablet_id, const std::string& rowset_id) {
4767
        // Try to delete rowset data in background thread
4768
900
        int ret = worker_pool->submit_with_timeout(
4769
900
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
4770
900
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4771
900
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4772
900
                        return;
4773
900
                    }
4774
900
                    std::vector<std::string> keys;
4775
900
                    {
4776
900
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
4777
900
                        async_recycled_rowset_keys.push_back(std::move(key));
4778
900
                        if (async_recycled_rowset_keys.size() > 100) {
4779
900
                            keys.swap(async_recycled_rowset_keys);
4780
900
                        }
4781
900
                    }
4782
900
                    delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id);
4783
900
                    if (keys.empty()) return;
4784
900
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
4785
900
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
4786
900
                                     << instance_id_;
4787
900
                    } else {
4788
900
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
4789
900
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
4790
900
                                           num_recycled, start_time);
4791
900
                    }
4792
900
                },
4793
900
                0);
4794
900
        if (ret == 0) return 0;
4795
        // Submit task failed, delete rowset data in current thread
4796
92
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
4797
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
4798
0
            return -1;
4799
0
        }
4800
92
        if (delete_versioned_delete_bitmap_kvs(tablet_id, rowset_id) != 0) {
4801
0
            return -1;
4802
0
        }
4803
92
        rowset_keys.push_back(std::move(key));
4804
92
        return 0;
4805
92
    };
4806
4807
22
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
4808
4809
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4810
7.75k
        ++num_scanned;
4811
7.75k
        total_rowset_key_size += k.size();
4812
7.75k
        total_rowset_value_size += v.size();
4813
7.75k
        RecycleRowsetPB rowset;
4814
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4815
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4816
0
            return -1;
4817
0
        }
4818
4819
7.75k
        int64_t current_time = ::time(nullptr);
4820
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4821
4822
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4823
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4824
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4825
7.75k
        if (current_time < expiration) { // not expired
4826
0
            return 0;
4827
0
        }
4828
7.75k
        ++num_expired;
4829
7.75k
        expired_rowset_size += v.size();
4830
4831
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4832
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4833
                // in old version, keep this key-value pair and it needs to be checked manually
4834
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4835
0
                return -1;
4836
0
            }
4837
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4838
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4839
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4840
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4841
0
                rowset_keys.emplace_back(k);
4842
0
                return -1;
4843
0
            }
4844
            // decode rowset_id
4845
250
            auto k1 = k;
4846
250
            k1.remove_prefix(1);
4847
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4848
250
            decode_key(&k1, &out);
4849
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4850
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4851
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4852
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4853
250
                      << " task_type=" << metrics_context.operation_type;
4854
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4855
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4856
0
                return -1;
4857
0
            }
4858
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4859
250
            metrics_context.total_recycled_num++;
4860
250
            segment_metrics_context_.total_recycled_data_size +=
4861
250
                    rowset.rowset_meta().total_disk_size();
4862
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4863
250
            return 0;
4864
250
        }
4865
4866
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
4867
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
4868
7.50k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
4869
7.50k
            if (mark_ret == -1) {
4870
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
4871
0
                             << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4872
0
                             << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4873
0
                             << "]";
4874
0
                return -1;
4875
7.50k
            } else if (mark_ret == 1) {
4876
3.75k
                LOG(INFO)
4877
3.75k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
4878
3.75k
                           "next turn, instance_id="
4879
3.75k
                        << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4880
3.75k
                        << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4881
3.75k
                return 0;
4882
3.75k
            }
4883
7.50k
        }
4884
4885
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
4886
3.75k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
4887
3.75k
                      << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4888
3.75k
                      << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4889
4890
3.75k
            if (rowset_meta->end_version() != 1) {
4891
3.75k
                int ret = abort_txn_or_job_for_recycle(rowset);
4892
4893
3.75k
                if (ret != 0) {
4894
0
                    LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
4895
0
                                 << instance_id_ << " tablet_id=" << rowset.tablet_id()
4896
0
                                 << " version=[" << rowset_meta->start_version() << '-'
4897
0
                                 << rowset_meta->end_version() << "]";
4898
0
                    return ret;
4899
0
                }
4900
3.75k
            }
4901
3.75k
        }
4902
4903
        // TODO(plat1ko): check rowset not referenced
4904
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4905
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4906
0
                LOG_INFO("recycle rowset that has empty resource id");
4907
0
            } else {
4908
                // other situations, keep this key-value pair and it needs to be checked manually
4909
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4910
0
                return -1;
4911
0
            }
4912
0
        }
4913
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4914
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4915
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4916
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4917
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4918
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4919
3.75k
                  << " rowset_meta_size=" << v.size()
4920
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4921
3.75k
                  << " task_type=" << metrics_context.operation_type;
4922
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4923
            // unable to calculate file path, can only be deleted by rowset id prefix
4924
652
            num_prepare += 1;
4925
652
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4926
652
                                             rowset_meta->tablet_id(),
4927
652
                                             rowset_meta->rowset_id_v2()) != 0) {
4928
0
                return -1;
4929
0
            }
4930
3.10k
        } else {
4931
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4932
3.10k
            rowset_keys.emplace_back(k);
4933
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4934
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4935
3.10k
                ++num_empty_rowset;
4936
3.10k
            }
4937
3.10k
        }
4938
3.75k
        return 0;
4939
3.75k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4809
7
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4810
7
        ++num_scanned;
4811
7
        total_rowset_key_size += k.size();
4812
7
        total_rowset_value_size += v.size();
4813
7
        RecycleRowsetPB rowset;
4814
7
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4815
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4816
0
            return -1;
4817
0
        }
4818
4819
7
        int64_t current_time = ::time(nullptr);
4820
7
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4821
4822
7
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4823
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4824
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4825
7
        if (current_time < expiration) { // not expired
4826
0
            return 0;
4827
0
        }
4828
7
        ++num_expired;
4829
7
        expired_rowset_size += v.size();
4830
4831
7
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4832
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4833
                // in old version, keep this key-value pair and it needs to be checked manually
4834
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4835
0
                return -1;
4836
0
            }
4837
0
            if (rowset.resource_id().empty()) [[unlikely]] {
4838
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4839
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4840
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4841
0
                rowset_keys.emplace_back(k);
4842
0
                return -1;
4843
0
            }
4844
            // decode rowset_id
4845
0
            auto k1 = k;
4846
0
            k1.remove_prefix(1);
4847
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4848
0
            decode_key(&k1, &out);
4849
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4850
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4851
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4852
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4853
0
                      << " task_type=" << metrics_context.operation_type;
4854
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4855
0
                                             rowset.tablet_id(), rowset_id) != 0) {
4856
0
                return -1;
4857
0
            }
4858
0
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4859
0
            metrics_context.total_recycled_num++;
4860
0
            segment_metrics_context_.total_recycled_data_size +=
4861
0
                    rowset.rowset_meta().total_disk_size();
4862
0
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4863
0
            return 0;
4864
0
        }
4865
4866
7
        auto* rowset_meta = rowset.mutable_rowset_meta();
4867
7
        if (config::enable_mark_delete_rowset_before_recycle) {
4868
7
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
4869
7
            if (mark_ret == -1) {
4870
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
4871
0
                             << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4872
0
                             << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4873
0
                             << "]";
4874
0
                return -1;
4875
7
            } else if (mark_ret == 1) {
4876
5
                LOG(INFO)
4877
5
                        << "rowset already marked as recycled, recycler will delete data and kv at "
4878
5
                           "next turn, instance_id="
4879
5
                        << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4880
5
                        << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4881
5
                return 0;
4882
5
            }
4883
7
        }
4884
4885
2
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
4886
2
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
4887
2
                      << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4888
2
                      << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4889
4890
2
            if (rowset_meta->end_version() != 1) {
4891
2
                int ret = abort_txn_or_job_for_recycle(rowset);
4892
4893
2
                if (ret != 0) {
4894
0
                    LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
4895
0
                                 << instance_id_ << " tablet_id=" << rowset.tablet_id()
4896
0
                                 << " version=[" << rowset_meta->start_version() << '-'
4897
0
                                 << rowset_meta->end_version() << "]";
4898
0
                    return ret;
4899
0
                }
4900
2
            }
4901
2
        }
4902
4903
        // TODO(plat1ko): check rowset not referenced
4904
2
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4905
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4906
0
                LOG_INFO("recycle rowset that has empty resource id");
4907
0
            } else {
4908
                // other situations, keep this key-value pair and it needs to be checked manually
4909
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4910
0
                return -1;
4911
0
            }
4912
0
        }
4913
2
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4914
2
                  << " tablet_id=" << rowset_meta->tablet_id()
4915
2
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4916
2
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4917
2
                  << "] txn_id=" << rowset_meta->txn_id()
4918
2
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4919
2
                  << " rowset_meta_size=" << v.size()
4920
2
                  << " creation_time=" << rowset_meta->creation_time()
4921
2
                  << " task_type=" << metrics_context.operation_type;
4922
2
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4923
            // unable to calculate file path, can only be deleted by rowset id prefix
4924
2
            num_prepare += 1;
4925
2
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4926
2
                                             rowset_meta->tablet_id(),
4927
2
                                             rowset_meta->rowset_id_v2()) != 0) {
4928
0
                return -1;
4929
0
            }
4930
2
        } else {
4931
0
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4932
0
            rowset_keys.emplace_back(k);
4933
0
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4934
0
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4935
0
                ++num_empty_rowset;
4936
0
            }
4937
0
        }
4938
2
        return 0;
4939
2
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
4809
7.75k
    auto handle_rowset_kv = [&](std::string_view k, std::string_view v) -> int {
4810
7.75k
        ++num_scanned;
4811
7.75k
        total_rowset_key_size += k.size();
4812
7.75k
        total_rowset_value_size += v.size();
4813
7.75k
        RecycleRowsetPB rowset;
4814
7.75k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
4815
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
4816
0
            return -1;
4817
0
        }
4818
4819
7.75k
        int64_t current_time = ::time(nullptr);
4820
7.75k
        int64_t expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
4821
4822
7.75k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
4823
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
4824
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
4825
7.75k
        if (current_time < expiration) { // not expired
4826
0
            return 0;
4827
0
        }
4828
7.75k
        ++num_expired;
4829
7.75k
        expired_rowset_size += v.size();
4830
4831
7.75k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
4832
250
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
4833
                // in old version, keep this key-value pair and it needs to be checked manually
4834
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4835
0
                return -1;
4836
0
            }
4837
250
            if (rowset.resource_id().empty()) [[unlikely]] {
4838
                // old version `RecycleRowsetPB` may has empty resource_id, just remove the kv.
4839
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
4840
0
                          << hex(k) << " value=" << proto_to_json(rowset);
4841
0
                rowset_keys.emplace_back(k);
4842
0
                return -1;
4843
0
            }
4844
            // decode rowset_id
4845
250
            auto k1 = k;
4846
250
            k1.remove_prefix(1);
4847
250
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
4848
250
            decode_key(&k1, &out);
4849
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
4850
250
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
4851
250
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4852
250
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id
4853
250
                      << " task_type=" << metrics_context.operation_type;
4854
250
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
4855
250
                                             rowset.tablet_id(), rowset_id) != 0) {
4856
0
                return -1;
4857
0
            }
4858
250
            metrics_context.total_recycled_data_size += rowset.rowset_meta().total_disk_size();
4859
250
            metrics_context.total_recycled_num++;
4860
250
            segment_metrics_context_.total_recycled_data_size +=
4861
250
                    rowset.rowset_meta().total_disk_size();
4862
250
            segment_metrics_context_.total_recycled_num += rowset.rowset_meta().num_segments();
4863
250
            return 0;
4864
250
        }
4865
4866
7.50k
        auto* rowset_meta = rowset.mutable_rowset_meta();
4867
7.50k
        if (config::enable_mark_delete_rowset_before_recycle) {
4868
7.50k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
4869
7.50k
            if (mark_ret == -1) {
4870
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
4871
0
                             << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4872
0
                             << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4873
0
                             << "]";
4874
0
                return -1;
4875
7.50k
            } else if (mark_ret == 1) {
4876
3.75k
                LOG(INFO)
4877
3.75k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
4878
3.75k
                           "next turn, instance_id="
4879
3.75k
                        << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4880
3.75k
                        << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4881
3.75k
                return 0;
4882
3.75k
            }
4883
7.50k
        }
4884
4885
3.75k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
4886
3.75k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
4887
3.75k
                      << instance_id_ << " tablet_id=" << rowset_meta->tablet_id() << " version=["
4888
3.75k
                      << rowset_meta->start_version() << '-' << rowset_meta->end_version() << "]";
4889
4890
3.75k
            if (rowset_meta->end_version() != 1) {
4891
3.75k
                int ret = abort_txn_or_job_for_recycle(rowset);
4892
4893
3.75k
                if (ret != 0) {
4894
0
                    LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
4895
0
                                 << instance_id_ << " tablet_id=" << rowset.tablet_id()
4896
0
                                 << " version=[" << rowset_meta->start_version() << '-'
4897
0
                                 << rowset_meta->end_version() << "]";
4898
0
                    return ret;
4899
0
                }
4900
3.75k
            }
4901
3.75k
        }
4902
4903
        // TODO(plat1ko): check rowset not referenced
4904
3.75k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
4905
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
4906
0
                LOG_INFO("recycle rowset that has empty resource id");
4907
0
            } else {
4908
                // other situations, keep this key-value pair and it needs to be checked manually
4909
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
4910
0
                return -1;
4911
0
            }
4912
0
        }
4913
3.75k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
4914
3.75k
                  << " tablet_id=" << rowset_meta->tablet_id()
4915
3.75k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
4916
3.75k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
4917
3.75k
                  << "] txn_id=" << rowset_meta->txn_id()
4918
3.75k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
4919
3.75k
                  << " rowset_meta_size=" << v.size()
4920
3.75k
                  << " creation_time=" << rowset_meta->creation_time()
4921
3.75k
                  << " task_type=" << metrics_context.operation_type;
4922
3.75k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
4923
            // unable to calculate file path, can only be deleted by rowset id prefix
4924
650
            num_prepare += 1;
4925
650
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
4926
650
                                             rowset_meta->tablet_id(),
4927
650
                                             rowset_meta->rowset_id_v2()) != 0) {
4928
0
                return -1;
4929
0
            }
4930
3.10k
        } else {
4931
3.10k
            num_compacted += rowset.type() == RecycleRowsetPB::COMPACT;
4932
3.10k
            rowset_keys.emplace_back(k);
4933
3.10k
            rowsets.emplace(rowset_meta->rowset_id_v2(), std::move(*rowset_meta));
4934
3.10k
            if (rowset_meta->num_segments() <= 0) { // Skip empty rowset
4935
3.10k
                ++num_empty_rowset;
4936
3.10k
            }
4937
3.10k
        }
4938
3.75k
        return 0;
4939
3.75k
    };
4940
4941
49
    auto loop_done = [&]() -> int {
4942
49
        std::vector<std::string> rowset_keys_to_delete;
4943
        // rowset_id -> rowset_meta
4944
        // store rowset id and meta for statistics rs size when delete
4945
49
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4946
49
        rowset_keys_to_delete.swap(rowset_keys);
4947
49
        rowsets_to_delete.swap(rowsets);
4948
49
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4949
49
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4950
49
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4951
49
                                   metrics_context) != 0) {
4952
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4953
0
                return;
4954
0
            }
4955
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4956
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4957
0
                    return;
4958
0
                }
4959
3.10k
            }
4960
49
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4961
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4962
0
                return;
4963
0
            }
4964
49
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4965
49
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
4949
7
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4950
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4951
7
                                   metrics_context) != 0) {
4952
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4953
0
                return;
4954
0
            }
4955
7
            for (const auto& [_, rs] : rowsets_to_delete) {
4956
0
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4957
0
                    return;
4958
0
                }
4959
0
            }
4960
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4961
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4962
0
                return;
4963
0
            }
4964
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4965
7
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEvENKUlvE_clEv
Line
Count
Source
4949
42
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4950
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4951
42
                                   metrics_context) != 0) {
4952
0
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4953
0
                return;
4954
0
            }
4955
3.10k
            for (const auto& [_, rs] : rowsets_to_delete) {
4956
3.10k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4957
0
                    return;
4958
0
                }
4959
3.10k
            }
4960
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4961
0
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4962
0
                return;
4963
0
            }
4964
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4965
42
        });
4966
49
        return 0;
4967
49
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
4941
7
    auto loop_done = [&]() -> int {
4942
7
        std::vector<std::string> rowset_keys_to_delete;
4943
        // rowset_id -> rowset_meta
4944
        // store rowset id and meta for statistics rs size when delete
4945
7
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4946
7
        rowset_keys_to_delete.swap(rowset_keys);
4947
7
        rowsets_to_delete.swap(rowsets);
4948
7
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4949
7
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4950
7
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4951
7
                                   metrics_context) != 0) {
4952
7
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4953
7
                return;
4954
7
            }
4955
7
            for (const auto& [_, rs] : rowsets_to_delete) {
4956
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4957
7
                    return;
4958
7
                }
4959
7
            }
4960
7
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4961
7
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4962
7
                return;
4963
7
            }
4964
7
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4965
7
        });
4966
7
        return 0;
4967
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler15recycle_rowsetsEvENK3$_2clEv
Line
Count
Source
4941
42
    auto loop_done = [&]() -> int {
4942
42
        std::vector<std::string> rowset_keys_to_delete;
4943
        // rowset_id -> rowset_meta
4944
        // store rowset id and meta for statistics rs size when delete
4945
42
        std::map<std::string, doris::RowsetMetaCloudPB> rowsets_to_delete;
4946
42
        rowset_keys_to_delete.swap(rowset_keys);
4947
42
        rowsets_to_delete.swap(rowsets);
4948
42
        worker_pool->submit([&, rowset_keys_to_delete = std::move(rowset_keys_to_delete),
4949
42
                             rowsets_to_delete = std::move(rowsets_to_delete)]() {
4950
42
            if (delete_rowset_data(rowsets_to_delete, RowsetRecyclingState::FORMAL_ROWSET,
4951
42
                                   metrics_context) != 0) {
4952
42
                LOG(WARNING) << "failed to delete rowset data, instance_id=" << instance_id_;
4953
42
                return;
4954
42
            }
4955
42
            for (const auto& [_, rs] : rowsets_to_delete) {
4956
42
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
4957
42
                    return;
4958
42
                }
4959
42
            }
4960
42
            if (txn_remove(txn_kv_.get(), rowset_keys_to_delete) != 0) {
4961
42
                LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4962
42
                return;
4963
42
            }
4964
42
            num_recycled.fetch_add(rowset_keys_to_delete.size(), std::memory_order_relaxed);
4965
42
        });
4966
42
        return 0;
4967
42
    };
4968
4969
22
    if (config::enable_recycler_stats_metrics) {
4970
0
        scan_and_statistics_rowsets();
4971
0
    }
4972
    // recycle_func and loop_done for scan and recycle
4973
22
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
4974
22
                               std::move(loop_done));
4975
4976
22
    worker_pool->stop();
4977
4978
22
    if (!async_recycled_rowset_keys.empty()) {
4979
5
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
4980
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
4981
0
            return -1;
4982
5
        } else {
4983
5
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
4984
5
        }
4985
5
    }
4986
4987
    // Report final metrics after all concurrent tasks completed
4988
22
    segment_metrics_context_.report();
4989
22
    metrics_context.report();
4990
4991
22
    return ret;
4992
22
}
4993
4994
13
int InstanceRecycler::recycle_restore_jobs() {
4995
13
    const std::string task_name = "recycle_restore_jobs";
4996
13
    int64_t num_scanned = 0;
4997
13
    int64_t num_expired = 0;
4998
13
    int64_t num_recycled = 0;
4999
13
    int64_t num_aborted = 0;
5000
5001
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5002
5003
13
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
5004
13
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
5005
13
    std::string restore_job_key0;
5006
13
    std::string restore_job_key1;
5007
13
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
5008
13
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
5009
5010
13
    LOG_INFO("begin to recycle restore jobs").tag("instance_id", instance_id_);
5011
5012
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5013
13
    register_recycle_task(task_name, start_time);
5014
5015
13
    DORIS_CLOUD_DEFER {
5016
13
        unregister_recycle_task(task_name);
5017
13
        int64_t cost =
5018
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5019
13
        metrics_context.finish_report();
5020
5021
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
5022
13
                .tag("instance_id", instance_id_)
5023
13
                .tag("num_scanned", num_scanned)
5024
13
                .tag("num_expired", num_expired)
5025
13
                .tag("num_recycled", num_recycled)
5026
13
                .tag("num_aborted", num_aborted);
5027
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_0clEv
Line
Count
Source
5015
13
    DORIS_CLOUD_DEFER {
5016
13
        unregister_recycle_task(task_name);
5017
13
        int64_t cost =
5018
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5019
13
        metrics_context.finish_report();
5020
5021
13
        LOG_INFO("recycle restore jobs finished, cost={}s", cost)
5022
13
                .tag("instance_id", instance_id_)
5023
13
                .tag("num_scanned", num_scanned)
5024
13
                .tag("num_expired", num_expired)
5025
13
                .tag("num_recycled", num_recycled)
5026
13
                .tag("num_aborted", num_aborted);
5027
13
    };
5028
5029
13
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5030
5031
13
    std::vector<std::string_view> restore_job_keys;
5032
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
5033
41
        ++num_scanned;
5034
41
        RestoreJobCloudPB restore_job_pb;
5035
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5036
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5037
0
            return -1;
5038
0
        }
5039
41
        int64_t expiration =
5040
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5041
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
5042
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
5043
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5044
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5045
0
                   << " state=" << restore_job_pb.state();
5046
41
        int64_t current_time = ::time(nullptr);
5047
41
        if (current_time < expiration) { // not expired
5048
0
            return 0;
5049
0
        }
5050
41
        ++num_expired;
5051
5052
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5053
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5054
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5055
5056
41
        std::unique_ptr<Transaction> txn;
5057
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5058
41
        if (err != TxnErrorCode::TXN_OK) {
5059
0
            LOG_WARNING("failed to recycle restore job")
5060
0
                    .tag("err", err)
5061
0
                    .tag("tablet id", tablet_id)
5062
0
                    .tag("instance_id", instance_id_)
5063
0
                    .tag("reason", "failed to create txn");
5064
0
            return -1;
5065
0
        }
5066
5067
41
        std::string val;
5068
41
        err = txn->get(k, &val);
5069
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5070
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5071
0
            return 0;
5072
0
        }
5073
41
        if (err != TxnErrorCode::TXN_OK) {
5074
0
            LOG_WARNING("failed to get kv");
5075
0
            return -1;
5076
0
        }
5077
41
        restore_job_pb.Clear();
5078
41
        if (!restore_job_pb.ParseFromString(val)) {
5079
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5080
0
            return -1;
5081
0
        }
5082
5083
        // PREPARED or COMMITTED, change state to DROPPED and return
5084
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5085
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5086
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5087
0
            restore_job_pb.set_need_recycle_data(true);
5088
0
            txn->put(k, restore_job_pb.SerializeAsString());
5089
0
            err = txn->commit();
5090
0
            if (err != TxnErrorCode::TXN_OK) {
5091
0
                LOG_WARNING("failed to commit txn: {}", err);
5092
0
                return -1;
5093
0
            }
5094
0
            num_aborted++;
5095
0
            return 0;
5096
0
        }
5097
5098
        // Change state to RECYCLING
5099
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5100
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5101
21
            txn->put(k, restore_job_pb.SerializeAsString());
5102
21
            err = txn->commit();
5103
21
            if (err != TxnErrorCode::TXN_OK) {
5104
0
                LOG_WARNING("failed to commit txn: {}", err);
5105
0
                return -1;
5106
0
            }
5107
21
            return 0;
5108
21
        }
5109
5110
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5111
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5112
5113
        // Recycle all data associated with the restore job.
5114
        // This includes rowsets, segments, and related resources.
5115
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5116
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5117
0
            LOG_WARNING("failed to recycle tablet")
5118
0
                    .tag("tablet_id", tablet_id)
5119
0
                    .tag("instance_id", instance_id_);
5120
0
            return -1;
5121
0
        }
5122
5123
        // delete all restore job rowset kv
5124
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5125
5126
20
        err = txn->commit();
5127
20
        if (err != TxnErrorCode::TXN_OK) {
5128
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5129
0
                    .tag("err", err)
5130
0
                    .tag("tablet id", tablet_id)
5131
0
                    .tag("instance_id", instance_id_)
5132
0
                    .tag("reason", "failed to commit txn");
5133
0
            return -1;
5134
0
        }
5135
5136
20
        metrics_context.total_recycled_num = ++num_recycled;
5137
20
        metrics_context.report();
5138
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5139
20
        restore_job_keys.push_back(k);
5140
5141
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5142
20
                  << " tablet_id=" << tablet_id;
5143
20
        return 0;
5144
20
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5032
41
    auto recycle_func = [&, this](std::string_view k, std::string_view v) -> int {
5033
41
        ++num_scanned;
5034
41
        RestoreJobCloudPB restore_job_pb;
5035
41
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
5036
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
5037
0
            return -1;
5038
0
        }
5039
41
        int64_t expiration =
5040
41
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
5041
41
        VLOG_DEBUG << "recycle restore job scan, key=" << hex(k) << " num_scanned=" << num_scanned
5042
0
                   << " num_expired=" << num_expired << " expiration time=" << expiration
5043
0
                   << " job expiration=" << restore_job_pb.expired_at_s()
5044
0
                   << " ctime=" << restore_job_pb.ctime_s() << " mtime=" << restore_job_pb.mtime_s()
5045
0
                   << " state=" << restore_job_pb.state();
5046
41
        int64_t current_time = ::time(nullptr);
5047
41
        if (current_time < expiration) { // not expired
5048
0
            return 0;
5049
0
        }
5050
41
        ++num_expired;
5051
5052
41
        int64_t tablet_id = restore_job_pb.tablet_id();
5053
41
        LOG(INFO) << "begin to recycle expired restore jobs, instance_id=" << instance_id_
5054
41
                  << " restore_job_pb=" << restore_job_pb.DebugString();
5055
5056
41
        std::unique_ptr<Transaction> txn;
5057
41
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5058
41
        if (err != TxnErrorCode::TXN_OK) {
5059
0
            LOG_WARNING("failed to recycle restore job")
5060
0
                    .tag("err", err)
5061
0
                    .tag("tablet id", tablet_id)
5062
0
                    .tag("instance_id", instance_id_)
5063
0
                    .tag("reason", "failed to create txn");
5064
0
            return -1;
5065
0
        }
5066
5067
41
        std::string val;
5068
41
        err = txn->get(k, &val);
5069
41
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { // maybe recycled, skip it
5070
0
            LOG_INFO("restore job {} has been recycled", tablet_id);
5071
0
            return 0;
5072
0
        }
5073
41
        if (err != TxnErrorCode::TXN_OK) {
5074
0
            LOG_WARNING("failed to get kv");
5075
0
            return -1;
5076
0
        }
5077
41
        restore_job_pb.Clear();
5078
41
        if (!restore_job_pb.ParseFromString(val)) {
5079
0
            LOG_WARNING("malformed recycle restore job value").tag("key", hex(k));
5080
0
            return -1;
5081
0
        }
5082
5083
        // PREPARED or COMMITTED, change state to DROPPED and return
5084
41
        if (restore_job_pb.state() == RestoreJobCloudPB::PREPARED ||
5085
41
            restore_job_pb.state() == RestoreJobCloudPB::COMMITTED) {
5086
0
            restore_job_pb.set_state(RestoreJobCloudPB::DROPPED);
5087
0
            restore_job_pb.set_need_recycle_data(true);
5088
0
            txn->put(k, restore_job_pb.SerializeAsString());
5089
0
            err = txn->commit();
5090
0
            if (err != TxnErrorCode::TXN_OK) {
5091
0
                LOG_WARNING("failed to commit txn: {}", err);
5092
0
                return -1;
5093
0
            }
5094
0
            num_aborted++;
5095
0
            return 0;
5096
0
        }
5097
5098
        // Change state to RECYCLING
5099
41
        if (restore_job_pb.state() != RestoreJobCloudPB::RECYCLING) {
5100
21
            restore_job_pb.set_state(RestoreJobCloudPB::RECYCLING);
5101
21
            txn->put(k, restore_job_pb.SerializeAsString());
5102
21
            err = txn->commit();
5103
21
            if (err != TxnErrorCode::TXN_OK) {
5104
0
                LOG_WARNING("failed to commit txn: {}", err);
5105
0
                return -1;
5106
0
            }
5107
21
            return 0;
5108
21
        }
5109
5110
20
        std::string restore_job_rs_key0 = job_restore_rowset_key({instance_id_, tablet_id, 0});
5111
20
        std::string restore_job_rs_key1 = job_restore_rowset_key({instance_id_, tablet_id + 1, 0});
5112
5113
        // Recycle all data associated with the restore job.
5114
        // This includes rowsets, segments, and related resources.
5115
20
        bool need_recycle_data = restore_job_pb.need_recycle_data();
5116
20
        if (need_recycle_data && recycle_tablet(tablet_id, metrics_context) != 0) {
5117
0
            LOG_WARNING("failed to recycle tablet")
5118
0
                    .tag("tablet_id", tablet_id)
5119
0
                    .tag("instance_id", instance_id_);
5120
0
            return -1;
5121
0
        }
5122
5123
        // delete all restore job rowset kv
5124
20
        txn->remove(restore_job_rs_key0, restore_job_rs_key1);
5125
5126
20
        err = txn->commit();
5127
20
        if (err != TxnErrorCode::TXN_OK) {
5128
0
            LOG_WARNING("failed to recycle tablet restore job rowset kv")
5129
0
                    .tag("err", err)
5130
0
                    .tag("tablet id", tablet_id)
5131
0
                    .tag("instance_id", instance_id_)
5132
0
                    .tag("reason", "failed to commit txn");
5133
0
            return -1;
5134
0
        }
5135
5136
20
        metrics_context.total_recycled_num = ++num_recycled;
5137
20
        metrics_context.report();
5138
20
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
5139
20
        restore_job_keys.push_back(k);
5140
5141
20
        LOG(INFO) << "finish to recycle expired restore job, key=" << hex(k)
5142
20
                  << " tablet_id=" << tablet_id;
5143
20
        return 0;
5144
20
    };
5145
5146
13
    auto loop_done = [&restore_job_keys, this]() -> int {
5147
3
        if (restore_job_keys.empty()) return 0;
5148
1
        DORIS_CLOUD_DEFER {
5149
1
            restore_job_keys.clear();
5150
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5148
1
        DORIS_CLOUD_DEFER {
5149
1
            restore_job_keys.clear();
5150
1
        };
5151
5152
1
        std::unique_ptr<Transaction> txn;
5153
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5154
1
        if (err != TxnErrorCode::TXN_OK) {
5155
0
            LOG_WARNING("failed to recycle restore job")
5156
0
                    .tag("err", err)
5157
0
                    .tag("instance_id", instance_id_)
5158
0
                    .tag("reason", "failed to create txn");
5159
0
            return -1;
5160
0
        }
5161
20
        for (auto& k : restore_job_keys) {
5162
20
            txn->remove(k);
5163
20
        }
5164
1
        err = txn->commit();
5165
1
        if (err != TxnErrorCode::TXN_OK) {
5166
0
            LOG_WARNING("failed to recycle restore job")
5167
0
                    .tag("err", err)
5168
0
                    .tag("instance_id", instance_id_)
5169
0
                    .tag("reason", "failed to commit txn");
5170
0
            return -1;
5171
0
        }
5172
1
        return 0;
5173
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler20recycle_restore_jobsEvENK3$_1clEv
Line
Count
Source
5146
3
    auto loop_done = [&restore_job_keys, this]() -> int {
5147
3
        if (restore_job_keys.empty()) return 0;
5148
1
        DORIS_CLOUD_DEFER {
5149
1
            restore_job_keys.clear();
5150
1
        };
5151
5152
1
        std::unique_ptr<Transaction> txn;
5153
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5154
1
        if (err != TxnErrorCode::TXN_OK) {
5155
0
            LOG_WARNING("failed to recycle restore job")
5156
0
                    .tag("err", err)
5157
0
                    .tag("instance_id", instance_id_)
5158
0
                    .tag("reason", "failed to create txn");
5159
0
            return -1;
5160
0
        }
5161
20
        for (auto& k : restore_job_keys) {
5162
20
            txn->remove(k);
5163
20
        }
5164
1
        err = txn->commit();
5165
1
        if (err != TxnErrorCode::TXN_OK) {
5166
0
            LOG_WARNING("failed to recycle restore job")
5167
0
                    .tag("err", err)
5168
0
                    .tag("instance_id", instance_id_)
5169
0
                    .tag("reason", "failed to commit txn");
5170
0
            return -1;
5171
0
        }
5172
1
        return 0;
5173
1
    };
5174
5175
13
    if (config::enable_recycler_stats_metrics) {
5176
0
        scan_and_statistics_restore_jobs();
5177
0
    }
5178
5179
13
    return scan_and_recycle(restore_job_key0, restore_job_key1, std::move(recycle_func),
5180
13
                            std::move(loop_done));
5181
13
}
5182
5183
10
int InstanceRecycler::recycle_versioned_rowsets() {
5184
10
    const std::string task_name = "recycle_rowsets";
5185
10
    int64_t num_scanned = 0;
5186
10
    int64_t num_expired = 0;
5187
10
    int64_t num_prepare = 0;
5188
10
    int64_t num_compacted = 0;
5189
10
    int64_t num_empty_rowset = 0;
5190
10
    size_t total_rowset_key_size = 0;
5191
10
    size_t total_rowset_value_size = 0;
5192
10
    size_t expired_rowset_size = 0;
5193
10
    std::atomic_long num_recycled = 0;
5194
10
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5195
5196
10
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
5197
10
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
5198
10
    std::string recyc_rs_key0;
5199
10
    std::string recyc_rs_key1;
5200
10
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
5201
10
    recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
5202
5203
10
    LOG_WARNING("begin to recycle rowsets").tag("instance_id", instance_id_);
5204
5205
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5206
10
    register_recycle_task(task_name, start_time);
5207
5208
10
    DORIS_CLOUD_DEFER {
5209
10
        unregister_recycle_task(task_name);
5210
10
        int64_t cost =
5211
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5212
10
        metrics_context.finish_report();
5213
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5214
10
                .tag("instance_id", instance_id_)
5215
10
                .tag("num_scanned", num_scanned)
5216
10
                .tag("num_expired", num_expired)
5217
10
                .tag("num_recycled", num_recycled)
5218
10
                .tag("num_recycled.prepare", num_prepare)
5219
10
                .tag("num_recycled.compacted", num_compacted)
5220
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5221
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5222
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5223
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5224
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_0clEv
Line
Count
Source
5208
10
    DORIS_CLOUD_DEFER {
5209
10
        unregister_recycle_task(task_name);
5210
10
        int64_t cost =
5211
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5212
10
        metrics_context.finish_report();
5213
10
        LOG_WARNING("recycle rowsets finished, cost={}s", cost)
5214
10
                .tag("instance_id", instance_id_)
5215
10
                .tag("num_scanned", num_scanned)
5216
10
                .tag("num_expired", num_expired)
5217
10
                .tag("num_recycled", num_recycled)
5218
10
                .tag("num_recycled.prepare", num_prepare)
5219
10
                .tag("num_recycled.compacted", num_compacted)
5220
10
                .tag("num_recycled.empty_rowset", num_empty_rowset)
5221
10
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5222
10
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5223
10
                .tag("expired_rowset_meta_size", expired_rowset_size);
5224
10
    };
5225
5226
10
    std::vector<std::string> orphan_rowset_keys;
5227
5228
    // Stores the keys of rowsets recycled by background workers
5229
10
    std::mutex async_recycled_rowset_keys_mutex;
5230
10
    std::vector<std::string> async_recycled_rowset_keys;
5231
10
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5232
10
            config::instance_recycler_worker_pool_size, "recycle_rowsets");
5233
10
    worker_pool->start();
5234
10
    auto delete_rowset_data_by_prefix = [&](std::string key, const std::string& resource_id,
5235
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5236
        // Try to delete rowset data in background thread
5237
400
        int ret = worker_pool->submit_with_timeout(
5238
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5239
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5240
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5241
400
                        return;
5242
400
                    }
5243
                    // The async recycled rowsets are in a stale format or have never been used,
5244
                    // so we don't need to check the rowset ref count key.
5245
0
                    std::vector<std::string> keys;
5246
0
                    {
5247
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5248
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5249
0
                        if (async_recycled_rowset_keys.size() > 100) {
5250
0
                            keys.swap(async_recycled_rowset_keys);
5251
0
                        }
5252
0
                    }
5253
0
                    if (keys.empty()) return;
5254
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5255
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5256
0
                                     << instance_id_;
5257
0
                    } else {
5258
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5259
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5260
0
                                           num_recycled, start_time);
5261
0
                    }
5262
0
                },
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_ENUlvE_clEv
Line
Count
Source
5238
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5239
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5240
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5241
400
                        return;
5242
400
                    }
5243
                    // The async recycled rowsets are in a stale format or have never been used,
5244
                    // so we don't need to check the rowset ref count key.
5245
0
                    std::vector<std::string> keys;
5246
0
                    {
5247
0
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5248
0
                        async_recycled_rowset_keys.push_back(std::move(key));
5249
0
                        if (async_recycled_rowset_keys.size() > 100) {
5250
0
                            keys.swap(async_recycled_rowset_keys);
5251
0
                        }
5252
0
                    }
5253
0
                    if (keys.empty()) return;
5254
0
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5255
0
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5256
0
                                     << instance_id_;
5257
0
                    } else {
5258
0
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5259
0
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5260
0
                                           num_recycled, start_time);
5261
0
                    }
5262
0
                },
5263
400
                0);
5264
400
        if (ret == 0) return 0;
5265
        // Submit task failed, delete rowset data in current thread
5266
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5267
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5268
0
            return -1;
5269
0
        }
5270
0
        orphan_rowset_keys.push_back(std::move(key));
5271
0
        return 0;
5272
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_3clENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKS8_lSA_
Line
Count
Source
5235
400
                                            int64_t tablet_id, const std::string& rowset_id) {
5236
        // Try to delete rowset data in background thread
5237
400
        int ret = worker_pool->submit_with_timeout(
5238
400
                [&, resource_id, tablet_id, rowset_id, key]() mutable {
5239
400
                    if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5240
400
                        LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5241
400
                        return;
5242
400
                    }
5243
                    // The async recycled rowsets are in a stale format or have never been used,
5244
                    // so we don't need to check the rowset ref count key.
5245
400
                    std::vector<std::string> keys;
5246
400
                    {
5247
400
                        std::lock_guard lock(async_recycled_rowset_keys_mutex);
5248
400
                        async_recycled_rowset_keys.push_back(std::move(key));
5249
400
                        if (async_recycled_rowset_keys.size() > 100) {
5250
400
                            keys.swap(async_recycled_rowset_keys);
5251
400
                        }
5252
400
                    }
5253
400
                    if (keys.empty()) return;
5254
400
                    if (txn_remove(txn_kv_.get(), keys) != 0) {
5255
400
                        LOG(WARNING) << "failed to delete recycle rowset kv, instance_id="
5256
400
                                     << instance_id_;
5257
400
                    } else {
5258
400
                        num_recycled.fetch_add(keys.size(), std::memory_order_relaxed);
5259
400
                        check_recycle_task(instance_id_, "recycle_rowsets", num_scanned,
5260
400
                                           num_recycled, start_time);
5261
400
                    }
5262
400
                },
5263
400
                0);
5264
400
        if (ret == 0) return 0;
5265
        // Submit task failed, delete rowset data in current thread
5266
0
        if (delete_rowset_data(resource_id, tablet_id, rowset_id) != 0) {
5267
0
            LOG(WARNING) << "failed to delete rowset data, key=" << hex(key);
5268
0
            return -1;
5269
0
        }
5270
0
        orphan_rowset_keys.push_back(std::move(key));
5271
0
        return 0;
5272
0
    };
5273
5274
10
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5275
5276
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5277
2.01k
        ++num_scanned;
5278
2.01k
        total_rowset_key_size += k.size();
5279
2.01k
        total_rowset_value_size += v.size();
5280
2.01k
        RecycleRowsetPB rowset;
5281
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5282
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5283
0
            return -1;
5284
0
        }
5285
5286
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5287
5288
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5289
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5290
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5291
2.01k
        int64_t current_time = ::time(nullptr);
5292
2.01k
        if (current_time < final_expiration) { // not expired
5293
0
            return 0;
5294
0
        }
5295
2.01k
        ++num_expired;
5296
2.01k
        expired_rowset_size += v.size();
5297
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5298
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5299
                // in old version, keep this key-value pair and it needs to be checked manually
5300
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5301
0
                return -1;
5302
0
            }
5303
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5304
                // An old-version `RecycleRowsetPB` may have an empty resource_id; just remove the kv.
5305
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5306
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5307
0
                orphan_rowset_keys.emplace_back(k);
5308
0
                return -1;
5309
0
            }
5310
            // decode rowset_id
5311
0
            auto k1 = k;
5312
0
            k1.remove_prefix(1);
5313
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5314
0
            decode_key(&k1, &out);
5315
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5316
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5317
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5318
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5319
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5320
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5321
0
                return -1;
5322
0
            }
5323
0
            return 0;
5324
0
        }
5325
        // TODO(plat1ko): check rowset not referenced
5326
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5327
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5328
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5329
0
                LOG_INFO("recycle rowset that has empty resource id");
5330
0
            } else {
5331
                // other situations, keep this key-value pair and it needs to be checked manually
5332
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5333
0
                return -1;
5334
0
            }
5335
0
        }
5336
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5337
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5338
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5339
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5340
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5341
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5342
2.01k
                  << " rowset_meta_size=" << v.size()
5343
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5344
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5345
            // unable to calculate file path, can only be deleted by rowset id prefix
5346
400
            num_prepare += 1;
5347
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5348
400
                                             rowset_meta->tablet_id(),
5349
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5350
0
                return -1;
5351
0
            }
5352
1.61k
        } else {
5353
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5354
1.61k
            worker_pool->submit(
5355
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5356
                        // The load & compact rowset keys are recycled during recycling operation logs.
5357
1.61k
                        RowsetDeleteTask task;
5358
1.61k
                        task.rowset_meta = rowset_meta;
5359
1.61k
                        task.recycle_rowset_key = k;
5360
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5361
1.60k
                            return;
5362
1.60k
                        }
5363
13
                        num_compacted += is_compacted;
5364
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5365
13
                        if (rowset_meta.num_segments() == 0) {
5366
0
                            ++num_empty_rowset;
5367
0
                        }
5368
13
                    });
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_ENKUlvE_clEv
Line
Count
Source
5355
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5356
                        // The load & compact rowset keys are recycled during recycling operation logs.
5357
1.61k
                        RowsetDeleteTask task;
5358
1.61k
                        task.rowset_meta = rowset_meta;
5359
1.61k
                        task.recycle_rowset_key = k;
5360
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5361
1.60k
                            return;
5362
1.60k
                        }
5363
13
                        num_compacted += is_compacted;
5364
13
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5365
13
                        if (rowset_meta.num_segments() == 0) {
5366
0
                            ++num_empty_rowset;
5367
0
                        }
5368
13
                    });
5369
1.61k
        }
5370
2.01k
        return 0;
5371
2.01k
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5276
2.01k
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
5277
2.01k
        ++num_scanned;
5278
2.01k
        total_rowset_key_size += k.size();
5279
2.01k
        total_rowset_value_size += v.size();
5280
2.01k
        RecycleRowsetPB rowset;
5281
2.01k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5282
0
            LOG_WARNING("malformed recycle rowset").tag("key", hex(k));
5283
0
            return -1;
5284
0
        }
5285
5286
2.01k
        int final_expiration = calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5287
5288
2.01k
        VLOG_DEBUG << "recycle rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5289
0
                   << " num_expired=" << num_expired << " expiration=" << final_expiration
5290
0
                   << " RecycleRowsetPB=" << rowset.ShortDebugString();
5291
2.01k
        int64_t current_time = ::time(nullptr);
5292
2.01k
        if (current_time < final_expiration) { // not expired
5293
0
            return 0;
5294
0
        }
5295
2.01k
        ++num_expired;
5296
2.01k
        expired_rowset_size += v.size();
5297
2.01k
        if (!rowset.has_type()) {                         // old version `RecycleRowsetPB`
5298
0
            if (!rowset.has_resource_id()) [[unlikely]] { // impossible
5299
                // in old version, keep this key-value pair and it needs to be checked manually
5300
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5301
0
                return -1;
5302
0
            }
5303
0
            if (rowset.resource_id().empty()) [[unlikely]] {
5304
                // An old-version `RecycleRowsetPB` may have an empty resource_id; just remove the kv.
5305
0
                LOG(INFO) << "delete the recycle rowset kv that has empty resource_id, key="
5306
0
                          << hex(k) << " value=" << proto_to_json(rowset);
5307
0
                orphan_rowset_keys.emplace_back(k);
5308
0
                return -1;
5309
0
            }
5310
            // decode rowset_id
5311
0
            auto k1 = k;
5312
0
            k1.remove_prefix(1);
5313
0
            std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5314
0
            decode_key(&k1, &out);
5315
            // 0x01 "recycle" ${instance_id} "rowset" ${tablet_id} ${rowset_id} -> RecycleRowsetPB
5316
0
            const auto& rowset_id = std::get<std::string>(std::get<0>(out[4]));
5317
0
            LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5318
0
                      << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset_id;
5319
0
            if (delete_rowset_data_by_prefix(std::string(k), rowset.resource_id(),
5320
0
                                             rowset.tablet_id(), rowset_id) != 0) {
5321
0
                return -1;
5322
0
            }
5323
0
            return 0;
5324
0
        }
5325
        // TODO(plat1ko): check rowset not referenced
5326
2.01k
        auto rowset_meta = rowset.mutable_rowset_meta();
5327
2.01k
        if (!rowset_meta->has_resource_id()) [[unlikely]] { // impossible
5328
0
            if (rowset.type() != RecycleRowsetPB::PREPARE && rowset_meta->num_segments() == 0) {
5329
0
                LOG_INFO("recycle rowset that has empty resource id");
5330
0
            } else {
5331
                // other situations, keep this key-value pair and it needs to be checked manually
5332
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", hex(k));
5333
0
                return -1;
5334
0
            }
5335
0
        }
5336
2.01k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5337
2.01k
                  << " tablet_id=" << rowset_meta->tablet_id()
5338
2.01k
                  << " rowset_id=" << rowset_meta->rowset_id_v2() << " version=["
5339
2.01k
                  << rowset_meta->start_version() << '-' << rowset_meta->end_version()
5340
2.01k
                  << "] txn_id=" << rowset_meta->txn_id()
5341
2.01k
                  << " type=" << RecycleRowsetPB_Type_Name(rowset.type())
5342
2.01k
                  << " rowset_meta_size=" << v.size()
5343
2.01k
                  << " creation_time=" << rowset_meta->creation_time();
5344
2.01k
        if (rowset.type() == RecycleRowsetPB::PREPARE) {
5345
            // unable to calculate file path, can only be deleted by rowset id prefix
5346
400
            num_prepare += 1;
5347
400
            if (delete_rowset_data_by_prefix(std::string(k), rowset_meta->resource_id(),
5348
400
                                             rowset_meta->tablet_id(),
5349
400
                                             rowset_meta->rowset_id_v2()) != 0) {
5350
0
                return -1;
5351
0
            }
5352
1.61k
        } else {
5353
1.61k
            bool is_compacted = rowset.type() == RecycleRowsetPB::COMPACT;
5354
1.61k
            worker_pool->submit(
5355
1.61k
                    [&, is_compacted, k = std::string(k), rowset_meta = std::move(*rowset_meta)]() {
5356
                        // The load & compact rowset keys are recycled during recycling operation logs.
5357
1.61k
                        RowsetDeleteTask task;
5358
1.61k
                        task.rowset_meta = rowset_meta;
5359
1.61k
                        task.recycle_rowset_key = k;
5360
1.61k
                        if (recycle_rowset_meta_and_data(task) != 0) {
5361
1.61k
                            return;
5362
1.61k
                        }
5363
1.61k
                        num_compacted += is_compacted;
5364
1.61k
                        num_recycled.fetch_add(1, std::memory_order_relaxed);
5365
1.61k
                        if (rowset_meta.num_segments() == 0) {
5366
1.61k
                            ++num_empty_rowset;
5367
1.61k
                        }
5368
1.61k
                    });
5369
1.61k
        }
5370
2.01k
        return 0;
5371
2.01k
    };
5372
5373
10
    if (config::enable_recycler_stats_metrics) {
5374
0
        scan_and_statistics_rowsets();
5375
0
    }
5376
5377
10
    auto loop_done = [&]() -> int {
5378
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5379
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5380
0
        }
5381
6
        orphan_rowset_keys.clear();
5382
6
        return 0;
5383
6
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_versioned_rowsetsEvENK3$_2clEv
Line
Count
Source
5377
6
    auto loop_done = [&]() -> int {
5378
6
        if (txn_remove(txn_kv_.get(), orphan_rowset_keys)) {
5379
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5380
0
        }
5381
6
        orphan_rowset_keys.clear();
5382
6
        return 0;
5383
6
    };
5384
5385
    // recycle_func and loop_done for scan and recycle
5386
10
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv),
5387
10
                               std::move(loop_done));
5388
5389
10
    worker_pool->stop();
5390
5391
10
    if (!async_recycled_rowset_keys.empty()) {
5392
0
        if (txn_remove(txn_kv_.get(), async_recycled_rowset_keys) != 0) {
5393
0
            LOG(WARNING) << "failed to delete recycle rowset kv, instance_id=" << instance_id_;
5394
0
            return -1;
5395
0
        } else {
5396
0
            num_recycled.fetch_add(async_recycled_rowset_keys.size(), std::memory_order_relaxed);
5397
0
        }
5398
0
    }
5399
5400
    // Report final metrics after all concurrent tasks completed
5401
10
    segment_metrics_context_.report();
5402
10
    metrics_context.report();
5403
5404
10
    return ret;
5405
10
}
5406
5407
1.61k
int InstanceRecycler::recycle_rowset_meta_and_data(const RowsetDeleteTask& task) {
5408
1.61k
    constexpr int MAX_RETRY = 10;
5409
1.61k
    const RowsetMetaCloudPB& rowset_meta = task.rowset_meta;
5410
1.61k
    int64_t tablet_id = rowset_meta.tablet_id();
5411
1.61k
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
5412
1.61k
    std::string_view reference_instance_id = instance_id_;
5413
1.61k
    if (rowset_meta.has_reference_instance_id()) {
5414
8
        reference_instance_id = rowset_meta.reference_instance_id();
5415
8
    }
5416
5417
1.61k
    AnnotateTag tablet_id_tag("tablet_id", tablet_id);
5418
1.61k
    AnnotateTag rowset_id_tag("rowset_id", rowset_id);
5419
1.61k
    AnnotateTag rowset_key_tag("recycle_rowset_key", hex(task.recycle_rowset_key));
5420
1.61k
    AnnotateTag instance_id_tag("instance_id", instance_id_);
5421
1.61k
    AnnotateTag ref_instance_id_tag("ref_instance_id", reference_instance_id);
5422
1.61k
    for (int i = 0; i < MAX_RETRY; ++i) {
5423
1.61k
        std::unique_ptr<Transaction> txn;
5424
1.61k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5425
1.61k
        if (err != TxnErrorCode::TXN_OK) {
5426
0
            LOG_WARNING("failed to create txn").tag("err", err);
5427
0
            return -1;
5428
0
        }
5429
5430
1.61k
        std::string rowset_ref_count_key =
5431
1.61k
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
5432
1.61k
        int64_t ref_count = 0;
5433
1.61k
        {
5434
1.61k
            std::string value;
5435
1.61k
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
5436
1.61k
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
5437
                // This is the old version rowset, we could recycle it directly.
5438
1.60k
                ref_count = 1;
5439
1.60k
            } else if (err != TxnErrorCode::TXN_OK) {
5440
0
                LOG_WARNING("failed to get rowset ref count key").tag("err", err);
5441
0
                return -1;
5442
10
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
5443
0
                LOG_WARNING("failed to decode rowset data ref count").tag("value", hex(value));
5444
0
                return -1;
5445
0
            }
5446
1.61k
        }
5447
5448
1.61k
        if (ref_count == 1) {
5449
            // It would not be added since it is recycling.
5450
1.61k
            if (delete_rowset_data(rowset_meta) != 0) {
5451
1.60k
                LOG_WARNING("failed to delete rowset data");
5452
1.60k
                return -1;
5453
1.60k
            }
5454
5455
            // Reset the transaction to avoid timeout.
5456
10
            err = txn_kv_->create_txn(&txn);
5457
10
            if (err != TxnErrorCode::TXN_OK) {
5458
0
                LOG_WARNING("failed to create txn").tag("err", err);
5459
0
                return -1;
5460
0
            }
5461
10
            txn->remove(rowset_ref_count_key);
5462
10
            LOG_INFO("delete rowset data ref count key")
5463
10
                    .tag("txn_id", rowset_meta.txn_id())
5464
10
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5465
5466
10
            std::string dbm_start_key =
5467
10
                    meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
5468
10
            std::string dbm_end_key = meta_delete_bitmap_key(
5469
10
                    {reference_instance_id, tablet_id, rowset_id,
5470
10
                     std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
5471
10
            txn->remove(dbm_start_key, dbm_end_key);
5472
10
            LOG_INFO("remove delete bitmap kv")
5473
10
                    .tag("begin", hex(dbm_start_key))
5474
10
                    .tag("end", hex(dbm_end_key));
5475
5476
10
            std::string versioned_dbm_start_key = versioned::meta_delete_bitmap_key(
5477
10
                    {reference_instance_id, tablet_id, rowset_id});
5478
10
            std::string versioned_dbm_end_key = versioned_dbm_start_key;
5479
10
            encode_int64(INT64_MAX, &versioned_dbm_end_key);
5480
10
            txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
5481
10
            LOG_INFO("remove versioned delete bitmap kv")
5482
10
                    .tag("begin", hex(versioned_dbm_start_key))
5483
10
                    .tag("end", hex(versioned_dbm_end_key));
5484
10
        } else {
5485
            // Decrease the rowset ref count.
5486
            //
5487
            // The read conflict range will protect the rowset ref count key; if any conflict happens,
5488
            // we will retry and check whether the rowset ref count is 1 and the data need to be deleted.
5489
2
            txn->atomic_add(rowset_ref_count_key, -1);
5490
2
            LOG_INFO("decrease rowset data ref count")
5491
2
                    .tag("txn_id", rowset_meta.txn_id())
5492
2
                    .tag("ref_count", ref_count - 1)
5493
2
                    .tag("ref_count_key", hex(rowset_ref_count_key));
5494
2
        }
5495
5496
12
        if (!task.versioned_rowset_key.empty()) {
5497
0
            versioned::document_remove<RowsetMetaCloudPB>(txn.get(), task.versioned_rowset_key,
5498
0
                                                          task.versionstamp);
5499
0
            LOG_INFO("remove versioned meta rowset key").tag("key", hex(task.versioned_rowset_key));
5500
0
        }
5501
5502
12
        if (!task.non_versioned_rowset_key.empty()) {
5503
0
            txn->remove(task.non_versioned_rowset_key);
5504
0
            LOG_INFO("remove non versioned rowset key")
5505
0
                    .tag("key", hex(task.non_versioned_rowset_key));
5506
0
        }
5507
5508
        // Empty when recycling ref rowsets for a deleted instance.
5509
13
        if (!task.recycle_rowset_key.empty()) {
5510
13
            txn->remove(task.recycle_rowset_key);
5511
13
            LOG_INFO("remove recycle rowset key").tag("key", hex(task.recycle_rowset_key));
5512
13
        }
5513
5514
12
        err = txn->commit();
5515
12
        if (err == TxnErrorCode::TXN_CONFLICT) { // unlikely
5516
            // The rowset ref count key has been changed, we need to retry.
5517
0
            VLOG_DEBUG << "decrease rowset ref count but txn conflict, retry"
5518
0
                       << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
5519
0
                       << ", ref_count=" << ref_count << ", retry=" << i;
5520
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5521
0
            continue;
5522
12
        } else if (err != TxnErrorCode::TXN_OK) {
5523
0
            LOG_WARNING("failed to recycle rowset meta and data").tag("err", err);
5524
0
            return -1;
5525
0
        }
5526
12
        LOG_INFO("recycle rowset meta and data success");
5527
12
        return 0;
5528
12
    }
5529
1
    LOG_WARNING("failed to recycle rowset meta and data after retry")
5530
1
            .tag("tablet_id", tablet_id)
5531
1
            .tag("rowset_id", rowset_id)
5532
1
            .tag("retry", MAX_RETRY);
5533
1
    return -1;
5534
1.61k
}
5535
5536
39
int InstanceRecycler::recycle_tmp_rowsets() {
5537
39
    const std::string task_name = "recycle_tmp_rowsets";
5538
39
    int64_t num_scanned = 0;
5539
39
    int64_t num_expired = 0;
5540
39
    std::atomic_long num_recycled = 0;
5541
39
    size_t expired_rowset_size = 0;
5542
39
    size_t total_rowset_key_size = 0;
5543
39
    size_t total_rowset_value_size = 0;
5544
39
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5545
5546
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
5547
39
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
5548
39
    std::string tmp_rs_key0;
5549
39
    std::string tmp_rs_key1;
5550
39
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
5551
39
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
5552
5553
39
    LOG_WARNING("begin to recycle tmp rowsets").tag("instance_id", instance_id_);
5554
5555
39
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5556
39
    register_recycle_task(task_name, start_time);
5557
5558
39
    DORIS_CLOUD_DEFER {
5559
39
        unregister_recycle_task(task_name);
5560
39
        int64_t cost =
5561
39
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5562
39
        metrics_context.finish_report();
5563
39
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5564
39
                .tag("instance_id", instance_id_)
5565
39
                .tag("num_scanned", num_scanned)
5566
39
                .tag("num_expired", num_expired)
5567
39
                .tag("num_recycled", num_recycled)
5568
39
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5569
39
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5570
39
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5571
39
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5558
12
    DORIS_CLOUD_DEFER {
5559
12
        unregister_recycle_task(task_name);
5560
12
        int64_t cost =
5561
12
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5562
12
        metrics_context.finish_report();
5563
12
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5564
12
                .tag("instance_id", instance_id_)
5565
12
                .tag("num_scanned", num_scanned)
5566
12
                .tag("num_expired", num_expired)
5567
12
                .tag("num_recycled", num_recycled)
5568
12
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5569
12
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5570
12
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5571
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_0clEv
Line
Count
Source
5558
27
    DORIS_CLOUD_DEFER {
5559
27
        unregister_recycle_task(task_name);
5560
27
        int64_t cost =
5561
27
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5562
27
        metrics_context.finish_report();
5563
27
        LOG_WARNING("recycle tmp rowsets finished, cost={}s", cost)
5564
27
                .tag("instance_id", instance_id_)
5565
27
                .tag("num_scanned", num_scanned)
5566
27
                .tag("num_expired", num_expired)
5567
27
                .tag("num_recycled", num_recycled)
5568
27
                .tag("total_rowset_meta_key_size_scanned", total_rowset_key_size)
5569
27
                .tag("total_rowset_meta_value_size_scanned", total_rowset_value_size)
5570
27
                .tag("expired_rowset_meta_size_recycled", expired_rowset_size);
5571
27
    };
5572
5573
    // Elements in `tmp_rowset_keys` have the same lifetime as `it`
5574
5575
39
    std::vector<std::string> tmp_rowset_keys;
5576
39
    std::vector<std::string> tmp_rowset_ref_count_keys;
5577
5578
    // rowset_id -> rowset_meta
5579
    // store tmp_rowset id and meta for statistics rs size when delete
5580
39
    std::map<std::string, doris::RowsetMetaCloudPB> tmp_rowsets;
5581
39
    auto worker_pool = std::make_unique<SimpleThreadPool>(
5582
39
            config::instance_recycler_worker_pool_size, "recycle_tmp_rowsets");
5583
39
    worker_pool->start();
5584
5585
39
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5586
5587
39
    auto handle_rowset_kv = [&num_scanned, &num_expired, &tmp_rowset_keys, &tmp_rowsets,
5588
39
                             &expired_rowset_size, &total_rowset_key_size, &total_rowset_value_size,
5589
39
                             &earlest_ts, &tmp_rowset_ref_count_keys, this,
5590
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5591
106k
        ++num_scanned;
5592
106k
        total_rowset_key_size += k.size();
5593
106k
        total_rowset_value_size += v.size();
5594
106k
        doris::RowsetMetaCloudPB rowset;
5595
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5596
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5597
0
            return -1;
5598
0
        }
5599
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5600
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5601
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5602
0
                   << " txn_expiration=" << rowset.txn_expiration()
5603
0
                   << " rowset_creation_time=" << rowset.creation_time();
5604
106k
        int64_t current_time = ::time(nullptr);
5605
106k
        if (current_time < expiration) { // not expired
5606
0
            return 0;
5607
0
        }
5608
5609
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5610
106k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
5611
106k
            if (mark_ret == -1) {
5612
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
5613
0
                             << " tablet_id=" << rowset.tablet_id() << " version=["
5614
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5615
0
                return -1;
5616
106k
            } else if (mark_ret == 1) {
5617
52.0k
                LOG(INFO)
5618
52.0k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
5619
52.0k
                           "next turn, instance_id="
5620
52.0k
                        << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5621
52.0k
                        << rowset.start_version() << '-' << rowset.end_version() << "]";
5622
52.0k
                return 0;
5623
52.0k
            }
5624
106k
        }
5625
5626
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5627
54.0k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
5628
54.0k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5629
54.0k
                      << rowset.start_version() << '-' << rowset.end_version() << "]";
5630
5631
54.0k
            int ret = abort_txn_or_job_for_recycle(rowset);
5632
54.0k
            if (ret != 0) {
5633
0
                LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
5634
0
                             << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5635
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5636
0
                return ret;
5637
0
            }
5638
54.0k
        }
5639
5640
54.0k
        ++num_expired;
5641
54.0k
        expired_rowset_size += v.size();
5642
54.0k
        if (!rowset.has_resource_id()) {
5643
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5644
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5645
0
                return -1;
5646
0
            }
5647
            // might be a delete pred rowset
5648
0
            tmp_rowset_keys.emplace_back(k);
5649
0
            return 0;
5650
0
        }
5651
        // TODO(plat1ko): check rowset not referenced
5652
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5653
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5654
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5655
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5656
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5657
54.0k
                  << " num_expired=" << num_expired
5658
54.0k
                  << " task_type=" << metrics_context.operation_type;
5659
5660
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5661
        // Remove the rowset ref count key directly since it has not been used.
5662
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5663
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5664
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5665
54.0k
                  << "key=" << hex(rowset_ref_count_key);
5666
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5667
5668
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5669
54.0k
        return 0;
5670
54.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5590
16
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5591
16
        ++num_scanned;
5592
16
        total_rowset_key_size += k.size();
5593
16
        total_rowset_value_size += v.size();
5594
16
        doris::RowsetMetaCloudPB rowset;
5595
16
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5596
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5597
0
            return -1;
5598
0
        }
5599
16
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5600
16
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5601
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5602
0
                   << " txn_expiration=" << rowset.txn_expiration()
5603
0
                   << " rowset_creation_time=" << rowset.creation_time();
5604
16
        int64_t current_time = ::time(nullptr);
5605
16
        if (current_time < expiration) { // not expired
5606
0
            return 0;
5607
0
        }
5608
5609
16
        if (config::enable_mark_delete_rowset_before_recycle) {
5610
16
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
5611
16
            if (mark_ret == -1) {
5612
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
5613
0
                             << " tablet_id=" << rowset.tablet_id() << " version=["
5614
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5615
0
                return -1;
5616
16
            } else if (mark_ret == 1) {
5617
9
                LOG(INFO)
5618
9
                        << "rowset already marked as recycled, recycler will delete data and kv at "
5619
9
                           "next turn, instance_id="
5620
9
                        << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5621
9
                        << rowset.start_version() << '-' << rowset.end_version() << "]";
5622
9
                return 0;
5623
9
            }
5624
16
        }
5625
5626
7
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5627
7
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
5628
7
                      << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5629
7
                      << rowset.start_version() << '-' << rowset.end_version() << "]";
5630
5631
7
            int ret = abort_txn_or_job_for_recycle(rowset);
5632
7
            if (ret != 0) {
5633
0
                LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
5634
0
                             << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5635
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5636
0
                return ret;
5637
0
            }
5638
7
        }
5639
5640
7
        ++num_expired;
5641
7
        expired_rowset_size += v.size();
5642
7
        if (!rowset.has_resource_id()) {
5643
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5644
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5645
0
                return -1;
5646
0
            }
5647
            // might be a delete pred rowset
5648
0
            tmp_rowset_keys.emplace_back(k);
5649
0
            return 0;
5650
0
        }
5651
        // TODO(plat1ko): check rowset not referenced
5652
7
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5653
7
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5654
7
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5655
7
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5656
7
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5657
7
                  << " num_expired=" << num_expired
5658
7
                  << " task_type=" << metrics_context.operation_type;
5659
5660
7
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5661
        // Remove the rowset ref count key directly since it has not been used.
5662
7
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5663
7
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5664
7
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5665
7
                  << "key=" << hex(rowset_ref_count_key);
5666
7
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5667
5668
7
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5669
7
        return 0;
5670
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5590
106k
                             &metrics_context](std::string_view k, std::string_view v) -> int {
5591
106k
        ++num_scanned;
5592
106k
        total_rowset_key_size += k.size();
5593
106k
        total_rowset_value_size += v.size();
5594
106k
        doris::RowsetMetaCloudPB rowset;
5595
106k
        if (!rowset.ParseFromArray(v.data(), v.size())) {
5596
0
            LOG_WARNING("malformed rowset meta").tag("key", hex(k));
5597
0
            return -1;
5598
0
        }
5599
106k
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
5600
106k
        VLOG_DEBUG << "recycle tmp rowset scan, key=" << hex(k) << " num_scanned=" << num_scanned
5601
0
                   << " num_expired=" << num_expired << " expiration=" << expiration
5602
0
                   << " txn_expiration=" << rowset.txn_expiration()
5603
0
                   << " rowset_creation_time=" << rowset.creation_time();
5604
106k
        int64_t current_time = ::time(nullptr);
5605
106k
        if (current_time < expiration) { // not expired
5606
0
            return 0;
5607
0
        }
5608
5609
106k
        if (config::enable_mark_delete_rowset_before_recycle) {
5610
106k
            int mark_ret = mark_rowset_as_recycled(txn_kv_.get(), instance_id_, k, rowset);
5611
106k
            if (mark_ret == -1) {
5612
0
                LOG(WARNING) << "failed to mark rowset as recycled, instance_id=" << instance_id_
5613
0
                             << " tablet_id=" << rowset.tablet_id() << " version=["
5614
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5615
0
                return -1;
5616
106k
            } else if (mark_ret == 1) {
5617
52.0k
                LOG(INFO)
5618
52.0k
                        << "rowset already marked as recycled, recycler will delete data and kv at "
5619
52.0k
                           "next turn, instance_id="
5620
52.0k
                        << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5621
52.0k
                        << rowset.start_version() << '-' << rowset.end_version() << "]";
5622
52.0k
                return 0;
5623
52.0k
            }
5624
106k
        }
5625
5626
54.0k
        if (config::enable_abort_txn_and_job_for_delete_rowset_before_recycle) {
5627
54.0k
            LOG(INFO) << "begin to abort txn or job for related rowset, instance_id="
5628
54.0k
                      << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5629
54.0k
                      << rowset.start_version() << '-' << rowset.end_version() << "]";
5630
5631
54.0k
            int ret = abort_txn_or_job_for_recycle(rowset);
5632
54.0k
            if (ret != 0) {
5633
0
                LOG(WARNING) << "failed to abort txn or job for related rowset, instance_id="
5634
0
                             << instance_id_ << " tablet_id=" << rowset.tablet_id() << " version=["
5635
0
                             << rowset.start_version() << '-' << rowset.end_version() << "]";
5636
0
                return ret;
5637
0
            }
5638
54.0k
        }
5639
5640
54.0k
        ++num_expired;
5641
54.0k
        expired_rowset_size += v.size();
5642
54.0k
        if (!rowset.has_resource_id()) {
5643
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
5644
0
                LOG_WARNING("rowset meta has empty resource id").tag("key", k);
5645
0
                return -1;
5646
0
            }
5647
            // might be a delete pred rowset
5648
0
            tmp_rowset_keys.emplace_back(k);
5649
0
            return 0;
5650
0
        }
5651
        // TODO(plat1ko): check rowset not referenced
5652
54.0k
        LOG(INFO) << "delete rowset data, instance_id=" << instance_id_
5653
54.0k
                  << " tablet_id=" << rowset.tablet_id() << " rowset_id=" << rowset.rowset_id_v2()
5654
54.0k
                  << " version=[" << rowset.start_version() << '-' << rowset.end_version()
5655
54.0k
                  << "] txn_id=" << rowset.txn_id() << " rowset_meta_size=" << v.size()
5656
54.0k
                  << " creation_time=" << rowset.creation_time() << " num_scanned=" << num_scanned
5657
54.0k
                  << " num_expired=" << num_expired
5658
54.0k
                  << " task_type=" << metrics_context.operation_type;
5659
5660
54.0k
        tmp_rowset_keys.emplace_back(k.data(), k.size());
5661
        // Remove the rowset ref count key directly since it has not been used.
5662
54.0k
        std::string rowset_ref_count_key = versioned::data_rowset_ref_count_key(
5663
54.0k
                {instance_id_, rowset.tablet_id(), rowset.rowset_id_v2()});
5664
54.0k
        LOG(INFO) << "delete rowset ref count key, instance_id=" << instance_id_
5665
54.0k
                  << "key=" << hex(rowset_ref_count_key);
5666
54.0k
        tmp_rowset_ref_count_keys.push_back(rowset_ref_count_key);
5667
5668
54.0k
        tmp_rowsets.emplace(rowset.rowset_id_v2(), std::move(rowset));
5669
54.0k
        return 0;
5670
54.0k
    };
5671
5672
    // TODO batch delete
5673
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5674
51.0k
        std::string dbm_start_key =
5675
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5676
51.0k
        std::string dbm_end_key = dbm_start_key;
5677
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5678
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5679
51.0k
        if (ret != 0) {
5680
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5681
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5682
0
                         << ", rowset_id=" << rowset_id;
5683
0
        }
5684
51.0k
        return ret;
5685
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5673
7
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5674
7
        std::string dbm_start_key =
5675
7
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5676
7
        std::string dbm_end_key = dbm_start_key;
5677
7
        encode_int64(INT64_MAX, &dbm_end_key);
5678
7
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5679
7
        if (ret != 0) {
5680
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5681
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5682
0
                         << ", rowset_id=" << rowset_id;
5683
0
        }
5684
7
        return ret;
5685
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_3clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5673
51.0k
    auto delete_versioned_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5674
51.0k
        std::string dbm_start_key =
5675
51.0k
                versioned::meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id});
5676
51.0k
        std::string dbm_end_key = dbm_start_key;
5677
51.0k
        encode_int64(INT64_MAX, &dbm_end_key);
5678
51.0k
        auto ret = txn_remove(txn_kv_.get(), dbm_start_key, dbm_end_key);
5679
51.0k
        if (ret != 0) {
5680
0
            LOG(WARNING) << "failed to delete versioned delete bitmap kv, instance_id="
5681
0
                         << instance_id_ << ", tablet_id=" << tablet_id
5682
0
                         << ", rowset_id=" << rowset_id;
5683
0
        }
5684
51.0k
        return ret;
5685
51.0k
    };
5686
5687
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5688
51.0k
        auto delete_bitmap_start =
5689
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5690
51.0k
        auto delete_bitmap_end =
5691
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5692
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5693
51.0k
        if (ret != 0) {
5694
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5695
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5696
0
        }
5697
51.0k
        return ret;
5698
51.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5687
7
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5688
7
        auto delete_bitmap_start =
5689
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5690
7
        auto delete_bitmap_end =
5691
7
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5692
7
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5693
7
        if (ret != 0) {
5694
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5695
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5696
0
        }
5697
7
        return ret;
5698
7
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_4clElRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
5687
51.0k
    auto delete_delete_bitmap_kvs = [&](int64_t tablet_id, const std::string& rowset_id) {
5688
51.0k
        auto delete_bitmap_start =
5689
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, 0, 0});
5690
51.0k
        auto delete_bitmap_end =
5691
51.0k
                meta_delete_bitmap_key({instance_id_, tablet_id, rowset_id, INT64_MAX, INT64_MAX});
5692
51.0k
        auto ret = txn_remove(txn_kv_.get(), delete_bitmap_start, delete_bitmap_end);
5693
51.0k
        if (ret != 0) {
5694
0
            LOG(WARNING) << "failed to delete delete bitmap kv, instance_id=" << instance_id_
5695
0
                         << ", tablet_id=" << tablet_id << ", rowset_id=" << rowset_id;
5696
0
        }
5697
51.0k
        return ret;
5698
51.0k
    };
5699
5700
39
    auto loop_done = [&]() -> int {
5701
32
        DORIS_CLOUD_DEFER {
5702
32
            tmp_rowset_keys.clear();
5703
32
            tmp_rowsets.clear();
5704
32
            tmp_rowset_ref_count_keys.clear();
5705
32
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5701
12
        DORIS_CLOUD_DEFER {
5702
12
            tmp_rowset_keys.clear();
5703
12
            tmp_rowsets.clear();
5704
12
            tmp_rowset_ref_count_keys.clear();
5705
12
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
5701
20
        DORIS_CLOUD_DEFER {
5702
20
            tmp_rowset_keys.clear();
5703
20
            tmp_rowsets.clear();
5704
20
            tmp_rowset_ref_count_keys.clear();
5705
20
        };
5706
32
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5707
32
                             tmp_rowsets_to_delete = tmp_rowsets,
5708
32
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5709
32
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5710
32
                                   metrics_context) != 0) {
5711
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5712
3
                return;
5713
3
            }
5714
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5715
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5716
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5717
0
                                 << rs.ShortDebugString();
5718
0
                    return;
5719
0
                }
5720
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5721
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5722
0
                                 << rs.ShortDebugString();
5723
0
                    return;
5724
0
                }
5725
51.0k
            }
5726
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5727
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5728
0
                return;
5729
0
            }
5730
29
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5731
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5732
0
                return;
5733
0
            }
5734
29
            num_recycled += tmp_rowset_keys.size();
5735
29
            return;
5736
29
        });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5708
12
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5709
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5710
12
                                   metrics_context) != 0) {
5711
0
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5712
0
                return;
5713
0
            }
5714
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5715
7
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5716
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5717
0
                                 << rs.ShortDebugString();
5718
0
                    return;
5719
0
                }
5720
7
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5721
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5722
0
                                 << rs.ShortDebugString();
5723
0
                    return;
5724
0
                }
5725
7
            }
5726
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5727
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5728
0
                return;
5729
0
            }
5730
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5731
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5732
0
                return;
5733
0
            }
5734
12
            num_recycled += tmp_rowset_keys.size();
5735
12
            return;
5736
12
        });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
5708
20
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5709
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5710
20
                                   metrics_context) != 0) {
5711
3
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5712
3
                return;
5713
3
            }
5714
51.0k
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5715
51.0k
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5716
0
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5717
0
                                 << rs.ShortDebugString();
5718
0
                    return;
5719
0
                }
5720
51.0k
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5721
0
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5722
0
                                 << rs.ShortDebugString();
5723
0
                    return;
5724
0
                }
5725
51.0k
            }
5726
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5727
0
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5728
0
                return;
5729
0
            }
5730
17
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5731
0
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5732
0
                return;
5733
0
            }
5734
17
            num_recycled += tmp_rowset_keys.size();
5735
17
            return;
5736
17
        });
5737
32
        return 0;
5738
32
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
5700
12
    auto loop_done = [&]() -> int {
5701
12
        DORIS_CLOUD_DEFER {
5702
12
            tmp_rowset_keys.clear();
5703
12
            tmp_rowsets.clear();
5704
12
            tmp_rowset_ref_count_keys.clear();
5705
12
        };
5706
12
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5707
12
                             tmp_rowsets_to_delete = tmp_rowsets,
5708
12
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5709
12
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5710
12
                                   metrics_context) != 0) {
5711
12
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5712
12
                return;
5713
12
            }
5714
12
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5715
12
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5716
12
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5717
12
                                 << rs.ShortDebugString();
5718
12
                    return;
5719
12
                }
5720
12
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5721
12
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5722
12
                                 << rs.ShortDebugString();
5723
12
                    return;
5724
12
                }
5725
12
            }
5726
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5727
12
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5728
12
                return;
5729
12
            }
5730
12
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5731
12
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5732
12
                return;
5733
12
            }
5734
12
            num_recycled += tmp_rowset_keys.size();
5735
12
            return;
5736
12
        });
5737
12
        return 0;
5738
12
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler19recycle_tmp_rowsetsEvENK3$_1clEv
Line
Count
Source
5700
20
    auto loop_done = [&]() -> int {
5701
20
        DORIS_CLOUD_DEFER {
5702
20
            tmp_rowset_keys.clear();
5703
20
            tmp_rowsets.clear();
5704
20
            tmp_rowset_ref_count_keys.clear();
5705
20
        };
5706
20
        worker_pool->submit([&, tmp_rowset_keys_to_delete = tmp_rowset_keys,
5707
20
                             tmp_rowsets_to_delete = tmp_rowsets,
5708
20
                             tmp_rowset_ref_count_keys_to_delete = tmp_rowset_ref_count_keys]() {
5709
20
            if (delete_rowset_data(tmp_rowsets_to_delete, RowsetRecyclingState::TMP_ROWSET,
5710
20
                                   metrics_context) != 0) {
5711
20
                LOG(WARNING) << "failed to delete tmp rowset data, instance_id=" << instance_id_;
5712
20
                return;
5713
20
            }
5714
20
            for (const auto& [_, rs] : tmp_rowsets_to_delete) {
5715
20
                if (delete_versioned_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5716
20
                    LOG(WARNING) << "failed to delete versioned delete bitmap kv, rs="
5717
20
                                 << rs.ShortDebugString();
5718
20
                    return;
5719
20
                }
5720
20
                if (delete_delete_bitmap_kvs(rs.tablet_id(), rs.rowset_id_v2()) != 0) {
5721
20
                    LOG(WARNING) << "failed to delete delete bitmap kv, rs="
5722
20
                                 << rs.ShortDebugString();
5723
20
                    return;
5724
20
                }
5725
20
            }
5726
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_keys_to_delete) != 0) {
5727
20
                LOG(WARNING) << "failed to tmp rowset kv, instance_id=" << instance_id_;
5728
20
                return;
5729
20
            }
5730
20
            if (txn_remove(txn_kv_.get(), tmp_rowset_ref_count_keys_to_delete) != 0) {
5731
20
                LOG(WARNING) << "failed to tmp rowset ref count kv, instance_id=" << instance_id_;
5732
20
                return;
5733
20
            }
5734
20
            num_recycled += tmp_rowset_keys.size();
5735
20
            return;
5736
20
        });
5737
20
        return 0;
5738
20
    };
5739
5740
39
    if (config::enable_recycler_stats_metrics) {
5741
0
        scan_and_statistics_tmp_rowsets();
5742
0
    }
5743
    // recycle_func and loop_done for scan and recycle
5744
39
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_rowset_kv),
5745
39
                               std::move(loop_done));
5746
5747
39
    worker_pool->stop();
5748
5749
    // Report final metrics after all concurrent tasks completed
5750
39
    segment_metrics_context_.report();
5751
39
    metrics_context.report();
5752
5753
39
    return ret;
5754
39
}
5755
5756
int InstanceRecycler::scan_and_recycle(
5757
        std::string begin, std::string_view end,
5758
        std::function<int(std::string_view k, std::string_view v)> recycle_func,
5759
268
        std::function<int()> loop_done) {
5760
268
    LOG(INFO) << "begin scan_and_recycle key_range=[" << hex(begin) << "," << hex(end) << ")";
5761
268
    int ret = 0;
5762
268
    int64_t cnt = 0;
5763
268
    int get_range_retried = 0;
5764
268
    std::string err;
5765
268
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5766
268
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5767
268
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5768
268
                  << " ret=" << ret << " err=" << err;
5769
268
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5765
31
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5766
31
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5767
31
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5768
31
                  << " ret=" << ret << " err=" << err;
5769
31
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler16scan_and_recycleENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS5_ESt8functionIFiS9_S9_EESA_IFivEEENK3$_0clEv
Line
Count
Source
5765
237
    DORIS_CLOUD_DEFER_COPY(begin, end) {
5766
237
        LOG(INFO) << "finish scan_and_recycle key_range=[" << hex(begin) << "," << hex(end)
5767
237
                  << ") num_scanned=" << cnt << " get_range_retried=" << get_range_retried
5768
237
                  << " ret=" << ret << " err=" << err;
5769
237
    };
5770
5771
268
    std::unique_ptr<RangeGetIterator> it;
5772
449
    while (it == nullptr /* may be not init */ || (it->more() && !stopped())) {
5773
321
        if (get_range_retried > 1000) {
5774
0
            err = "txn_get exceeds max retry(1000), may not scan all keys";
5775
0
            ret = -3;
5776
0
            return ret;
5777
0
        }
5778
321
        int get_ret = txn_get(txn_kv_.get(), begin, end, it);
5779
321
        if (get_ret != 0) { // txn kv may complain "Request for future version"
5780
0
            LOG(WARNING) << "failed to get kv, range=[" << hex(begin) << "," << hex(end)
5781
0
                         << ") num_scanned=" << cnt << " txn_get_ret=" << get_ret
5782
0
                         << " get_range_retried=" << get_range_retried;
5783
0
            ++get_range_retried;
5784
0
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
5785
0
            continue; // try again
5786
0
        }
5787
321
        if (!it->has_next()) {
5788
140
            LOG(INFO) << "no keys in the given range=[" << hex(begin) << "," << hex(end) << ")";
5789
140
            break; // scan finished
5790
140
        }
5791
154k
        while (it->has_next()) {
5792
154k
            ++cnt;
5793
            // recycle corresponding resources
5794
154k
            auto [k, v] = it->next();
5795
154k
            if (!it->has_next()) {
5796
181
                begin = k;
5797
181
                VLOG_DEBUG << "iterator has no more kvs. key=" << hex(k);
5798
181
            }
5799
            // FIXME(gavin): if we want to continue scanning, the recycle_func should not return non-zero
5800
154k
            if (recycle_func(k, v) != 0) {
5801
4.00k
                err = "recycle_func error";
5802
4.00k
                ret = -1;
5803
4.00k
            }
5804
154k
        }
5805
181
        begin.push_back('\x00'); // Update to next smallest key for iteration
5806
        // FIXME(gavin): if we want to continue scanning, the loop_done should not return non-zero
5807
181
        if (loop_done && loop_done() != 0) {
5808
4
            err = "loop_done error";
5809
4
            ret = -1;
5810
4
        }
5811
181
    }
5812
268
    return ret;
5813
268
}
5814
5815
19
int InstanceRecycler::abort_timeout_txn() {
5816
19
    const std::string task_name = "abort_timeout_txn";
5817
19
    int64_t num_scanned = 0;
5818
19
    int64_t num_timeout = 0;
5819
19
    int64_t num_abort = 0;
5820
19
    int64_t num_advance = 0;
5821
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5822
5823
19
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
5824
19
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5825
19
    std::string begin_txn_running_key;
5826
19
    std::string end_txn_running_key;
5827
19
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
5828
19
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
5829
5830
19
    LOG_WARNING("begin to abort timeout txn").tag("instance_id", instance_id_);
5831
5832
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5833
19
    register_recycle_task(task_name, start_time);
5834
5835
19
    DORIS_CLOUD_DEFER {
5836
19
        unregister_recycle_task(task_name);
5837
19
        int64_t cost =
5838
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5839
19
        metrics_context.finish_report();
5840
19
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5841
19
                .tag("instance_id", instance_id_)
5842
19
                .tag("num_scanned", num_scanned)
5843
19
                .tag("num_timeout", num_timeout)
5844
19
                .tag("num_abort", num_abort)
5845
19
                .tag("num_advance", num_advance);
5846
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
5835
3
    DORIS_CLOUD_DEFER {
5836
3
        unregister_recycle_task(task_name);
5837
3
        int64_t cost =
5838
3
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5839
3
        metrics_context.finish_report();
5840
3
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5841
3
                .tag("instance_id", instance_id_)
5842
3
                .tag("num_scanned", num_scanned)
5843
3
                .tag("num_timeout", num_timeout)
5844
3
                .tag("num_abort", num_abort)
5845
3
                .tag("num_advance", num_advance);
5846
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_0clEv
Line
Count
Source
5835
16
    DORIS_CLOUD_DEFER {
5836
16
        unregister_recycle_task(task_name);
5837
16
        int64_t cost =
5838
16
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5839
16
        metrics_context.finish_report();
5840
16
        LOG_WARNING("end to abort timeout txn, cost={}s", cost)
5841
16
                .tag("instance_id", instance_id_)
5842
16
                .tag("num_scanned", num_scanned)
5843
16
                .tag("num_timeout", num_timeout)
5844
16
                .tag("num_abort", num_abort)
5845
16
                .tag("num_advance", num_advance);
5846
16
    };
5847
5848
19
    int64_t current_time =
5849
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
5850
5851
19
    auto handle_txn_running_kv = [&num_scanned, &num_timeout, &num_abort, &num_advance,
5852
19
                                  &current_time, &metrics_context,
5853
19
                                  this](std::string_view k, std::string_view v) -> int {
5854
9
        ++num_scanned;
5855
5856
9
        std::unique_ptr<Transaction> txn;
5857
9
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5858
9
        if (err != TxnErrorCode::TXN_OK) {
5859
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5860
0
            return -1;
5861
0
        }
5862
9
        std::string_view k1 = k;
5863
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5864
9
        k1.remove_prefix(1); // Remove key space
5865
9
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5866
9
        if (decode_key(&k1, &out) != 0) {
5867
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5868
0
            return -1;
5869
0
        }
5870
9
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5871
9
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5872
9
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5873
        // Update txn_info
5874
9
        std::string txn_inf_key, txn_inf_val;
5875
9
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5876
9
        err = txn->get(txn_inf_key, &txn_inf_val);
5877
9
        if (err != TxnErrorCode::TXN_OK) {
5878
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5879
0
            return -1;
5880
0
        }
5881
9
        TxnInfoPB txn_info;
5882
9
        if (!txn_info.ParseFromString(txn_inf_val)) {
5883
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5884
0
            return -1;
5885
0
        }
5886
5887
9
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5888
3
            txn.reset();
5889
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5890
3
            std::shared_ptr<TxnLazyCommitTask> task =
5891
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5892
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5893
3
            if (ret.first != MetaServiceCode::OK) {
5894
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5895
0
                             << "msg=" << ret.second;
5896
0
                return -1;
5897
0
            }
5898
3
            ++num_advance;
5899
3
            return 0;
5900
6
        } else {
5901
6
            TxnRunningPB txn_running_pb;
5902
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5903
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5904
0
                return -1;
5905
0
            }
5906
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5907
4
                return 0;
5908
4
            }
5909
2
            ++num_timeout;
5910
5911
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5912
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5913
2
            txn_info.set_finish_time(current_time);
5914
2
            txn_info.set_reason("timeout");
5915
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5916
2
            txn_inf_val.clear();
5917
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5918
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5919
0
                return -1;
5920
0
            }
5921
2
            txn->put(txn_inf_key, txn_inf_val);
5922
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5923
            // Put recycle txn key
5924
2
            std::string recyc_txn_key, recyc_txn_val;
5925
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5926
2
            RecycleTxnPB recycle_txn_pb;
5927
2
            recycle_txn_pb.set_creation_time(current_time);
5928
2
            recycle_txn_pb.set_label(txn_info.label());
5929
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5930
0
                LOG_WARNING("failed to serialize txn recycle info")
5931
0
                        .tag("key", hex(k))
5932
0
                        .tag("db_id", db_id)
5933
0
                        .tag("txn_id", txn_id);
5934
0
                return -1;
5935
0
            }
5936
2
            txn->put(recyc_txn_key, recyc_txn_val);
5937
            // Remove txn running key
5938
2
            txn->remove(k);
5939
2
            err = txn->commit();
5940
2
            if (err != TxnErrorCode::TXN_OK) {
5941
0
                LOG_WARNING("failed to commit txn err={}", err)
5942
0
                        .tag("key", hex(k))
5943
0
                        .tag("db_id", db_id)
5944
0
                        .tag("txn_id", txn_id);
5945
0
                return -1;
5946
0
            }
5947
2
            metrics_context.total_recycled_num = ++num_abort;
5948
2
            metrics_context.report();
5949
2
        }
5950
5951
2
        return 0;
5952
9
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5853
3
                                  this](std::string_view k, std::string_view v) -> int {
5854
3
        ++num_scanned;
5855
5856
3
        std::unique_ptr<Transaction> txn;
5857
3
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5858
3
        if (err != TxnErrorCode::TXN_OK) {
5859
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5860
0
            return -1;
5861
0
        }
5862
3
        std::string_view k1 = k;
5863
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5864
3
        k1.remove_prefix(1); // Remove key space
5865
3
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5866
3
        if (decode_key(&k1, &out) != 0) {
5867
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5868
0
            return -1;
5869
0
        }
5870
3
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5871
3
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5872
3
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5873
        // Update txn_info
5874
3
        std::string txn_inf_key, txn_inf_val;
5875
3
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5876
3
        err = txn->get(txn_inf_key, &txn_inf_val);
5877
3
        if (err != TxnErrorCode::TXN_OK) {
5878
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5879
0
            return -1;
5880
0
        }
5881
3
        TxnInfoPB txn_info;
5882
3
        if (!txn_info.ParseFromString(txn_inf_val)) {
5883
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5884
0
            return -1;
5885
0
        }
5886
5887
3
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5888
3
            txn.reset();
5889
3
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5890
3
            std::shared_ptr<TxnLazyCommitTask> task =
5891
3
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5892
3
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5893
3
            if (ret.first != MetaServiceCode::OK) {
5894
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5895
0
                             << "msg=" << ret.second;
5896
0
                return -1;
5897
0
            }
5898
3
            ++num_advance;
5899
3
            return 0;
5900
3
        } else {
5901
0
            TxnRunningPB txn_running_pb;
5902
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5903
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5904
0
                return -1;
5905
0
            }
5906
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5907
0
                return 0;
5908
0
            }
5909
0
            ++num_timeout;
5910
5911
0
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5912
0
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5913
0
            txn_info.set_finish_time(current_time);
5914
0
            txn_info.set_reason("timeout");
5915
0
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5916
0
            txn_inf_val.clear();
5917
0
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5918
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5919
0
                return -1;
5920
0
            }
5921
0
            txn->put(txn_inf_key, txn_inf_val);
5922
0
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5923
            // Put recycle txn key
5924
0
            std::string recyc_txn_key, recyc_txn_val;
5925
0
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5926
0
            RecycleTxnPB recycle_txn_pb;
5927
0
            recycle_txn_pb.set_creation_time(current_time);
5928
0
            recycle_txn_pb.set_label(txn_info.label());
5929
0
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5930
0
                LOG_WARNING("failed to serialize txn recycle info")
5931
0
                        .tag("key", hex(k))
5932
0
                        .tag("db_id", db_id)
5933
0
                        .tag("txn_id", txn_id);
5934
0
                return -1;
5935
0
            }
5936
0
            txn->put(recyc_txn_key, recyc_txn_val);
5937
            // Remove txn running key
5938
0
            txn->remove(k);
5939
0
            err = txn->commit();
5940
0
            if (err != TxnErrorCode::TXN_OK) {
5941
0
                LOG_WARNING("failed to commit txn err={}", err)
5942
0
                        .tag("key", hex(k))
5943
0
                        .tag("db_id", db_id)
5944
0
                        .tag("txn_id", txn_id);
5945
0
                return -1;
5946
0
            }
5947
0
            metrics_context.total_recycled_num = ++num_abort;
5948
0
            metrics_context.report();
5949
0
        }
5950
5951
0
        return 0;
5952
3
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17abort_timeout_txnEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
5853
6
                                  this](std::string_view k, std::string_view v) -> int {
5854
6
        ++num_scanned;
5855
5856
6
        std::unique_ptr<Transaction> txn;
5857
6
        TxnErrorCode err = txn_kv_->create_txn(&txn);
5858
6
        if (err != TxnErrorCode::TXN_OK) {
5859
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
5860
0
            return -1;
5861
0
        }
5862
6
        std::string_view k1 = k;
5863
        //TxnRunningKeyInfo 0:instance_id  1:db_id  2:txn_id
5864
6
        k1.remove_prefix(1); // Remove key space
5865
6
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
5866
6
        if (decode_key(&k1, &out) != 0) {
5867
0
            LOG_ERROR("failed to decode key").tag("key", hex(k));
5868
0
            return -1;
5869
0
        }
5870
6
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
5871
6
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
5872
6
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
5873
        // Update txn_info
5874
6
        std::string txn_inf_key, txn_inf_val;
5875
6
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
5876
6
        err = txn->get(txn_inf_key, &txn_inf_val);
5877
6
        if (err != TxnErrorCode::TXN_OK) {
5878
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(txn_inf_key));
5879
0
            return -1;
5880
0
        }
5881
6
        TxnInfoPB txn_info;
5882
6
        if (!txn_info.ParseFromString(txn_inf_val)) {
5883
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(k));
5884
0
            return -1;
5885
0
        }
5886
5887
6
        if (TxnStatusPB::TXN_STATUS_COMMITTED == txn_info.status()) {
5888
0
            txn.reset();
5889
0
            TEST_SYNC_POINT_CALLBACK("abort_timeout_txn::advance_last_pending_txn_id", &txn_info);
5890
0
            std::shared_ptr<TxnLazyCommitTask> task =
5891
0
                    txn_lazy_committer_->submit(instance_id_, txn_info.txn_id());
5892
0
            std::pair<MetaServiceCode, std::string> ret = task->wait();
5893
0
            if (ret.first != MetaServiceCode::OK) {
5894
0
                LOG(WARNING) << "lazy commit txn failed txn_id=" << txn_id << " code=" << ret.first
5895
0
                             << "msg=" << ret.second;
5896
0
                return -1;
5897
0
            }
5898
0
            ++num_advance;
5899
0
            return 0;
5900
6
        } else {
5901
6
            TxnRunningPB txn_running_pb;
5902
6
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
5903
0
                LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
5904
0
                return -1;
5905
0
            }
5906
6
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
5907
4
                return 0;
5908
4
            }
5909
2
            ++num_timeout;
5910
5911
2
            DCHECK(txn_info.status() != TxnStatusPB::TXN_STATUS_VISIBLE);
5912
2
            txn_info.set_status(TxnStatusPB::TXN_STATUS_ABORTED);
5913
2
            txn_info.set_finish_time(current_time);
5914
2
            txn_info.set_reason("timeout");
5915
2
            VLOG_DEBUG << "txn_info=" << txn_info.ShortDebugString();
5916
2
            txn_inf_val.clear();
5917
2
            if (!txn_info.SerializeToString(&txn_inf_val)) {
5918
0
                LOG_WARNING("failed to serialize txn info").tag("key", hex(k));
5919
0
                return -1;
5920
0
            }
5921
2
            txn->put(txn_inf_key, txn_inf_val);
5922
2
            VLOG_DEBUG << "txn->put, txn_inf_key=" << hex(txn_inf_key);
5923
            // Put recycle txn key
5924
2
            std::string recyc_txn_key, recyc_txn_val;
5925
2
            recycle_txn_key({instance_id_, db_id, txn_id}, &recyc_txn_key);
5926
2
            RecycleTxnPB recycle_txn_pb;
5927
2
            recycle_txn_pb.set_creation_time(current_time);
5928
2
            recycle_txn_pb.set_label(txn_info.label());
5929
2
            if (!recycle_txn_pb.SerializeToString(&recyc_txn_val)) {
5930
0
                LOG_WARNING("failed to serialize txn recycle info")
5931
0
                        .tag("key", hex(k))
5932
0
                        .tag("db_id", db_id)
5933
0
                        .tag("txn_id", txn_id);
5934
0
                return -1;
5935
0
            }
5936
2
            txn->put(recyc_txn_key, recyc_txn_val);
5937
            // Remove txn running key
5938
2
            txn->remove(k);
5939
2
            err = txn->commit();
5940
2
            if (err != TxnErrorCode::TXN_OK) {
5941
0
                LOG_WARNING("failed to commit txn err={}", err)
5942
0
                        .tag("key", hex(k))
5943
0
                        .tag("db_id", db_id)
5944
0
                        .tag("txn_id", txn_id);
5945
0
                return -1;
5946
0
            }
5947
2
            metrics_context.total_recycled_num = ++num_abort;
5948
2
            metrics_context.report();
5949
2
        }
5950
5951
2
        return 0;
5952
6
    };
5953
5954
19
    if (config::enable_recycler_stats_metrics) {
5955
0
        scan_and_statistics_abort_timeout_txn();
5956
0
    }
5957
    // recycle_func and loop_done for scan and recycle
5958
19
    return scan_and_recycle(begin_txn_running_key, end_txn_running_key,
5959
19
                            std::move(handle_txn_running_kv));
5960
19
}
5961
5962
19
int InstanceRecycler::recycle_expired_txn_label() {
5963
19
    const std::string task_name = "recycle_expired_txn_label";
5964
19
    int64_t num_scanned = 0;
5965
19
    int64_t num_expired = 0;
5966
19
    std::atomic_long num_recycled = 0;
5967
19
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
5968
19
    int ret = 0;
5969
5970
19
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
5971
19
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
5972
19
    std::string begin_recycle_txn_key;
5973
19
    std::string end_recycle_txn_key;
5974
19
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
5975
19
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
5976
19
    std::vector<std::string> recycle_txn_info_keys;
5977
5978
19
    LOG_WARNING("begin to recycle expired txn").tag("instance_id", instance_id_);
5979
5980
19
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
5981
19
    register_recycle_task(task_name, start_time);
5982
19
    DORIS_CLOUD_DEFER {
5983
19
        unregister_recycle_task(task_name);
5984
19
        int64_t cost =
5985
19
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5986
19
        metrics_context.finish_report();
5987
19
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5988
19
                .tag("instance_id", instance_id_)
5989
19
                .tag("num_scanned", num_scanned)
5990
19
                .tag("num_expired", num_expired)
5991
19
                .tag("num_recycled", num_recycled);
5992
19
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5982
1
    DORIS_CLOUD_DEFER {
5983
1
        unregister_recycle_task(task_name);
5984
1
        int64_t cost =
5985
1
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5986
1
        metrics_context.finish_report();
5987
1
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5988
1
                .tag("instance_id", instance_id_)
5989
1
                .tag("num_scanned", num_scanned)
5990
1
                .tag("num_expired", num_expired)
5991
1
                .tag("num_recycled", num_recycled);
5992
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_0clEv
Line
Count
Source
5982
18
    DORIS_CLOUD_DEFER {
5983
18
        unregister_recycle_task(task_name);
5984
18
        int64_t cost =
5985
18
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
5986
18
        metrics_context.finish_report();
5987
18
        LOG_WARNING("end to recycle expired txn, cost={}s", cost)
5988
18
                .tag("instance_id", instance_id_)
5989
18
                .tag("num_scanned", num_scanned)
5990
18
                .tag("num_expired", num_expired)
5991
18
                .tag("num_recycled", num_recycled);
5992
18
    };
5993
5994
19
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
5995
5996
19
    SyncExecutor<int> concurrent_delete_executor(
5997
19
            _thread_pool_group.s3_producer_pool,
5998
19
            fmt::format("recycle expired txn label, instance id {}", instance_id_),
5999
23.0k
            [](const int& ret) { return ret != 0; });
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5999
1
            [](const int& ret) { return ret != 0; });
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_2clERKi
Line
Count
Source
5999
23.0k
            [](const int& ret) { return ret != 0; });
6000
6001
19
    int64_t current_time_ms =
6002
19
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6003
6004
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6005
30.0k
        ++num_scanned;
6006
30.0k
        RecycleTxnPB recycle_txn_pb;
6007
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6008
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6009
0
            return -1;
6010
0
        }
6011
30.0k
        if ((config::force_immediate_recycle) ||
6012
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6013
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6014
30.0k
             current_time_ms)) {
6015
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6016
23.0k
            num_expired++;
6017
23.0k
            recycle_txn_info_keys.emplace_back(k);
6018
23.0k
        }
6019
30.0k
        return 0;
6020
30.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6004
1
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6005
1
        ++num_scanned;
6006
1
        RecycleTxnPB recycle_txn_pb;
6007
1
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6008
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6009
0
            return -1;
6010
0
        }
6011
1
        if ((config::force_immediate_recycle) ||
6012
1
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6013
1
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6014
1
             current_time_ms)) {
6015
1
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6016
1
            num_expired++;
6017
1
            recycle_txn_info_keys.emplace_back(k);
6018
1
        }
6019
1
        return 0;
6020
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_3clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6004
30.0k
    auto handle_recycle_txn_kv = [&, this](std::string_view k, std::string_view v) -> int {
6005
30.0k
        ++num_scanned;
6006
30.0k
        RecycleTxnPB recycle_txn_pb;
6007
30.0k
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
6008
0
            LOG_WARNING("malformed txn_running_pb").tag("key", hex(k));
6009
0
            return -1;
6010
0
        }
6011
30.0k
        if ((config::force_immediate_recycle) ||
6012
30.0k
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
6013
30.0k
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
6014
30.0k
             current_time_ms)) {
6015
23.0k
            VLOG_DEBUG << "found recycle txn, key=" << hex(k);
6016
23.0k
            num_expired++;
6017
23.0k
            recycle_txn_info_keys.emplace_back(k);
6018
23.0k
        }
6019
30.0k
        return 0;
6020
30.0k
    };
6021
6022
    // int 0 for success, 1 for conflict, -1 for error
6023
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6024
23.0k
        std::string_view k1 = k;
6025
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6026
23.0k
        k1.remove_prefix(1); // Remove key space
6027
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6028
23.0k
        int ret = decode_key(&k1, &out);
6029
23.0k
        if (ret != 0) {
6030
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6031
0
            return -1;
6032
0
        }
6033
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6034
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6035
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6036
23.0k
        std::unique_ptr<Transaction> txn;
6037
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6038
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6039
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6040
0
            return -1;
6041
0
        }
6042
        // Remove txn index kv
6043
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6044
23.0k
        txn->remove(index_key);
6045
        // Remove txn info kv
6046
23.0k
        std::string info_key, info_val;
6047
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6048
23.0k
        err = txn->get(info_key, &info_val);
6049
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6050
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6051
0
            return -1;
6052
0
        }
6053
23.0k
        TxnInfoPB txn_info;
6054
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6055
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6056
0
            return -1;
6057
0
        }
6058
23.0k
        txn->remove(info_key);
6059
        // Remove sub txn index kvs
6060
23.0k
        std::vector<std::string> sub_txn_index_keys;
6061
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6062
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6063
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6064
22.9k
        }
6065
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6066
22.9k
            txn->remove(sub_txn_index_key);
6067
22.9k
        }
6068
        // Update txn label
6069
23.0k
        std::string label_key, label_val;
6070
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6071
23.0k
        err = txn->get(label_key, &label_val);
6072
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6073
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6074
0
                         << " err=" << err;
6075
0
            return -1;
6076
0
        }
6077
23.0k
        TxnLabelPB txn_label;
6078
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6079
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6080
0
            return -1;
6081
0
        }
6082
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6083
23.0k
        if (it != txn_label.txn_ids().end()) {
6084
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6085
23.0k
        }
6086
23.0k
        if (txn_label.txn_ids().empty()) {
6087
23.0k
            txn->remove(label_key);
6088
23.0k
            TEST_SYNC_POINT_CALLBACK(
6089
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6090
23.0k
        } else {
6091
73
            if (!txn_label.SerializeToString(&label_val)) {
6092
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6093
0
                return -1;
6094
0
            }
6095
73
            TEST_SYNC_POINT_CALLBACK(
6096
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6097
73
            txn->atomic_set_ver_value(label_key, label_val);
6098
73
            TEST_SYNC_POINT_CALLBACK(
6099
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6100
73
        }
6101
        // Remove recycle txn kv
6102
23.0k
        txn->remove(k);
6103
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6104
23.0k
        err = txn->commit();
6105
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6106
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6107
62
                TEST_SYNC_POINT_CALLBACK(
6108
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6109
                // log the txn_id and label
6110
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6111
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6112
62
                             << " txn_label=" << txn_info.label();
6113
62
                return 1;
6114
62
            }
6115
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6116
0
            return -1;
6117
62
        }
6118
23.0k
        ++num_recycled;
6119
6120
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6121
23.0k
        return 0;
6122
23.0k
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6023
1
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6024
1
        std::string_view k1 = k;
6025
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6026
1
        k1.remove_prefix(1); // Remove key space
6027
1
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6028
1
        int ret = decode_key(&k1, &out);
6029
1
        if (ret != 0) {
6030
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6031
0
            return -1;
6032
0
        }
6033
1
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6034
1
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6035
1
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6036
1
        std::unique_ptr<Transaction> txn;
6037
1
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6038
1
        if (err != TxnErrorCode::TXN_OK) {
6039
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6040
0
            return -1;
6041
0
        }
6042
        // Remove txn index kv
6043
1
        auto index_key = txn_index_key({instance_id_, txn_id});
6044
1
        txn->remove(index_key);
6045
        // Remove txn info kv
6046
1
        std::string info_key, info_val;
6047
1
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6048
1
        err = txn->get(info_key, &info_val);
6049
1
        if (err != TxnErrorCode::TXN_OK) {
6050
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6051
0
            return -1;
6052
0
        }
6053
1
        TxnInfoPB txn_info;
6054
1
        if (!txn_info.ParseFromString(info_val)) {
6055
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6056
0
            return -1;
6057
0
        }
6058
1
        txn->remove(info_key);
6059
        // Remove sub txn index kvs
6060
1
        std::vector<std::string> sub_txn_index_keys;
6061
1
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6062
0
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6063
0
            sub_txn_index_keys.push_back(sub_txn_index_key);
6064
0
        }
6065
1
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6066
0
            txn->remove(sub_txn_index_key);
6067
0
        }
6068
        // Update txn label
6069
1
        std::string label_key, label_val;
6070
1
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6071
1
        err = txn->get(label_key, &label_val);
6072
1
        if (err != TxnErrorCode::TXN_OK) {
6073
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6074
0
                         << " err=" << err;
6075
0
            return -1;
6076
0
        }
6077
1
        TxnLabelPB txn_label;
6078
1
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6079
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6080
0
            return -1;
6081
0
        }
6082
1
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6083
1
        if (it != txn_label.txn_ids().end()) {
6084
1
            txn_label.mutable_txn_ids()->erase(it);
6085
1
        }
6086
1
        if (txn_label.txn_ids().empty()) {
6087
1
            txn->remove(label_key);
6088
1
            TEST_SYNC_POINT_CALLBACK(
6089
1
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6090
1
        } else {
6091
0
            if (!txn_label.SerializeToString(&label_val)) {
6092
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6093
0
                return -1;
6094
0
            }
6095
0
            TEST_SYNC_POINT_CALLBACK(
6096
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6097
0
            txn->atomic_set_ver_value(label_key, label_val);
6098
0
            TEST_SYNC_POINT_CALLBACK(
6099
0
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6100
0
        }
6101
        // Remove recycle txn kv
6102
1
        txn->remove(k);
6103
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6104
1
        err = txn->commit();
6105
1
        if (err != TxnErrorCode::TXN_OK) {
6106
0
            if (err == TxnErrorCode::TXN_CONFLICT) {
6107
0
                TEST_SYNC_POINT_CALLBACK(
6108
0
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6109
                // log the txn_id and label
6110
0
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6111
0
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6112
0
                             << " txn_label=" << txn_info.label();
6113
0
                return 1;
6114
0
            }
6115
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6116
0
            return -1;
6117
0
        }
6118
1
        ++num_recycled;
6119
6120
1
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6121
1
        return 0;
6122
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_4clERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
6023
23.0k
    auto delete_recycle_txn_kv = [&](const std::string& k) -> int {
6024
23.0k
        std::string_view k1 = k;
6025
        //RecycleTxnKeyInfo 0:instance_id  1:db_id  2:txn_id
6026
23.0k
        k1.remove_prefix(1); // Remove key space
6027
23.0k
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6028
23.0k
        int ret = decode_key(&k1, &out);
6029
23.0k
        if (ret != 0) {
6030
0
            LOG_ERROR("failed to decode key, ret={}", ret).tag("key", hex(k));
6031
0
            return -1;
6032
0
        }
6033
23.0k
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
6034
23.0k
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
6035
23.0k
        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id << " txn_id=" << txn_id;
6036
23.0k
        std::unique_ptr<Transaction> txn;
6037
23.0k
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6038
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6039
0
            LOG_ERROR("failed to create txn err={}", err).tag("key", hex(k));
6040
0
            return -1;
6041
0
        }
6042
        // Remove txn index kv
6043
23.0k
        auto index_key = txn_index_key({instance_id_, txn_id});
6044
23.0k
        txn->remove(index_key);
6045
        // Remove txn info kv
6046
23.0k
        std::string info_key, info_val;
6047
23.0k
        txn_info_key({instance_id_, db_id, txn_id}, &info_key);
6048
23.0k
        err = txn->get(info_key, &info_val);
6049
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6050
0
            LOG_WARNING("failed to get txn info err={}", err).tag("key", hex(info_key));
6051
0
            return -1;
6052
0
        }
6053
23.0k
        TxnInfoPB txn_info;
6054
23.0k
        if (!txn_info.ParseFromString(info_val)) {
6055
0
            LOG_WARNING("failed to parse txn info").tag("key", hex(info_key));
6056
0
            return -1;
6057
0
        }
6058
23.0k
        txn->remove(info_key);
6059
        // Remove sub txn index kvs
6060
23.0k
        std::vector<std::string> sub_txn_index_keys;
6061
23.0k
        for (auto sub_txn_id : txn_info.sub_txn_ids()) {
6062
22.9k
            auto sub_txn_index_key = txn_index_key({instance_id_, sub_txn_id});
6063
22.9k
            sub_txn_index_keys.push_back(sub_txn_index_key);
6064
22.9k
        }
6065
23.0k
        for (auto& sub_txn_index_key : sub_txn_index_keys) {
6066
22.9k
            txn->remove(sub_txn_index_key);
6067
22.9k
        }
6068
        // Update txn label
6069
23.0k
        std::string label_key, label_val;
6070
23.0k
        txn_label_key({instance_id_, db_id, txn_info.label()}, &label_key);
6071
23.0k
        err = txn->get(label_key, &label_val);
6072
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6073
0
            LOG(WARNING) << "failed to get txn label, txn_id=" << txn_id << " key=" << label_key
6074
0
                         << " err=" << err;
6075
0
            return -1;
6076
0
        }
6077
23.0k
        TxnLabelPB txn_label;
6078
23.0k
        if (!txn_label.ParseFromArray(label_val.data(), label_val.size() - VERSION_STAMP_LEN)) {
6079
0
            LOG_WARNING("failed to parse txn label").tag("key", hex(label_key));
6080
0
            return -1;
6081
0
        }
6082
23.0k
        auto it = std::find(txn_label.txn_ids().begin(), txn_label.txn_ids().end(), txn_id);
6083
23.0k
        if (it != txn_label.txn_ids().end()) {
6084
23.0k
            txn_label.mutable_txn_ids()->erase(it);
6085
23.0k
        }
6086
23.0k
        if (txn_label.txn_ids().empty()) {
6087
23.0k
            txn->remove(label_key);
6088
23.0k
            TEST_SYNC_POINT_CALLBACK(
6089
23.0k
                    "InstanceRecycler::recycle_expired_txn_label.remove_label_before");
6090
23.0k
        } else {
6091
73
            if (!txn_label.SerializeToString(&label_val)) {
6092
0
                LOG(WARNING) << "failed to serialize txn label, key=" << hex(label_key);
6093
0
                return -1;
6094
0
            }
6095
73
            TEST_SYNC_POINT_CALLBACK(
6096
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_before");
6097
73
            txn->atomic_set_ver_value(label_key, label_val);
6098
73
            TEST_SYNC_POINT_CALLBACK(
6099
73
                    "InstanceRecycler::recycle_expired_txn_label.update_label_after");
6100
73
        }
6101
        // Remove recycle txn kv
6102
23.0k
        txn->remove(k);
6103
23.0k
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.before_commit");
6104
23.0k
        err = txn->commit();
6105
23.0k
        if (err != TxnErrorCode::TXN_OK) {
6106
62
            if (err == TxnErrorCode::TXN_CONFLICT) {
6107
62
                TEST_SYNC_POINT_CALLBACK(
6108
62
                        "InstanceRecycler::recycle_expired_txn_label.txn_conflict");
6109
                // log the txn_id and label
6110
62
                LOG(WARNING) << "txn conflict, txn_id=" << txn_id
6111
62
                             << " txn_label_pb=" << txn_label.ShortDebugString()
6112
62
                             << " txn_label=" << txn_info.label();
6113
62
                return 1;
6114
62
            }
6115
0
            LOG(WARNING) << "failed to delete expired txn, err=" << err << " key=" << hex(k);
6116
0
            return -1;
6117
62
        }
6118
23.0k
        ++num_recycled;
6119
6120
23.0k
        LOG(INFO) << "recycle expired txn, key=" << hex(k);
6121
23.0k
        return 0;
6122
23.0k
    };
6123
6124
19
    auto loop_done = [&]() -> int {
6125
10
        DORIS_CLOUD_DEFER {
6126
10
            recycle_txn_info_keys.clear();
6127
10
        };
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6125
1
        DORIS_CLOUD_DEFER {
6126
1
            recycle_txn_info_keys.clear();
6127
1
        };
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6125
9
        DORIS_CLOUD_DEFER {
6126
9
            recycle_txn_info_keys.clear();
6127
9
        };
6128
10
        TEST_SYNC_POINT_CALLBACK(
6129
10
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6130
10
                &recycle_txn_info_keys);
6131
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6132
23.0k
            concurrent_delete_executor.add([&]() {
6133
23.0k
                int ret = delete_recycle_txn_kv(k);
6134
23.0k
                if (ret == 1) {
6135
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6136
54
                    for (int i = 1; i <= max_retry; ++i) {
6137
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6138
54
                        ret = delete_recycle_txn_kv(k);
6139
                        // clang-format off
6140
54
                        TEST_SYNC_POINT_CALLBACK(
6141
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6142
                        // clang-format off
6143
54
                        if (ret != 1) {
6144
18
                            break;
6145
18
                        }
6146
                        // random sleep 0-100 ms to retry
6147
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6148
36
                    }
6149
18
                }
6150
23.0k
                if (ret != 0) {
6151
9
                    LOG_WARNING("failed to delete recycle txn kv")
6152
9
                            .tag("instance id", instance_id_)
6153
9
                            .tag("key", hex(k));
6154
9
                    return -1;
6155
9
                }
6156
23.0k
                return 0;
6157
23.0k
            });
recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6132
1
            concurrent_delete_executor.add([&]() {
6133
1
                int ret = delete_recycle_txn_kv(k);
6134
1
                if (ret == 1) {
6135
0
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6136
0
                    for (int i = 1; i <= max_retry; ++i) {
6137
0
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6138
0
                        ret = delete_recycle_txn_kv(k);
6139
                        // clang-format off
6140
0
                        TEST_SYNC_POINT_CALLBACK(
6141
0
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6142
                        // clang-format off
6143
0
                        if (ret != 1) {
6144
0
                            break;
6145
0
                        }
6146
                        // random sleep 0-100 ms to retry
6147
0
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6148
0
                    }
6149
0
                }
6150
1
                if (ret != 0) {
6151
0
                    LOG_WARNING("failed to delete recycle txn kv")
6152
0
                            .tag("instance id", instance_id_)
6153
0
                            .tag("key", hex(k));
6154
0
                    return -1;
6155
0
                }
6156
1
                return 0;
6157
1
            });
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEvENKUlvE0_clEv
Line
Count
Source
6132
23.0k
            concurrent_delete_executor.add([&]() {
6133
23.0k
                int ret = delete_recycle_txn_kv(k);
6134
23.0k
                if (ret == 1) {
6135
18
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6136
54
                    for (int i = 1; i <= max_retry; ++i) {
6137
54
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6138
54
                        ret = delete_recycle_txn_kv(k);
6139
                        // clang-format off
6140
54
                        TEST_SYNC_POINT_CALLBACK(
6141
54
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6142
                        // clang-format off
6143
54
                        if (ret != 1) {
6144
18
                            break;
6145
18
                        }
6146
                        // random sleep 0-100 ms to retry
6147
36
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6148
36
                    }
6149
18
                }
6150
23.0k
                if (ret != 0) {
6151
9
                    LOG_WARNING("failed to delete recycle txn kv")
6152
9
                            .tag("instance id", instance_id_)
6153
9
                            .tag("key", hex(k));
6154
9
                    return -1;
6155
9
                }
6156
23.0k
                return 0;
6157
23.0k
            });
6158
23.0k
        }
6159
10
        bool finished = true;
6160
10
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6161
23.0k
        for (int r : rets) {
6162
23.0k
            if (r != 0) {
6163
9
                ret = -1;
6164
9
            }
6165
23.0k
        }
6166
6167
10
        ret = finished ? ret : -1;
6168
6169
        // Update metrics after all concurrent tasks completed
6170
10
        metrics_context.total_recycled_num = num_recycled.load();
6171
10
        metrics_context.report();
6172
6173
10
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6174
6175
10
        if (ret != 0) {
6176
3
            LOG_WARNING("recycle txn kv ret!=0")
6177
3
                    .tag("finished", finished)
6178
3
                    .tag("ret", ret)
6179
3
                    .tag("instance_id", instance_id_);
6180
3
            return ret;
6181
3
        }
6182
7
        return ret;
6183
10
    };
recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6124
1
    auto loop_done = [&]() -> int {
6125
1
        DORIS_CLOUD_DEFER {
6126
1
            recycle_txn_info_keys.clear();
6127
1
        };
6128
1
        TEST_SYNC_POINT_CALLBACK(
6129
1
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6130
1
                &recycle_txn_info_keys);
6131
1
        for (const auto& k : recycle_txn_info_keys) {
6132
1
            concurrent_delete_executor.add([&]() {
6133
1
                int ret = delete_recycle_txn_kv(k);
6134
1
                if (ret == 1) {
6135
1
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6136
1
                    for (int i = 1; i <= max_retry; ++i) {
6137
1
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6138
1
                        ret = delete_recycle_txn_kv(k);
6139
                        // clang-format off
6140
1
                        TEST_SYNC_POINT_CALLBACK(
6141
1
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6142
                        // clang-format off
6143
1
                        if (ret != 1) {
6144
1
                            break;
6145
1
                        }
6146
                        // random sleep 0-100 ms to retry
6147
1
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6148
1
                    }
6149
1
                }
6150
1
                if (ret != 0) {
6151
1
                    LOG_WARNING("failed to delete recycle txn kv")
6152
1
                            .tag("instance id", instance_id_)
6153
1
                            .tag("key", hex(k));
6154
1
                    return -1;
6155
1
                }
6156
1
                return 0;
6157
1
            });
6158
1
        }
6159
1
        bool finished = true;
6160
1
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6161
1
        for (int r : rets) {
6162
1
            if (r != 0) {
6163
0
                ret = -1;
6164
0
            }
6165
1
        }
6166
6167
1
        ret = finished ? ret : -1;
6168
6169
        // Update metrics after all concurrent tasks completed
6170
1
        metrics_context.total_recycled_num = num_recycled.load();
6171
1
        metrics_context.report();
6172
6173
1
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6174
6175
1
        if (ret != 0) {
6176
0
            LOG_WARNING("recycle txn kv ret!=0")
6177
0
                    .tag("finished", finished)
6178
0
                    .tag("ret", ret)
6179
0
                    .tag("instance_id", instance_id_);
6180
0
            return ret;
6181
0
        }
6182
1
        return ret;
6183
1
    };
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25recycle_expired_txn_labelEvENK3$_1clEv
Line
Count
Source
6124
9
    auto loop_done = [&]() -> int {
6125
9
        DORIS_CLOUD_DEFER {
6126
9
            recycle_txn_info_keys.clear();
6127
9
        };
6128
9
        TEST_SYNC_POINT_CALLBACK(
6129
9
                "InstanceRecycler::recycle_expired_txn_label.check_recycle_txn_info_keys",
6130
9
                &recycle_txn_info_keys);
6131
23.0k
        for (const auto& k : recycle_txn_info_keys) {
6132
23.0k
            concurrent_delete_executor.add([&]() {
6133
23.0k
                int ret = delete_recycle_txn_kv(k);
6134
23.0k
                if (ret == 1) {
6135
23.0k
                    const int max_retry = std::max(1, config::recycle_txn_delete_max_retry_times);
6136
23.0k
                    for (int i = 1; i <= max_retry; ++i) {
6137
23.0k
                        LOG(WARNING) << "txn conflict, retry times=" << i << " key=" << hex(k);
6138
23.0k
                        ret = delete_recycle_txn_kv(k);
6139
                        // clang-format off
6140
23.0k
                        TEST_SYNC_POINT_CALLBACK(
6141
23.0k
                                "InstanceRecycler::recycle_expired_txn_label.delete_recycle_txn_kv_error", &ret);
6142
                        // clang-format on
6143
23.0k
                        if (ret != 1) {
6144
23.0k
                            break;
6145
23.0k
                        }
6146
                        // random sleep 0-100 ms to retry
6147
23.0k
                        std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 100));
6148
23.0k
                    }
6149
23.0k
                }
6150
23.0k
                if (ret != 0) {
6151
23.0k
                    LOG_WARNING("failed to delete recycle txn kv")
6152
23.0k
                            .tag("instance id", instance_id_)
6153
23.0k
                            .tag("key", hex(k));
6154
23.0k
                    return -1;
6155
23.0k
                }
6156
23.0k
                return 0;
6157
23.0k
            });
6158
23.0k
        }
6159
9
        bool finished = true;
6160
9
        std::vector<int> rets = concurrent_delete_executor.when_all(&finished);
6161
23.0k
        for (int r : rets) {
6162
23.0k
            if (r != 0) {
6163
9
                ret = -1;
6164
9
            }
6165
23.0k
        }
6166
6167
9
        ret = finished ? ret : -1;
6168
6169
        // Update metrics after all concurrent tasks completed
6170
9
        metrics_context.total_recycled_num = num_recycled.load();
6171
9
        metrics_context.report();
6172
6173
9
        TEST_SYNC_POINT_CALLBACK("InstanceRecycler::recycle_expired_txn_label.failure", &ret);
6174
6175
9
        if (ret != 0) {
6176
3
            LOG_WARNING("recycle txn kv ret!=0")
6177
3
                    .tag("finished", finished)
6178
3
                    .tag("ret", ret)
6179
3
                    .tag("instance_id", instance_id_);
6180
3
            return ret;
6181
3
        }
6182
6
        return ret;
6183
9
    };
6184
6185
19
    if (config::enable_recycler_stats_metrics) {
6186
0
        scan_and_statistics_expired_txn_label();
6187
0
    }
6188
    // recycle_func and loop_done for scan and recycle
6189
19
    return scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key,
6190
19
                            std::move(handle_recycle_txn_kv), std::move(loop_done));
6191
19
}
6192
6193
// Identifiers of one copy job, bundled so they can be handed to
// BatchObjStoreAccessor::add() in a single braced initializer.
//
// NOTE: the member order is part of the contract. Callers build this struct
// positionally and add() unpacks it with a structured binding, so do not
// reorder the fields.
struct CopyJobIdTuple {
    // Instance that owns the copy job.
    std::string instance_id;
    // Stage the copy job loads from.
    std::string stage_id;
    // Target table. Fixed-width on purpose: the id decoded from the copy job
    // key is an int64_t, and plain `long` is only 32 bits on LLP64 targets.
    int64_t table_id;
    // Copy id of the job (printed as "query_id" in the log trace).
    std::string copy_id;
    // URI of the stage's storage accessor; only used for log tracing.
    std::string stage_path;
};
6200
struct BatchObjStoreAccessor {
6201
    BatchObjStoreAccessor(std::shared_ptr<StorageVaultAccessor> accessor, uint64_t& batch_count,
6202
                          TxnKv* txn_kv)
6203
3
            : accessor_(std::move(accessor)), batch_count_(batch_count), txn_kv_(txn_kv) {};
6204
3
    ~BatchObjStoreAccessor() {
6205
3
        if (!paths_.empty()) {
6206
3
            consume();
6207
3
        }
6208
3
    }
6209
6210
    /**
6211
    * To implicitely do batch work and submit the batch delete task to s3
6212
    * The s3 delete opreations would be done in batches, and then delete CopyJobPB key one by one
6213
    *
6214
    * @param copy_job The protubuf struct consists of the copy job files.
6215
    * @param key The copy job's key on fdb, the key is originally occupied by fdb range iterator, to make sure
6216
    *            it would last until we finish the delete task, here we need pass one string value
6217
    * @param cope_job_id_tuple One tuple {log_trace instance_id, stage_id, table_id, query_id, stage_path} to print log
6218
    */
6219
5
    void add(CopyJobPB copy_job, std::string key, const CopyJobIdTuple cope_job_id_tuple) {
6220
5
        auto& [instance_id, stage_id, table_id, copy_id, path] = cope_job_id_tuple;
6221
5
        auto& file_keys = copy_file_keys_[key];
6222
5
        file_keys.log_trace =
6223
5
                fmt::format("instance_id={}, stage_id={}, table_id={}, query_id={}, path={}",
6224
5
                            instance_id, stage_id, table_id, copy_id, path);
6225
5
        std::string_view log_trace = file_keys.log_trace;
6226
2.03k
        for (const auto& file : copy_job.object_files()) {
6227
2.03k
            auto relative_path = file.relative_path();
6228
2.03k
            paths_.push_back(relative_path);
6229
2.03k
            file_keys.keys.push_back(copy_file_key(
6230
2.03k
                    {instance_id, stage_id, table_id, file.relative_path(), file.etag()}));
6231
2.03k
            LOG_INFO(log_trace)
6232
2.03k
                    .tag("relative_path", relative_path)
6233
2.03k
                    .tag("batch_count", batch_count_);
6234
2.03k
        }
6235
5
        LOG_INFO(log_trace)
6236
5
                .tag("objects_num", copy_job.object_files().size())
6237
5
                .tag("batch_count", batch_count_);
6238
        // TODO(AlexYue): If the size is 1001, it would be one delete with 1000 objects and one delete request with only one object(**ATTN**: DOESN'T
6239
        // recommend using delete objects when objects num is less than 10)
6240
5
        if (paths_.size() < 1000) {
6241
3
            return;
6242
3
        }
6243
2
        consume();
6244
2
    }
6245
6246
private:
6247
5
    void consume() {
6248
5
        DORIS_CLOUD_DEFER {
6249
5
            paths_.clear();
6250
5
            copy_file_keys_.clear();
6251
5
            batch_count_++;
6252
6253
5
            LOG_WARNING("begin to delete {} internal stage objects in batch {}", paths_.size(),
6254
5
                        batch_count_);
6255
5
        };
6256
6257
5
        StopWatch sw;
6258
        // TODO(yuejing): 在accessor的delete_objets的实现里可以考虑如果_paths数量不超过10个的话,就直接发10个delete objection operation而不是发post
6259
5
        if (0 != accessor_->delete_files(paths_)) {
6260
2
            LOG_WARNING("failed to delete {} internal stage objects in batch {} and it takes {} us",
6261
2
                        paths_.size(), batch_count_, sw.elapsed_us());
6262
2
            return;
6263
2
        }
6264
3
        LOG_WARNING("succeed to delete {} internal stage objects in batch {} and it takes {} us",
6265
3
                    paths_.size(), batch_count_, sw.elapsed_us());
6266
        // delete fdb's keys
6267
3
        for (auto& file_keys : copy_file_keys_) {
6268
3
            auto& [log_trace, keys] = file_keys.second;
6269
3
            std::unique_ptr<Transaction> txn;
6270
3
            if (txn_kv_->create_txn(&txn) != cloud::TxnErrorCode::TXN_OK) {
6271
0
                LOG(WARNING) << "failed to create txn";
6272
0
                continue;
6273
0
            }
6274
            // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6275
            // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6276
            // limited, should not cause the txn commit failed.
6277
1.02k
            for (const auto& key : keys) {
6278
1.02k
                txn->remove(key);
6279
1.02k
                LOG_INFO("remove copy_file_key={}, {}", hex(key), log_trace);
6280
1.02k
            }
6281
3
            txn->remove(file_keys.first);
6282
3
            if (auto ret = txn->commit(); ret != cloud::TxnErrorCode::TXN_OK) {
6283
0
                LOG(WARNING) << "failed to commit txn ret is " << ret;
6284
0
                continue;
6285
0
            }
6286
3
        }
6287
3
    }
6288
    std::shared_ptr<StorageVaultAccessor> accessor_;
6289
    // the path of the s3 files to be deleted
6290
    std::vector<std::string> paths_;
6291
    struct CopyFiles {
6292
        std::string log_trace;
6293
        std::vector<std::string> keys;
6294
    };
6295
    // pair<std::string, std::vector<std::string>>
6296
    // first: instance_id_ stage_id table_id query_id
6297
    // second: keys to be deleted
6298
    // <fdb key, <{instance_id_ stage_id table_id query_id}, file keys to be deleted>>
6299
    std::unordered_map<std::string, CopyFiles> copy_file_keys_;
6300
    // used to distinguish different batch tasks, the task log consists of thread ID and batch number
6301
    // which can together uniquely identifies different tasks for tracing log
6302
    uint64_t& batch_count_;
6303
    TxnKv* txn_kv_;
6304
};
6305
6306
13
int InstanceRecycler::recycle_copy_jobs() {
6307
13
    int64_t num_scanned = 0;
6308
13
    int64_t num_finished = 0;
6309
13
    int64_t num_expired = 0;
6310
13
    int64_t num_recycled = 0;
6311
    // Used for INTERNAL stage's copy jobs to tag each batch for log trace
6312
13
    uint64_t batch_count = 0;
6313
13
    const std::string task_name = "recycle_copy_jobs";
6314
13
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6315
6316
13
    LOG_WARNING("begin to recycle copy jobs").tag("instance_id", instance_id_);
6317
6318
13
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6319
13
    register_recycle_task(task_name, start_time);
6320
6321
13
    DORIS_CLOUD_DEFER {
6322
13
        unregister_recycle_task(task_name);
6323
13
        int64_t cost =
6324
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6325
13
        metrics_context.finish_report();
6326
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6327
13
                .tag("instance_id", instance_id_)
6328
13
                .tag("num_scanned", num_scanned)
6329
13
                .tag("num_finished", num_finished)
6330
13
                .tag("num_expired", num_expired)
6331
13
                .tag("num_recycled", num_recycled);
6332
13
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_0clEv
Line
Count
Source
6321
13
    DORIS_CLOUD_DEFER {
6322
13
        unregister_recycle_task(task_name);
6323
13
        int64_t cost =
6324
13
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6325
13
        metrics_context.finish_report();
6326
13
        LOG_WARNING("recycle copy jobs finished, cost={}s", cost)
6327
13
                .tag("instance_id", instance_id_)
6328
13
                .tag("num_scanned", num_scanned)
6329
13
                .tag("num_finished", num_finished)
6330
13
                .tag("num_expired", num_expired)
6331
13
                .tag("num_recycled", num_recycled);
6332
13
    };
6333
6334
13
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
6335
13
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
6336
13
    std::string key0;
6337
13
    std::string key1;
6338
13
    copy_job_key(key_info0, &key0);
6339
13
    copy_job_key(key_info1, &key1);
6340
13
    std::unordered_map<std::string, std::shared_ptr<BatchObjStoreAccessor>> stage_accessor_map;
6341
13
    auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled,
6342
13
                         &batch_count, &stage_accessor_map, &task_name, &metrics_context,
6343
16
                         this](std::string_view k, std::string_view v) -> int {
6344
16
        ++num_scanned;
6345
16
        CopyJobPB copy_job;
6346
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6347
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6348
0
            return -1;
6349
0
        }
6350
6351
        // decode copy job key
6352
16
        auto k1 = k;
6353
16
        k1.remove_prefix(1);
6354
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6355
16
        decode_key(&k1, &out);
6356
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6357
        // -> CopyJobPB
6358
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6359
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6360
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6361
6362
16
        bool check_storage = true;
6363
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6364
12
            ++num_finished;
6365
6366
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6367
7
                auto it = stage_accessor_map.find(stage_id);
6368
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6369
7
                std::string_view path;
6370
7
                if (it != stage_accessor_map.end()) {
6371
2
                    accessor = it->second;
6372
5
                } else {
6373
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6374
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6375
5
                                                      &inner_accessor);
6376
5
                    if (ret < 0) { // error
6377
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6378
0
                        return -1;
6379
5
                    } else if (ret == 0) {
6380
3
                        path = inner_accessor->uri();
6381
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6382
3
                                inner_accessor, batch_count, txn_kv_.get());
6383
3
                        stage_accessor_map.emplace(stage_id, accessor);
6384
3
                    } else { // stage not found, skip check storage
6385
2
                        check_storage = false;
6386
2
                    }
6387
5
                }
6388
7
                if (check_storage) {
6389
                    // TODO delete objects with key and etag is not supported
6390
5
                    accessor->add(std::move(copy_job), std::string(k),
6391
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6392
5
                    return 0;
6393
5
                }
6394
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6395
5
                int64_t current_time =
6396
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6397
5
                if (copy_job.finish_time_ms() > 0) {
6398
2
                    if (!config::force_immediate_recycle &&
6399
2
                        current_time < copy_job.finish_time_ms() +
6400
2
                                               config::copy_job_max_retention_second * 1000) {
6401
1
                        return 0;
6402
1
                    }
6403
3
                } else {
6404
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6405
3
                    if (!config::force_immediate_recycle &&
6406
3
                        current_time < copy_job.start_time_ms() +
6407
3
                                               config::copy_job_max_retention_second * 1000) {
6408
1
                        return 0;
6409
1
                    }
6410
3
                }
6411
5
            }
6412
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6413
4
            int64_t current_time =
6414
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6415
            // if copy job is timeout: delete all copy file kvs and copy job kv
6416
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6417
2
                return 0;
6418
2
            }
6419
2
            ++num_expired;
6420
2
        }
6421
6422
        // delete all copy files
6423
7
        std::vector<std::string> copy_file_keys;
6424
70
        for (auto& file : copy_job.object_files()) {
6425
70
            copy_file_keys.push_back(copy_file_key(
6426
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6427
70
        }
6428
7
        std::unique_ptr<Transaction> txn;
6429
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6430
0
            LOG(WARNING) << "failed to create txn";
6431
0
            return -1;
6432
0
        }
6433
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6434
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6435
        // limited, should not cause the txn commit failed.
6436
70
        for (const auto& key : copy_file_keys) {
6437
70
            txn->remove(key);
6438
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6439
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6440
70
                      << ", query_id=" << copy_id;
6441
70
        }
6442
7
        txn->remove(k);
6443
7
        TxnErrorCode err = txn->commit();
6444
7
        if (err != TxnErrorCode::TXN_OK) {
6445
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6446
0
            return -1;
6447
0
        }
6448
6449
7
        metrics_context.total_recycled_num = ++num_recycled;
6450
7
        metrics_context.report();
6451
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6452
7
        return 0;
6453
7
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler17recycle_copy_jobsEvENK3$_1clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6343
16
                         this](std::string_view k, std::string_view v) -> int {
6344
16
        ++num_scanned;
6345
16
        CopyJobPB copy_job;
6346
16
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
6347
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
6348
0
            return -1;
6349
0
        }
6350
6351
        // decode copy job key
6352
16
        auto k1 = k;
6353
16
        k1.remove_prefix(1);
6354
16
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6355
16
        decode_key(&k1, &out);
6356
        // 0x01 "copy" ${instance_id} "job" ${stage_id} ${table_id} ${copy_id} ${group_id}
6357
        // -> CopyJobPB
6358
16
        const auto& stage_id = std::get<std::string>(std::get<0>(out[3]));
6359
16
        const auto& table_id = std::get<int64_t>(std::get<0>(out[4]));
6360
16
        const auto& copy_id = std::get<std::string>(std::get<0>(out[5]));
6361
6362
16
        bool check_storage = true;
6363
16
        if (copy_job.job_status() == CopyJobPB::FINISH) {
6364
12
            ++num_finished;
6365
6366
12
            if (copy_job.stage_type() == StagePB::INTERNAL) {
6367
7
                auto it = stage_accessor_map.find(stage_id);
6368
7
                std::shared_ptr<BatchObjStoreAccessor> accessor;
6369
7
                std::string_view path;
6370
7
                if (it != stage_accessor_map.end()) {
6371
2
                    accessor = it->second;
6372
5
                } else {
6373
5
                    std::shared_ptr<StorageVaultAccessor> inner_accessor;
6374
5
                    auto ret = init_copy_job_accessor(stage_id, copy_job.stage_type(),
6375
5
                                                      &inner_accessor);
6376
5
                    if (ret < 0) { // error
6377
0
                        LOG_WARNING("Failed to init_copy_job_accessor due to error code {}", ret);
6378
0
                        return -1;
6379
5
                    } else if (ret == 0) {
6380
3
                        path = inner_accessor->uri();
6381
3
                        accessor = std::make_shared<BatchObjStoreAccessor>(
6382
3
                                inner_accessor, batch_count, txn_kv_.get());
6383
3
                        stage_accessor_map.emplace(stage_id, accessor);
6384
3
                    } else { // stage not found, skip check storage
6385
2
                        check_storage = false;
6386
2
                    }
6387
5
                }
6388
7
                if (check_storage) {
6389
                    // TODO delete objects with key and etag is not supported
6390
5
                    accessor->add(std::move(copy_job), std::string(k),
6391
5
                                  {instance_id_, stage_id, table_id, copy_id, std::string(path)});
6392
5
                    return 0;
6393
5
                }
6394
7
            } else if (copy_job.stage_type() == StagePB::EXTERNAL) {
6395
5
                int64_t current_time =
6396
5
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6397
5
                if (copy_job.finish_time_ms() > 0) {
6398
2
                    if (!config::force_immediate_recycle &&
6399
2
                        current_time < copy_job.finish_time_ms() +
6400
2
                                               config::copy_job_max_retention_second * 1000) {
6401
1
                        return 0;
6402
1
                    }
6403
3
                } else {
6404
                    // For compatibility, copy job does not contain finish time before 2.2.2, use start time
6405
3
                    if (!config::force_immediate_recycle &&
6406
3
                        current_time < copy_job.start_time_ms() +
6407
3
                                               config::copy_job_max_retention_second * 1000) {
6408
1
                        return 0;
6409
1
                    }
6410
3
                }
6411
5
            }
6412
12
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
6413
4
            int64_t current_time =
6414
4
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
6415
            // if copy job is timeout: delete all copy file kvs and copy job kv
6416
4
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
6417
2
                return 0;
6418
2
            }
6419
2
            ++num_expired;
6420
2
        }
6421
6422
        // delete all copy files
6423
7
        std::vector<std::string> copy_file_keys;
6424
70
        for (auto& file : copy_job.object_files()) {
6425
70
            copy_file_keys.push_back(copy_file_key(
6426
70
                    {instance_id_, stage_id, table_id, file.relative_path(), file.etag()}));
6427
70
        }
6428
7
        std::unique_ptr<Transaction> txn;
6429
7
        if (txn_kv_->create_txn(&txn) != TxnErrorCode::TXN_OK) {
6430
0
            LOG(WARNING) << "failed to create txn";
6431
0
            return -1;
6432
0
        }
6433
        // FIXME: We have already limited the file num and file meta size when selecting file in FE.
6434
        // And if too many copy files, begin_copy failed commit too. So here the copy file keys are
6435
        // limited, should not cause the txn commit failed.
6436
70
        for (const auto& key : copy_file_keys) {
6437
70
            txn->remove(key);
6438
70
            LOG(INFO) << "remove copy_file_key=" << hex(key) << ", instance_id=" << instance_id_
6439
70
                      << ", stage_id=" << stage_id << ", table_id=" << table_id
6440
70
                      << ", query_id=" << copy_id;
6441
70
        }
6442
7
        txn->remove(k);
6443
7
        TxnErrorCode err = txn->commit();
6444
7
        if (err != TxnErrorCode::TXN_OK) {
6445
0
            LOG(WARNING) << "failed to commit txn, err=" << err;
6446
0
            return -1;
6447
0
        }
6448
6449
7
        metrics_context.total_recycled_num = ++num_recycled;
6450
7
        metrics_context.report();
6451
7
        check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time);
6452
7
        return 0;
6453
7
    };
6454
6455
13
    if (config::enable_recycler_stats_metrics) {
6456
0
        scan_and_statistics_copy_jobs();
6457
0
    }
6458
    // recycle_func and loop_done for scan and recycle
6459
13
    return scan_and_recycle(key0, key1, std::move(recycle_func));
6460
13
}
6461
6462
int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id,
6463
                                             const StagePB::StageType& stage_type,
6464
5
                                             std::shared_ptr<StorageVaultAccessor>* accessor) {
6465
5
#ifdef UNIT_TEST
6466
    // In unit test, external use the same accessor as the internal stage
6467
5
    auto it = accessor_map_.find(stage_id);
6468
5
    if (it != accessor_map_.end()) {
6469
3
        *accessor = it->second;
6470
3
    } else {
6471
2
        std::cout << "UT can not find accessor with stage_id: " << stage_id << std::endl;
6472
2
        return 1;
6473
2
    }
6474
#else
6475
    // init s3 accessor and add to accessor map
6476
    auto stage_it =
6477
            std::find_if(instance_info_.stages().begin(), instance_info_.stages().end(),
6478
                         [&stage_id](auto&& stage) { return stage.stage_id() == stage_id; });
6479
6480
    if (stage_it == instance_info_.stages().end()) {
6481
        LOG(INFO) << "Recycle nonexisted stage copy jobs. instance_id=" << instance_id_
6482
                  << ", stage_id=" << stage_id << ", stage_type=" << stage_type;
6483
        return 1;
6484
    }
6485
6486
    const auto& object_store_info = stage_it->obj_info();
6487
    auto stage_access_type = stage_it->has_access_type() ? stage_it->access_type() : StagePB::AKSK;
6488
6489
    S3Conf s3_conf;
6490
    if (stage_type == StagePB::EXTERNAL) {
6491
        if (stage_access_type == StagePB::AKSK) {
6492
            auto conf = S3Conf::from_obj_store_info(object_store_info);
6493
            if (!conf) {
6494
                return -1;
6495
            }
6496
6497
            s3_conf = std::move(*conf);
6498
        } else if (stage_access_type == StagePB::BUCKET_ACL) {
6499
            auto conf = S3Conf::from_obj_store_info(object_store_info, true /* skip_aksk */);
6500
            if (!conf) {
6501
                return -1;
6502
            }
6503
6504
            s3_conf = std::move(*conf);
6505
            if (instance_info_.ram_user().has_encryption_info()) {
6506
                AkSkPair plain_ak_sk_pair;
6507
                int ret = decrypt_ak_sk_helper(
6508
                        instance_info_.ram_user().ak(), instance_info_.ram_user().sk(),
6509
                        instance_info_.ram_user().encryption_info(), &plain_ak_sk_pair);
6510
                if (ret != 0) {
6511
                    LOG(WARNING) << "fail to decrypt ak sk. instance_id: " << instance_id_
6512
                                 << " ram_user: " << proto_to_json(instance_info_.ram_user());
6513
                    return -1;
6514
                }
6515
                s3_conf.ak = std::move(plain_ak_sk_pair.first);
6516
                s3_conf.sk = std::move(plain_ak_sk_pair.second);
6517
            } else {
6518
                s3_conf.ak = instance_info_.ram_user().ak();
6519
                s3_conf.sk = instance_info_.ram_user().sk();
6520
            }
6521
        } else {
6522
            LOG(INFO) << "Unsupported stage access type=" << stage_access_type
6523
                      << ", instance_id=" << instance_id_ << ", stage_id=" << stage_id;
6524
            return -1;
6525
        }
6526
    } else if (stage_type == StagePB::INTERNAL) {
6527
        int idx = stoi(object_store_info.id());
6528
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6529
            LOG(WARNING) << "invalid idx: " << idx;
6530
            return -1;
6531
        }
6532
6533
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6534
        auto conf = S3Conf::from_obj_store_info(old_obj);
6535
        if (!conf) {
6536
            return -1;
6537
        }
6538
6539
        s3_conf = std::move(*conf);
6540
        s3_conf.prefix = object_store_info.prefix();
6541
    } else {
6542
        LOG(WARNING) << "unknown stage type " << stage_type;
6543
        return -1;
6544
    }
6545
6546
    std::shared_ptr<S3Accessor> s3_accessor;
6547
    int ret = S3Accessor::create(std::move(s3_conf), &s3_accessor);
6548
    if (ret != 0) {
6549
        LOG(WARNING) << "failed to init s3 accessor ret=" << ret;
6550
        return -1;
6551
    }
6552
6553
    *accessor = std::move(s3_accessor);
6554
#endif
6555
3
    return 0;
6556
5
}
6557
6558
11
int InstanceRecycler::recycle_stage() {
6559
11
    int64_t num_scanned = 0;
6560
11
    int64_t num_recycled = 0;
6561
11
    const std::string task_name = "recycle_stage";
6562
11
    RecyclerMetricsContext metrics_context(instance_id_, task_name);
6563
6564
11
    LOG_WARNING("begin to recycle stage").tag("instance_id", instance_id_);
6565
6566
11
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6567
11
    register_recycle_task(task_name, start_time);
6568
6569
11
    DORIS_CLOUD_DEFER {
6570
11
        unregister_recycle_task(task_name);
6571
11
        int64_t cost =
6572
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6573
11
        metrics_context.finish_report();
6574
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6575
11
                .tag("instance_id", instance_id_)
6576
11
                .tag("num_scanned", num_scanned)
6577
11
                .tag("num_recycled", num_recycled);
6578
11
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_0clEv
Line
Count
Source
6569
11
    DORIS_CLOUD_DEFER {
6570
11
        unregister_recycle_task(task_name);
6571
11
        int64_t cost =
6572
11
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6573
11
        metrics_context.finish_report();
6574
11
        LOG_WARNING("recycle stage, cost={}s", cost)
6575
11
                .tag("instance_id", instance_id_)
6576
11
                .tag("num_scanned", num_scanned)
6577
11
                .tag("num_recycled", num_recycled);
6578
11
    };
6579
6580
11
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
6581
11
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
6582
11
    std::string key0 = recycle_stage_key(key_info0);
6583
11
    std::string key1 = recycle_stage_key(key_info1);
6584
6585
11
    std::vector<std::string_view> stage_keys;
6586
11
    auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, &metrics_context,
6587
11
                         this](std::string_view k, std::string_view v) -> int {
6588
1
        ++num_scanned;
6589
1
        RecycleStagePB recycle_stage;
6590
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6591
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6592
0
            return -1;
6593
0
        }
6594
6595
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6596
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6597
0
            LOG(WARNING) << "invalid idx: " << idx;
6598
0
            return -1;
6599
0
        }
6600
6601
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6602
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6603
1
                [&] {
6604
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6605
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6606
1
                    if (!s3_conf) {
6607
1
                        return -1;
6608
1
                    }
6609
6610
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6611
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6612
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6613
1
                    if (ret != 0) {
6614
1
                        return -1;
6615
1
                    }
6616
6617
1
                    accessor = std::move(s3_accessor);
6618
1
                    return 0;
6619
1
                }(),
6620
1
                "recycle_stage:get_accessor", &accessor);
6621
6622
1
        if (ret != 0) {
6623
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6624
0
            return ret;
6625
0
        }
6626
6627
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6628
1
                .tag("instance_id", instance_id_)
6629
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6630
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6631
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6632
1
                .tag("obj_info_id", idx)
6633
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6634
1
        ret = accessor->delete_all();
6635
1
        if (ret != 0) {
6636
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6637
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6638
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6639
0
                         << ", ret=" << ret;
6640
0
            return -1;
6641
0
        }
6642
1
        metrics_context.total_recycled_num = ++num_recycled;
6643
1
        metrics_context.report();
6644
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6645
1
        stage_keys.push_back(k);
6646
1
        return 0;
6647
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_2clESt17basic_string_viewIcSt11char_traitsIcEES6_
Line
Count
Source
6587
1
                         this](std::string_view k, std::string_view v) -> int {
6588
1
        ++num_scanned;
6589
1
        RecycleStagePB recycle_stage;
6590
1
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
6591
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
6592
0
            return -1;
6593
0
        }
6594
6595
1
        int idx = stoi(recycle_stage.stage().obj_info().id());
6596
1
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6597
0
            LOG(WARNING) << "invalid idx: " << idx;
6598
0
            return -1;
6599
0
        }
6600
6601
1
        std::shared_ptr<StorageVaultAccessor> accessor;
6602
1
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
6603
1
                [&] {
6604
1
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
6605
1
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6606
1
                    if (!s3_conf) {
6607
1
                        return -1;
6608
1
                    }
6609
6610
1
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
6611
1
                    std::shared_ptr<S3Accessor> s3_accessor;
6612
1
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
6613
1
                    if (ret != 0) {
6614
1
                        return -1;
6615
1
                    }
6616
6617
1
                    accessor = std::move(s3_accessor);
6618
1
                    return 0;
6619
1
                }(),
6620
1
                "recycle_stage:get_accessor", &accessor);
6621
6622
1
        if (ret != 0) {
6623
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
6624
0
            return ret;
6625
0
        }
6626
6627
1
        LOG_WARNING("begin to delete objects of dropped internal stage")
6628
1
                .tag("instance_id", instance_id_)
6629
1
                .tag("stage_id", recycle_stage.stage().stage_id())
6630
1
                .tag("user_name", recycle_stage.stage().mysql_user_name()[0])
6631
1
                .tag("user_id", recycle_stage.stage().mysql_user_id()[0])
6632
1
                .tag("obj_info_id", idx)
6633
1
                .tag("prefix", recycle_stage.stage().obj_info().prefix());
6634
1
        ret = accessor->delete_all();
6635
1
        if (ret != 0) {
6636
0
            LOG(WARNING) << "failed to delete objects of dropped internal stage. instance_id="
6637
0
                         << instance_id_ << ", stage_id=" << recycle_stage.stage().stage_id()
6638
0
                         << ", prefix=" << recycle_stage.stage().obj_info().prefix()
6639
0
                         << ", ret=" << ret;
6640
0
            return -1;
6641
0
        }
6642
1
        metrics_context.total_recycled_num = ++num_recycled;
6643
1
        metrics_context.report();
6644
1
        check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time);
6645
1
        stage_keys.push_back(k);
6646
1
        return 0;
6647
1
    };
6648
6649
11
    auto loop_done = [&stage_keys, this]() -> int {
6650
1
        if (stage_keys.empty()) return 0;
6651
1
        DORIS_CLOUD_DEFER {
6652
1
            stage_keys.clear();
6653
1
        };
Unexecuted instantiation: recycler.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
recycler_test.cpp:_ZZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEvENKUlvE_clEv
Line
Count
Source
6651
1
        DORIS_CLOUD_DEFER {
6652
1
            stage_keys.clear();
6653
1
        };
6654
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6655
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6656
0
            return -1;
6657
0
        }
6658
1
        return 0;
6659
1
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler13recycle_stageEvENK3$_1clEv
Line
Count
Source
6649
1
    auto loop_done = [&stage_keys, this]() -> int {
6650
1
        if (stage_keys.empty()) return 0;
6651
1
        DORIS_CLOUD_DEFER {
6652
1
            stage_keys.clear();
6653
1
        };
6654
1
        if (0 != txn_remove(txn_kv_.get(), stage_keys)) {
6655
0
            LOG(WARNING) << "failed to delete recycle partition kv, instance_id=" << instance_id_;
6656
0
            return -1;
6657
0
        }
6658
1
        return 0;
6659
1
    };
6660
11
    if (config::enable_recycler_stats_metrics) {
6661
0
        scan_and_statistics_stage();
6662
0
    }
6663
    // recycle_func and loop_done for scan and recycle
6664
11
    return scan_and_recycle(key0, key1, std::move(recycle_func), std::move(loop_done));
6665
11
}
6666
6667
10
int InstanceRecycler::recycle_expired_stage_objects() {
6668
10
    LOG_WARNING("begin to recycle expired stage objects").tag("instance_id", instance_id_);
6669
6670
10
    int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6671
10
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
6672
6673
10
    DORIS_CLOUD_DEFER {
6674
10
        int64_t cost =
6675
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6676
10
        metrics_context.finish_report();
6677
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6678
10
                .tag("instance_id", instance_id_);
6679
10
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29recycle_expired_stage_objectsEvENK3$_0clEv
Line
Count
Source
6673
10
    DORIS_CLOUD_DEFER {
6674
10
        int64_t cost =
6675
10
                duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time;
6676
10
        metrics_context.finish_report();
6677
10
        LOG_WARNING("recycle expired stage objects, cost={}s", cost)
6678
10
                .tag("instance_id", instance_id_);
6679
10
    };
6680
6681
10
    int ret = 0;
6682
6683
10
    if (config::enable_recycler_stats_metrics) {
6684
0
        scan_and_statistics_expired_stage_objects();
6685
0
    }
6686
6687
10
    for (const auto& stage : instance_info_.stages()) {
6688
0
        std::stringstream ss;
6689
0
        ss << "instance_id=" << instance_id_ << ", stage_id=" << stage.stage_id() << ", user_name="
6690
0
           << (stage.mysql_user_name().empty() ? "null" : stage.mysql_user_name().at(0))
6691
0
           << ", user_id=" << (stage.mysql_user_id().empty() ? "null" : stage.mysql_user_id().at(0))
6692
0
           << ", prefix=" << stage.obj_info().prefix();
6693
6694
0
        if (stopped()) {
6695
0
            break;
6696
0
        }
6697
0
        if (stage.type() == StagePB::EXTERNAL) {
6698
0
            continue;
6699
0
        }
6700
0
        int idx = stoi(stage.obj_info().id());
6701
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
6702
0
            LOG(WARNING) << "invalid idx: " << idx << ", id: " << stage.obj_info().id();
6703
0
            continue;
6704
0
        }
6705
6706
0
        const auto& old_obj = instance_info_.obj_info()[idx - 1];
6707
0
        auto s3_conf = S3Conf::from_obj_store_info(old_obj);
6708
0
        if (!s3_conf) {
6709
0
            LOG(WARNING) << "failed to init s3_conf with obj_info=" << old_obj.ShortDebugString();
6710
0
            continue;
6711
0
        }
6712
6713
0
        s3_conf->prefix = stage.obj_info().prefix();
6714
0
        std::shared_ptr<S3Accessor> accessor;
6715
0
        int ret1 = S3Accessor::create(*s3_conf, &accessor);
6716
0
        if (ret1 != 0) {
6717
0
            LOG(WARNING) << "failed to init s3 accessor ret=" << ret1 << " " << ss.str();
6718
0
            ret = -1;
6719
0
            continue;
6720
0
        }
6721
6722
0
        if (s3_conf->prefix.find("/stage/") == std::string::npos) {
6723
0
            LOG(WARNING) << "try to delete illegal prefix, which is catastrophic, " << ss.str();
6724
0
            ret = -1;
6725
0
            continue;
6726
0
        }
6727
6728
0
        LOG(INFO) << "recycle expired stage objects, " << ss.str();
6729
0
        int64_t expiration_time =
6730
0
                duration_cast<seconds>(system_clock::now().time_since_epoch()).count() -
6731
0
                config::internal_stage_objects_expire_time_second;
6732
0
        if (config::force_immediate_recycle) {
6733
0
            expiration_time = INT64_MAX;
6734
0
        }
6735
0
        ret1 = accessor->delete_all(expiration_time);
6736
0
        if (ret1 != 0) {
6737
0
            LOG(WARNING) << "failed to recycle expired stage objects, ret=" << ret1 << " "
6738
0
                         << ss.str();
6739
0
            ret = -1;
6740
0
            continue;
6741
0
        }
6742
0
        metrics_context.total_recycled_num++;
6743
0
        metrics_context.report();
6744
0
    }
6745
10
    return ret;
6746
10
}
6747
6748
193
void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) {
6749
193
    std::lock_guard lock(recycle_tasks_mutex);
6750
193
    running_recycle_tasks[task_name] = start_time;
6751
193
}
6752
6753
193
void InstanceRecycler::unregister_recycle_task(const std::string& task_name) {
6754
193
    std::lock_guard lock(recycle_tasks_mutex);
6755
193
    DCHECK(running_recycle_tasks[task_name] > 0);
6756
193
    running_recycle_tasks.erase(task_name);
6757
193
}
6758
6759
21
bool InstanceRecycler::check_recycle_tasks() {
6760
21
    std::map<std::string, int64_t> tmp_running_recycle_tasks;
6761
21
    {
6762
21
        std::lock_guard lock(recycle_tasks_mutex);
6763
21
        tmp_running_recycle_tasks = running_recycle_tasks;
6764
21
    }
6765
6766
21
    bool found = false;
6767
21
    int64_t now = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
6768
21
    for (auto& [task_name, start_time] : tmp_running_recycle_tasks) {
6769
20
        int64_t cost = now - start_time;
6770
20
        if (cost > config::recycle_task_threshold_seconds) [[unlikely]] {
6771
20
            LOG_INFO("recycle task cost too much time cost={}s", cost)
6772
20
                    .tag("instance_id", instance_id_)
6773
20
                    .tag("task", task_name);
6774
20
            found = true;
6775
20
        }
6776
20
    }
6777
6778
21
    return found;
6779
21
}
6780
6781
// Scan and statistics indexes that need to be recycled
6782
0
int InstanceRecycler::scan_and_statistics_indexes() {
6783
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_indexes");
6784
6785
0
    RecycleIndexKeyInfo index_key_info0 {instance_id_, 0};
6786
0
    RecycleIndexKeyInfo index_key_info1 {instance_id_, INT64_MAX};
6787
0
    std::string index_key0;
6788
0
    std::string index_key1;
6789
0
    recycle_index_key(index_key_info0, &index_key0);
6790
0
    recycle_index_key(index_key_info1, &index_key1);
6791
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6792
6793
0
    auto handle_index_kv = [&, this](std::string_view k, std::string_view v) -> int {
6794
0
        RecycleIndexPB index_pb;
6795
0
        if (!index_pb.ParseFromArray(v.data(), v.size())) {
6796
0
            return 0;
6797
0
        }
6798
0
        int64_t current_time = ::time(nullptr);
6799
0
        if (current_time <
6800
0
            calculate_index_expired_time(instance_id_, index_pb, &earlest_ts)) { // not expired
6801
0
            return 0;
6802
0
        }
6803
        // decode index_id
6804
0
        auto k1 = k;
6805
0
        k1.remove_prefix(1);
6806
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6807
0
        decode_key(&k1, &out);
6808
        // 0x01 "recycle" ${instance_id} "index" ${index_id} -> RecycleIndexPB
6809
0
        auto index_id = std::get<int64_t>(std::get<0>(out[3]));
6810
0
        std::unique_ptr<Transaction> txn;
6811
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6812
0
        if (err != TxnErrorCode::TXN_OK) {
6813
0
            return 0;
6814
0
        }
6815
0
        std::string val;
6816
0
        err = txn->get(k, &val);
6817
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
6818
0
            return 0;
6819
0
        }
6820
0
        if (err != TxnErrorCode::TXN_OK) {
6821
0
            return 0;
6822
0
        }
6823
0
        index_pb.Clear();
6824
0
        if (!index_pb.ParseFromString(val)) {
6825
0
            return 0;
6826
0
        }
6827
0
        if (scan_tablets_and_statistics(index_pb.table_id(), index_id, metrics_context) != 0) {
6828
0
            return 0;
6829
0
        }
6830
0
        metrics_context.total_need_recycle_num++;
6831
0
        return 0;
6832
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_indexesEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6833
6834
0
    int ret = scan_and_recycle(index_key0, index_key1, std::move(handle_index_kv));
6835
0
    metrics_context.report(true);
6836
0
    segment_metrics_context_.report(true);
6837
0
    tablet_metrics_context_.report(true);
6838
0
    return ret;
6839
0
}
6840
6841
// Scan and statistics partitions that need to be recycled
6842
0
int InstanceRecycler::scan_and_statistics_partitions() {
6843
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_partitions");
6844
6845
0
    RecyclePartKeyInfo part_key_info0 {instance_id_, 0};
6846
0
    RecyclePartKeyInfo part_key_info1 {instance_id_, INT64_MAX};
6847
0
    std::string part_key0;
6848
0
    std::string part_key1;
6849
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6850
6851
0
    recycle_partition_key(part_key_info0, &part_key0);
6852
0
    recycle_partition_key(part_key_info1, &part_key1);
6853
0
    auto handle_partition_kv = [&, this](std::string_view k, std::string_view v) -> int {
6854
0
        RecyclePartitionPB part_pb;
6855
0
        if (!part_pb.ParseFromArray(v.data(), v.size())) {
6856
0
            return 0;
6857
0
        }
6858
0
        int64_t current_time = ::time(nullptr);
6859
0
        if (current_time <
6860
0
            calculate_partition_expired_time(instance_id_, part_pb, &earlest_ts)) { // not expired
6861
0
            return 0;
6862
0
        }
6863
        // decode partition_id
6864
0
        auto k1 = k;
6865
0
        k1.remove_prefix(1);
6866
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
6867
0
        decode_key(&k1, &out);
6868
        // 0x01 "recycle" ${instance_id} "partition" ${partition_id} -> RecyclePartitionPB
6869
0
        auto partition_id = std::get<int64_t>(std::get<0>(out[3]));
6870
        // Change state to RECYCLING
6871
0
        std::unique_ptr<Transaction> txn;
6872
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
6873
0
        if (err != TxnErrorCode::TXN_OK) {
6874
0
            return 0;
6875
0
        }
6876
0
        std::string val;
6877
0
        err = txn->get(k, &val);
6878
0
        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
6879
0
            return 0;
6880
0
        }
6881
0
        if (err != TxnErrorCode::TXN_OK) {
6882
0
            return 0;
6883
0
        }
6884
0
        part_pb.Clear();
6885
0
        if (!part_pb.ParseFromString(val)) {
6886
0
            return 0;
6887
0
        }
6888
        // Partitions with PREPARED state MUST have no data
6889
0
        bool is_empty_tablet = part_pb.state() == RecyclePartitionPB::PREPARED;
6890
0
        int ret = 0;
6891
0
        for (int64_t index_id : part_pb.index_id()) {
6892
0
            if (scan_tablets_and_statistics(part_pb.table_id(), index_id, metrics_context,
6893
0
                                            partition_id, is_empty_tablet) != 0) {
6894
0
                ret = 0;
6895
0
            }
6896
0
        }
6897
0
        metrics_context.total_need_recycle_num++;
6898
0
        return ret;
6899
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler30scan_and_statistics_partitionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6900
6901
0
    int ret = scan_and_recycle(part_key0, part_key1, std::move(handle_partition_kv));
6902
0
    metrics_context.report(true);
6903
0
    segment_metrics_context_.report(true);
6904
0
    tablet_metrics_context_.report(true);
6905
0
    return ret;
6906
0
}
6907
6908
// Scan and statistics rowsets that need to be recycled
6909
0
int InstanceRecycler::scan_and_statistics_rowsets() {
6910
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_rowsets");
6911
0
    RecycleRowsetKeyInfo recyc_rs_key_info0 {instance_id_, 0, ""};
6912
0
    RecycleRowsetKeyInfo recyc_rs_key_info1 {instance_id_, INT64_MAX, ""};
6913
0
    std::string recyc_rs_key0;
6914
0
    std::string recyc_rs_key1;
6915
0
    recycle_rowset_key(recyc_rs_key_info0, &recyc_rs_key0);
6916
0
                recycle_rowset_key(recyc_rs_key_info1, &recyc_rs_key1);
6917
0
       int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6918
6919
0
    auto handle_rowset_kv = [&, this](std::string_view k, std::string_view v) -> int {
6920
0
        RecycleRowsetPB rowset;
6921
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6922
0
            return 0;
6923
0
        }
6924
0
        auto* rowset_meta = rowset.mutable_rowset_meta();
6925
0
        int64_t current_time = ::time(nullptr);
6926
0
        if (current_time <
6927
0
            calculate_rowset_expired_time(instance_id_, rowset, &earlest_ts)) { // not expired
6928
0
            return 0;
6929
0
        }
6930
6931
0
        if (!rowset.has_type()) {
6932
0
            if (!rowset.has_resource_id()) [[unlikely]] {
6933
0
                return 0;
6934
0
            }
6935
0
            if (rowset.resource_id().empty()) [[unlikely]] {
6936
0
                return 0;
6937
0
            }
6938
0
            metrics_context.total_need_recycle_num++;
6939
0
            metrics_context.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6940
0
            segment_metrics_context_.total_need_recycle_num += rowset.rowset_meta().num_segments();
6941
0
            segment_metrics_context_.total_need_recycle_data_size += rowset.rowset_meta().total_disk_size();
6942
0
            return 0;
6943
0
        }
6944
6945
0
        if(!rowset_meta->has_is_recycled() || !rowset_meta->is_recycled()) {
6946
0
            return 0;
6947
0
        }
6948
6949
0
        if (!rowset_meta->has_resource_id()) [[unlikely]] {
6950
0
            if (rowset.type() == RecycleRowsetPB::PREPARE || rowset_meta->num_segments() != 0) {
6951
0
                return 0;
6952
0
            }
6953
0
        }
6954
0
        metrics_context.total_need_recycle_num++;
6955
0
        metrics_context.total_need_recycle_data_size += rowset_meta->total_disk_size();
6956
0
        segment_metrics_context_.total_need_recycle_num += rowset_meta->num_segments();
6957
0
        segment_metrics_context_.total_need_recycle_data_size += rowset_meta->total_disk_size();
6958
0
        return 0;
6959
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler27scan_and_statistics_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
6960
0
    int ret = scan_and_recycle(recyc_rs_key0, recyc_rs_key1, std::move(handle_rowset_kv));
6961
0
    metrics_context.report(true);
6962
0
    segment_metrics_context_.report(true);
6963
0
    return ret;
6964
0
}
6965
6966
// Scan and statistics tmp_rowsets that need to be recycled
6967
0
int InstanceRecycler::scan_and_statistics_tmp_rowsets() {
6968
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_tmp_rowsets");
6969
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info0 {instance_id_, 0, 0};
6970
0
    MetaRowsetTmpKeyInfo tmp_rs_key_info1 {instance_id_, INT64_MAX, 0};
6971
0
    std::string tmp_rs_key0;
6972
0
    std::string tmp_rs_key1;
6973
0
    meta_rowset_tmp_key(tmp_rs_key_info0, &tmp_rs_key0);
6974
0
    meta_rowset_tmp_key(tmp_rs_key_info1, &tmp_rs_key1);
6975
6976
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
6977
6978
0
    auto handle_tmp_rowsets_kv = [&, this](std::string_view k, std::string_view v) -> int {
6979
0
        doris::RowsetMetaCloudPB rowset;
6980
0
        if (!rowset.ParseFromArray(v.data(), v.size())) {
6981
0
            return 0;
6982
0
        }
6983
0
        int64_t expiration = calculate_tmp_rowset_expired_time(instance_id_, rowset, &earlest_ts);
6984
0
        int64_t current_time = ::time(nullptr);
6985
0
        if (current_time < expiration) {
6986
0
            return 0;
6987
0
        }
6988
6989
0
        DCHECK_GT(rowset.txn_id(), 0)
6990
0
                << "txn_id=" << rowset.txn_id() << " rowset=" << rowset.ShortDebugString();
6991
6992
0
        if(!rowset.has_is_recycled() || !rowset.is_recycled()) {
6993
0
            return 0;
6994
0
        }
6995
6996
0
        if (!rowset.has_resource_id()) {
6997
0
            if (rowset.num_segments() > 0) [[unlikely]] { // impossible
6998
0
                return 0;
6999
0
            }
7000
0
            return 0;
7001
0
        }
7002
7003
0
        metrics_context.total_need_recycle_num++;
7004
0
        metrics_context.total_need_recycle_data_size += rowset.total_disk_size();
7005
0
        segment_metrics_context_.total_need_recycle_data_size += rowset.total_disk_size();
7006
0
        segment_metrics_context_.total_need_recycle_num += rowset.num_segments();
7007
0
        return 0;
7008
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler31scan_and_statistics_tmp_rowsetsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7009
0
    int ret = scan_and_recycle(tmp_rs_key0, tmp_rs_key1, std::move(handle_tmp_rowsets_kv));
7010
0
    metrics_context.report(true);
7011
0
    segment_metrics_context_.report(true);
7012
0
    return ret;
7013
0
}
7014
7015
// Scan and statistics abort_timeout_txn that need to be recycled
7016
0
int InstanceRecycler::scan_and_statistics_abort_timeout_txn() {
7017
0
    RecyclerMetricsContext metrics_context(instance_id_, "abort_timeout_txn");
7018
7019
0
    TxnRunningKeyInfo txn_running_key_info0 {instance_id_, 0, 0};
7020
0
    TxnRunningKeyInfo txn_running_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
7021
0
    std::string begin_txn_running_key;
7022
0
    std::string end_txn_running_key;
7023
0
    txn_running_key(txn_running_key_info0, &begin_txn_running_key);
7024
0
    txn_running_key(txn_running_key_info1, &end_txn_running_key);
7025
7026
0
    int64_t current_time =
7027
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7028
7029
0
    auto handle_abort_timeout_txn_kv = [&metrics_context, &current_time, this](
7030
0
                                               std::string_view k, std::string_view v) -> int {
7031
0
        std::unique_ptr<Transaction> txn;
7032
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7033
0
        if (err != TxnErrorCode::TXN_OK) {
7034
0
            return 0;
7035
0
        }
7036
0
        std::string_view k1 = k;
7037
0
        k1.remove_prefix(1);
7038
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7039
0
        if (decode_key(&k1, &out) != 0) {
7040
0
            return 0;
7041
0
        }
7042
0
        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
7043
0
        int64_t txn_id = std::get<int64_t>(std::get<0>(out[4]));
7044
        // Update txn_info
7045
0
        std::string txn_inf_key, txn_inf_val;
7046
0
        txn_info_key({instance_id_, db_id, txn_id}, &txn_inf_key);
7047
0
        err = txn->get(txn_inf_key, &txn_inf_val);
7048
0
        if (err != TxnErrorCode::TXN_OK) {
7049
0
            return 0;
7050
0
        }
7051
0
        TxnInfoPB txn_info;
7052
0
        if (!txn_info.ParseFromString(txn_inf_val)) {
7053
0
            return 0;
7054
0
        }
7055
7056
0
        if (TxnStatusPB::TXN_STATUS_COMMITTED != txn_info.status()) {
7057
0
            TxnRunningPB txn_running_pb;
7058
0
            if (!txn_running_pb.ParseFromArray(v.data(), v.size())) {
7059
0
                return 0;
7060
0
            }
7061
0
            if (!config::force_immediate_recycle && txn_running_pb.timeout_time() > current_time) {
7062
0
                return 0;
7063
0
            }
7064
0
            metrics_context.total_need_recycle_num++;
7065
0
        }
7066
0
        return 0;
7067
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_abort_timeout_txnEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7068
7069
0
    int ret = scan_and_recycle(begin_txn_running_key, end_txn_running_key, std::move(handle_abort_timeout_txn_kv));
7070
0
    metrics_context.report(true);
7071
0
    return ret;
7072
0
}
7073
7074
// Scan and statistics expired_txn_label that need to be recycled
7075
0
int InstanceRecycler::scan_and_statistics_expired_txn_label() {
7076
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_txn_label");
7077
7078
0
    RecycleTxnKeyInfo recycle_txn_key_info0 {instance_id_, 0, 0};
7079
0
    RecycleTxnKeyInfo recycle_txn_key_info1 {instance_id_, INT64_MAX, INT64_MAX};
7080
0
    std::string begin_recycle_txn_key;
7081
0
    std::string end_recycle_txn_key;
7082
0
    recycle_txn_key(recycle_txn_key_info0, &begin_recycle_txn_key);
7083
0
    recycle_txn_key(recycle_txn_key_info1, &end_recycle_txn_key);
7084
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7085
0
    int64_t current_time_ms =
7086
0
            duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7087
7088
    // for calculate the total num or bytes of recyled objects
7089
0
    auto handle_expired_txn_label_kv = [&, this](std::string_view k, std::string_view v) -> int {
7090
0
        RecycleTxnPB recycle_txn_pb;
7091
0
        if (!recycle_txn_pb.ParseFromArray(v.data(), v.size())) {
7092
0
            return 0;
7093
0
        }
7094
0
        if ((config::force_immediate_recycle) ||
7095
0
            (recycle_txn_pb.has_immediate() && recycle_txn_pb.immediate()) ||
7096
0
            (calculate_txn_expired_time(instance_id_, recycle_txn_pb, &earlest_ts) <=
7097
0
             current_time_ms)) {
7098
0
            metrics_context.total_need_recycle_num++;
7099
0
        }
7100
0
        return 0;
7101
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler37scan_and_statistics_expired_txn_labelEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7102
7103
0
    int ret = scan_and_recycle(begin_recycle_txn_key, end_recycle_txn_key, std::move(handle_expired_txn_label_kv));
7104
0
    metrics_context.report(true);
7105
0
    return ret;
7106
0
}
7107
7108
// Scan and statistics copy_jobs that need to be recycled
7109
0
int InstanceRecycler::scan_and_statistics_copy_jobs() {
7110
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_copy_jobs");
7111
0
    CopyJobKeyInfo key_info0 {instance_id_, "", 0, "", 0};
7112
0
    CopyJobKeyInfo key_info1 {instance_id_, "\xff", 0, "", 0};
7113
0
    std::string key0;
7114
0
    std::string key1;
7115
0
    copy_job_key(key_info0, &key0);
7116
0
    copy_job_key(key_info1, &key1);
7117
7118
    // for calculate the total num or bytes of recyled objects
7119
0
    auto scan_and_statistics = [&metrics_context](std::string_view k, std::string_view v) -> int {
7120
0
        CopyJobPB copy_job;
7121
0
        if (!copy_job.ParseFromArray(v.data(), v.size())) {
7122
0
            LOG_WARNING("malformed copy job").tag("key", hex(k));
7123
0
            return 0;
7124
0
        }
7125
7126
0
        if (copy_job.job_status() == CopyJobPB::FINISH) {
7127
0
            if (copy_job.stage_type() == StagePB::EXTERNAL) {
7128
0
                int64_t current_time =
7129
0
                        duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7130
0
                if (copy_job.finish_time_ms() > 0) {
7131
0
                    if (!config::force_immediate_recycle &&
7132
0
                        current_time < copy_job.finish_time_ms() +
7133
0
                                               config::copy_job_max_retention_second * 1000) {
7134
0
                        return 0;
7135
0
                    }
7136
0
                } else {
7137
0
                    if (!config::force_immediate_recycle &&
7138
0
                        current_time < copy_job.start_time_ms() +
7139
0
                                               config::copy_job_max_retention_second * 1000) {
7140
0
                        return 0;
7141
0
                    }
7142
0
                }
7143
0
            }
7144
0
        } else if (copy_job.job_status() == CopyJobPB::LOADING) {
7145
0
            int64_t current_time =
7146
0
                    duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
7147
0
            if (!config::force_immediate_recycle && current_time <= copy_job.timeout_time_ms()) {
7148
0
                return 0;
7149
0
            }
7150
0
        }
7151
0
        metrics_context.total_need_recycle_num++;
7152
0
        return 0;
7153
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler29scan_and_statistics_copy_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7154
7155
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7156
0
    metrics_context.report(true);
7157
0
    return ret;
7158
0
}
7159
7160
// Scan and statistics stage that need to be recycled
7161
0
int InstanceRecycler::scan_and_statistics_stage() {
7162
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_stage");
7163
0
    RecycleStageKeyInfo key_info0 {instance_id_, ""};
7164
0
    RecycleStageKeyInfo key_info1 {instance_id_, "\xff"};
7165
0
    std::string key0 = recycle_stage_key(key_info0);
7166
0
    std::string key1 = recycle_stage_key(key_info1);
7167
7168
    // for calculate the total num or bytes of recyled objects
7169
0
    auto scan_and_statistics = [&metrics_context, this](std::string_view k,
7170
0
                                                        std::string_view v) -> int {
7171
0
        RecycleStagePB recycle_stage;
7172
0
        if (!recycle_stage.ParseFromArray(v.data(), v.size())) {
7173
0
            LOG_WARNING("malformed recycle stage").tag("key", hex(k));
7174
0
            return 0;
7175
0
        }
7176
7177
0
        int idx = stoi(recycle_stage.stage().obj_info().id());
7178
0
        if (idx > instance_info_.obj_info().size() || idx < 1) {
7179
0
            LOG(WARNING) << "invalid idx: " << idx;
7180
0
            return 0;
7181
0
        }
7182
7183
0
        std::shared_ptr<StorageVaultAccessor> accessor;
7184
0
        int ret = SYNC_POINT_HOOK_RETURN_VALUE(
7185
0
                [&] {
7186
0
                    auto& old_obj = instance_info_.obj_info()[idx - 1];
7187
0
                    auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7188
0
                    if (!s3_conf) {
7189
0
                        return 0;
7190
0
                    }
7191
7192
0
                    s3_conf->prefix = recycle_stage.stage().obj_info().prefix();
7193
0
                    std::shared_ptr<S3Accessor> s3_accessor;
7194
0
                    int ret = S3Accessor::create(std::move(s3_conf.value()), &s3_accessor);
7195
0
                    if (ret != 0) {
7196
0
                        return 0;
7197
0
                    }
7198
7199
0
                    accessor = std::move(s3_accessor);
7200
0
                    return 0;
7201
0
                }(),
7202
0
                "recycle_stage:get_accessor", &accessor);
7203
7204
0
        if (ret != 0) {
7205
0
            LOG(WARNING) << "failed to init accessor ret=" << ret;
7206
0
            return 0;
7207
0
        }
7208
7209
0
        metrics_context.total_need_recycle_num++;
7210
0
        return 0;
7211
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler25scan_and_statistics_stageEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7212
7213
0
    int ret = scan_and_recycle(key0, key1, std::move(scan_and_statistics));
7214
0
    metrics_context.report(true);
7215
0
    return ret;
7216
0
}
7217
7218
// Scan and statistics expired_stage_objects that need to be recycled
7219
0
int InstanceRecycler::scan_and_statistics_expired_stage_objects() {
7220
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_expired_stage_objects");
7221
7222
    // for calculate the total num or bytes of recyled objects
7223
0
    auto scan_and_statistics = [&metrics_context, this]() {
7224
0
        for (const auto& stage : instance_info_.stages()) {
7225
0
            if (stopped()) {
7226
0
                break;
7227
0
            }
7228
0
            if (stage.type() == StagePB::EXTERNAL) {
7229
0
                continue;
7230
0
            }
7231
0
            int idx = stoi(stage.obj_info().id());
7232
0
            if (idx > instance_info_.obj_info().size() || idx < 1) {
7233
0
                continue;
7234
0
            }
7235
0
            const auto& old_obj = instance_info_.obj_info()[idx - 1];
7236
0
            auto s3_conf = S3Conf::from_obj_store_info(old_obj);
7237
0
            if (!s3_conf) {
7238
0
                continue;
7239
0
            }
7240
0
            s3_conf->prefix = stage.obj_info().prefix();
7241
0
            std::shared_ptr<S3Accessor> accessor;
7242
0
            int ret1 = S3Accessor::create(*s3_conf, &accessor);
7243
0
            if (ret1 != 0) {
7244
0
                continue;
7245
0
            }
7246
0
            if (s3_conf->prefix.find("/stage/") == std::string::npos) {
7247
0
                continue;
7248
0
            }
7249
0
            metrics_context.total_need_recycle_num++;
7250
0
        }
7251
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler41scan_and_statistics_expired_stage_objectsEvENK3$_0clEv
7252
7253
0
    scan_and_statistics();
7254
0
    metrics_context.report(true);
7255
0
    return 0;
7256
0
}
7257
7258
// Scan and statistics versions that need to be recycled
7259
0
int InstanceRecycler::scan_and_statistics_versions() {
7260
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_versions");
7261
0
    auto version_key_begin = partition_version_key({instance_id_, 0, 0, 0});
7262
0
    auto version_key_end = partition_version_key({instance_id_, INT64_MAX, 0, 0});
7263
7264
0
    int64_t last_scanned_table_id = 0;
7265
0
    bool is_recycled = false; // Is last scanned kv recycled
7266
    // for calculate the total num or bytes of recyled objects
7267
0
    auto scan_and_statistics = [&metrics_context, &last_scanned_table_id, &is_recycled, this](
7268
0
                                       std::string_view k, std::string_view) {
7269
0
        auto k1 = k;
7270
0
        k1.remove_prefix(1);
7271
        // 0x01 "version" ${instance_id} "partition" ${db_id} ${tbl_id} ${partition_id}
7272
0
        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out;
7273
0
        decode_key(&k1, &out);
7274
0
        DCHECK_EQ(out.size(), 6) << k;
7275
0
        auto table_id = std::get<int64_t>(std::get<0>(out[4]));
7276
0
        if (table_id == last_scanned_table_id) { // Already handle kvs of this table
7277
0
            metrics_context.total_need_recycle_num +=
7278
0
                    is_recycled; // Version kv of this table has been recycled
7279
0
            return 0;
7280
0
        }
7281
0
        last_scanned_table_id = table_id;
7282
0
        is_recycled = false;
7283
0
        auto tablet_key_begin = stats_tablet_key({instance_id_, table_id, 0, 0, 0});
7284
0
        auto tablet_key_end = stats_tablet_key({instance_id_, table_id, INT64_MAX, 0, 0});
7285
0
        std::unique_ptr<Transaction> txn;
7286
0
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7287
0
        if (err != TxnErrorCode::TXN_OK) {
7288
0
            return 0;
7289
0
        }
7290
0
        std::unique_ptr<RangeGetIterator> iter;
7291
0
        err = txn->get(tablet_key_begin, tablet_key_end, &iter, false, 1);
7292
0
        if (err != TxnErrorCode::TXN_OK) {
7293
0
            return 0;
7294
0
        }
7295
0
        if (iter->has_next()) { // Table is useful, should not recycle table and partition versions
7296
0
            return 0;
7297
0
        }
7298
0
        metrics_context.total_need_recycle_num++;
7299
0
        is_recycled = true;
7300
0
        return 0;
7301
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler28scan_and_statistics_versionsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7302
7303
0
    int ret = scan_and_recycle(version_key_begin, version_key_end, std::move(scan_and_statistics));
7304
0
    metrics_context.report(true);
7305
0
    return ret;
7306
0
}
7307
7308
// Scan and statistics restore jobs that need to be recycled
7309
0
int InstanceRecycler::scan_and_statistics_restore_jobs() {
7310
0
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_restore_jobs");
7311
0
    JobRestoreTabletKeyInfo restore_job_key_info0 {instance_id_, 0};
7312
0
    JobRestoreTabletKeyInfo restore_job_key_info1 {instance_id_, INT64_MAX};
7313
0
    std::string restore_job_key0;
7314
0
    std::string restore_job_key1;
7315
0
    job_restore_tablet_key(restore_job_key_info0, &restore_job_key0);
7316
0
    job_restore_tablet_key(restore_job_key_info1, &restore_job_key1);
7317
7318
0
    int64_t earlest_ts = std::numeric_limits<int64_t>::max();
7319
7320
    // for calculate the total num or bytes of recyled objects
7321
0
    auto scan_and_statistics = [&](std::string_view k, std::string_view v) -> int {
7322
0
        RestoreJobCloudPB restore_job_pb;
7323
0
        if (!restore_job_pb.ParseFromArray(v.data(), v.size())) {
7324
0
            LOG_WARNING("malformed recycle partition value").tag("key", hex(k));
7325
0
            return 0;
7326
0
        }
7327
0
        int64_t expiration =
7328
0
                calculate_restore_job_expired_time(instance_id_, restore_job_pb, &earlest_ts);
7329
0
        int64_t current_time = ::time(nullptr);
7330
0
        if (current_time < expiration) { // not expired
7331
0
            return 0;
7332
0
        }
7333
0
        metrics_context.total_need_recycle_num++;
7334
0
        if(restore_job_pb.need_recycle_data()) {
7335
0
            scan_tablet_and_statistics(restore_job_pb.tablet_id(), metrics_context);
7336
0
        }
7337
0
        return 0;
7338
0
    };
Unexecuted instantiation: recycler.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
Unexecuted instantiation: recycler_test.cpp:_ZZN5doris5cloud16InstanceRecycler32scan_and_statistics_restore_jobsEvENK3$_0clESt17basic_string_viewIcSt11char_traitsIcEES6_
7339
7340
0
    int ret = scan_and_recycle(restore_job_key0, restore_job_key1, std::move(scan_and_statistics));
7341
0
    metrics_context.report(true);
7342
0
    return ret;
7343
0
}
7344
7345
3
void InstanceRecycler::scan_and_statistics_operation_logs() {
7346
3
    if (!should_recycle_versioned_keys()) {
7347
0
        return;
7348
0
    }
7349
7350
3
    RecyclerMetricsContext metrics_context(instance_id_, "recycle_operation_logs");
7351
7352
3
    OperationLogRecycleChecker recycle_checker(instance_id_, txn_kv_.get(), instance_info_);
7353
3
    if (recycle_checker.init() != 0) {
7354
0
        return;
7355
0
    }
7356
7357
3
    std::string log_key_prefix = versioned::log_key(instance_id_);
7358
3
    std::string begin_key = encode_versioned_key(log_key_prefix, Versionstamp::min());
7359
3
    std::string end_key = encode_versioned_key(log_key_prefix, Versionstamp::max());
7360
7361
3
    std::unique_ptr<BlobIterator> iter = blob_get_range(txn_kv_, begin_key, end_key);
7362
8
    for (; iter->valid(); iter->next()) {
7363
5
        OperationLogPB operation_log;
7364
5
        if (!iter->parse_value(&operation_log)) {
7365
0
            continue;
7366
0
        }
7367
7368
5
        std::string_view key = iter->key();
7369
5
        Versionstamp log_versionstamp;
7370
5
        if (!decode_versioned_key(&key, &log_versionstamp)) {
7371
0
            continue;
7372
0
        }
7373
7374
5
        OperationLogReferenceInfo ref_info;
7375
5
        if (recycle_checker.can_recycle(log_versionstamp, operation_log.min_timestamp(),
7376
5
                                         &ref_info)) {
7377
4
            metrics_context.total_need_recycle_num++;
7378
4
            metrics_context.total_need_recycle_data_size += operation_log.ByteSizeLong();
7379
4
        }
7380
5
    }
7381
7382
3
    metrics_context.report(true);
7383
3
}
7384
7385
int InstanceRecycler::classify_rowset_task_by_ref_count(
7386
60
        RowsetDeleteTask& task, std::vector<RowsetDeleteTask>& batch_delete_tasks) {
7387
60
    constexpr int MAX_RETRY = 10;
7388
60
    const auto& rowset_meta = task.rowset_meta;
7389
60
    int64_t tablet_id = rowset_meta.tablet_id();
7390
60
    const std::string& rowset_id = rowset_meta.rowset_id_v2();
7391
60
    std::string_view reference_instance_id = instance_id_;
7392
60
    if (rowset_meta.has_reference_instance_id()) {
7393
5
        reference_instance_id = rowset_meta.reference_instance_id();
7394
5
    }
7395
7396
61
    for (int i = 0; i < MAX_RETRY; ++i) {
7397
61
        std::unique_ptr<Transaction> txn;
7398
61
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7399
61
        if (err != TxnErrorCode::TXN_OK) {
7400
0
            LOG_WARNING("failed to create txn when classifying rowset task")
7401
0
                    .tag("instance_id", instance_id_)
7402
0
                    .tag("tablet_id", tablet_id)
7403
0
                    .tag("rowset_id", rowset_id)
7404
0
                    .tag("err", err);
7405
0
            return -1;
7406
0
        }
7407
7408
61
        std::string rowset_ref_count_key =
7409
61
                versioned::data_rowset_ref_count_key({reference_instance_id, tablet_id, rowset_id});
7410
61
        task.rowset_ref_count_key = rowset_ref_count_key;
7411
7412
61
        int64_t ref_count = 0;
7413
61
        {
7414
61
            std::string value;
7415
61
            TxnErrorCode err = txn->get(rowset_ref_count_key, &value);
7416
61
            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
7417
0
                ref_count = 1;
7418
61
            } else if (err != TxnErrorCode::TXN_OK) {
7419
0
                LOG_WARNING("failed to get rowset ref count key when classifying")
7420
0
                        .tag("instance_id", instance_id_)
7421
0
                        .tag("tablet_id", tablet_id)
7422
0
                        .tag("rowset_id", rowset_id)
7423
0
                        .tag("err", err);
7424
0
                return -1;
7425
61
            } else if (!txn->decode_atomic_int(value, &ref_count)) {
7426
0
                LOG_WARNING("failed to decode rowset data ref count when classifying")
7427
0
                        .tag("instance_id", instance_id_)
7428
0
                        .tag("tablet_id", tablet_id)
7429
0
                        .tag("rowset_id", rowset_id)
7430
0
                        .tag("value", hex(value));
7431
0
                return -1;
7432
0
            }
7433
61
        }
7434
7435
61
        if (ref_count > 1) {
7436
            // ref_count > 1: decrement count, remove recycle keys, don't add to batch delete
7437
12
            txn->atomic_add(rowset_ref_count_key, -1);
7438
12
            LOG_INFO("decrease rowset data ref count in classification phase")
7439
12
                    .tag("instance_id", instance_id_)
7440
12
                    .tag("tablet_id", tablet_id)
7441
12
                    .tag("rowset_id", rowset_id)
7442
12
                    .tag("ref_count", ref_count - 1)
7443
12
                    .tag("ref_count_key", hex(rowset_ref_count_key));
7444
7445
12
            if (!task.recycle_rowset_key.empty()) {
7446
0
                txn->remove(task.recycle_rowset_key);
7447
0
                LOG_INFO("remove recycle rowset key in classification phase")
7448
0
                        .tag("key", hex(task.recycle_rowset_key));
7449
0
            }
7450
12
            if (!task.non_versioned_rowset_key.empty()) {
7451
12
                txn->remove(task.non_versioned_rowset_key);
7452
12
                LOG_INFO("remove non versioned rowset key in classification phase")
7453
12
                        .tag("key", hex(task.non_versioned_rowset_key));
7454
12
            }
7455
7456
12
            err = txn->commit();
7457
12
            if (err == TxnErrorCode::TXN_CONFLICT) {
7458
1
                VLOG_DEBUG << "decrease rowset ref count but txn conflict in classification, retry"
7459
0
                           << " tablet_id=" << tablet_id << " rowset_id=" << rowset_id
7460
0
                           << ", ref_count=" << ref_count << ", retry=" << i;
7461
1
                std::this_thread::sleep_for(std::chrono::milliseconds(500));
7462
1
                continue;
7463
11
            } else if (err != TxnErrorCode::TXN_OK) {
7464
0
                LOG_WARNING("failed to commit txn when classifying rowset task")
7465
0
                        .tag("instance_id", instance_id_)
7466
0
                        .tag("tablet_id", tablet_id)
7467
0
                        .tag("rowset_id", rowset_id)
7468
0
                        .tag("err", err);
7469
0
                return -1;
7470
0
            }
7471
11
            return 1; // handled, not added to batch delete
7472
49
        } else {
7473
            // ref_count == 1: Add to batch delete plan without modifying any KV.
7474
            // Keep recycle_rowset_key as "pending recycle" marker until data is actually deleted.
7475
49
            LOG_INFO("add rowset to batch delete plan")
7476
49
                    .tag("instance_id", instance_id_)
7477
49
                    .tag("tablet_id", tablet_id)
7478
49
                    .tag("rowset_id", rowset_id)
7479
49
                    .tag("resource_id", rowset_meta.resource_id())
7480
49
                    .tag("ref_count", ref_count);
7481
7482
49
            batch_delete_tasks.push_back(std::move(task));
7483
49
            return 0; // added to batch delete
7484
49
        }
7485
61
    }
7486
7487
0
    LOG_WARNING("failed to classify rowset task after retry")
7488
0
            .tag("instance_id", instance_id_)
7489
0
            .tag("tablet_id", tablet_id)
7490
0
            .tag("rowset_id", rowset_id)
7491
0
            .tag("retry", MAX_RETRY);
7492
0
    return -1;
7493
60
}
7494
7495
10
int InstanceRecycler::cleanup_rowset_metadata(const std::vector<RowsetDeleteTask>& tasks) {
7496
10
    int ret = 0;
7497
49
    for (const auto& task : tasks) {
7498
49
        int64_t tablet_id = task.rowset_meta.tablet_id();
7499
49
        const std::string& rowset_id = task.rowset_meta.rowset_id_v2();
7500
7501
        // Note: decrement_packed_file_ref_counts is already called in delete_rowset_data,
7502
        // so we don't need to call it again here.
7503
7504
        // Remove all metadata keys in one transaction
7505
49
        std::unique_ptr<Transaction> txn;
7506
49
        TxnErrorCode err = txn_kv_->create_txn(&txn);
7507
49
        if (err != TxnErrorCode::TXN_OK) {
7508
0
            LOG_WARNING("failed to create txn when cleaning up metadata")
7509
0
                    .tag("instance_id", instance_id_)
7510
0
                    .tag("tablet_id", tablet_id)
7511
0
                    .tag("rowset_id", rowset_id)
7512
0
                    .tag("err", err);
7513
0
            ret = -1;
7514
0
            continue;
7515
0
        }
7516
7517
49
        std::string_view reference_instance_id = instance_id_;
7518
49
        if (task.rowset_meta.has_reference_instance_id()) {
7519
5
            reference_instance_id = task.rowset_meta.reference_instance_id();
7520
5
        }
7521
7522
49
        txn->remove(task.rowset_ref_count_key);
7523
49
        LOG_INFO("delete rowset data ref count key in cleanup phase")
7524
49
                .tag("instance_id", instance_id_)
7525
49
                .tag("tablet_id", tablet_id)
7526
49
                .tag("rowset_id", rowset_id)
7527
49
                .tag("ref_count_key", hex(task.rowset_ref_count_key));
7528
7529
49
        std::string dbm_start_key =
7530
49
                meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id, 0, 0});
7531
49
        std::string dbm_end_key = meta_delete_bitmap_key(
7532
49
                {reference_instance_id, tablet_id, rowset_id,
7533
49
                 std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::max()});
7534
49
        txn->remove(dbm_start_key, dbm_end_key);
7535
49
        LOG_INFO("remove delete bitmap kv in cleanup phase")
7536
49
                .tag("instance_id", instance_id_)
7537
49
                .tag("tablet_id", tablet_id)
7538
49
                .tag("rowset_id", rowset_id)
7539
49
                .tag("begin", hex(dbm_start_key))
7540
49
                .tag("end", hex(dbm_end_key));
7541
7542
49
        std::string versioned_dbm_start_key =
7543
49
                versioned::meta_delete_bitmap_key({reference_instance_id, tablet_id, rowset_id});
7544
49
        std::string versioned_dbm_end_key = versioned_dbm_start_key;
7545
49
        encode_int64(INT64_MAX, &versioned_dbm_end_key);
7546
49
        txn->remove(versioned_dbm_start_key, versioned_dbm_end_key);
7547
49
        LOG_INFO("remove versioned delete bitmap kv in cleanup phase")
7548
49
                .tag("instance_id", instance_id_)
7549
49
                .tag("tablet_id", tablet_id)
7550
49
                .tag("rowset_id", rowset_id)
7551
49
                .tag("begin", hex(versioned_dbm_start_key))
7552
49
                .tag("end", hex(versioned_dbm_end_key));
7553
7554
        // Remove versioned meta rowset key
7555
49
        if (!task.versioned_rowset_key.empty()) {
7556
49
            versioned::document_remove<RowsetMetaCloudPB>(
7557
49
                txn.get(), task.versioned_rowset_key, task.versionstamp);
7558
49
            LOG_INFO("remove versioned meta rowset key in cleanup phase")
7559
49
                    .tag("instance_id", instance_id_)
7560
49
                    .tag("tablet_id", tablet_id)
7561
49
                    .tag("rowset_id", rowset_id)
7562
49
                    .tag("key_prefix", hex(task.versioned_rowset_key));
7563
49
        }
7564
7565
49
        if (!task.non_versioned_rowset_key.empty()) {
7566
49
            txn->remove(task.non_versioned_rowset_key);
7567
49
            LOG_INFO("remove non versioned rowset key in cleanup phase")
7568
49
                    .tag("instance_id", instance_id_)
7569
49
                    .tag("tablet_id", tablet_id)
7570
49
                    .tag("rowset_id", rowset_id)
7571
49
                    .tag("key", hex(task.non_versioned_rowset_key));
7572
49
        }
7573
7574
        // Remove recycle_rowset_key last to ensure retry safety:
7575
        // if cleanup fails, this key remains and triggers next round retry.
7576
49
        if (!task.recycle_rowset_key.empty()) {
7577
0
            txn->remove(task.recycle_rowset_key);
7578
0
            LOG_INFO("remove recycle rowset key in cleanup phase")
7579
0
                    .tag("instance_id", instance_id_)
7580
0
                    .tag("tablet_id", tablet_id)
7581
0
                    .tag("rowset_id", rowset_id)
7582
0
                    .tag("key", hex(task.recycle_rowset_key));
7583
0
        }
7584
7585
49
        err = txn->commit();
7586
49
        if (err != TxnErrorCode::TXN_OK) {
7587
            // Metadata cleanup failed. recycle_rowset_key remains, next round will retry.
7588
0
            LOG_WARNING("failed to commit cleanup metadata txn, will retry next round")
7589
0
                    .tag("instance_id", instance_id_)
7590
0
                    .tag("tablet_id", tablet_id)
7591
0
                    .tag("rowset_id", rowset_id)
7592
0
                    .tag("err", err);
7593
0
            ret = -1;
7594
0
            continue;
7595
0
        }
7596
7597
49
        LOG_INFO("cleanup rowset metadata success")
7598
49
                .tag("instance_id", instance_id_)
7599
49
                .tag("tablet_id", tablet_id)
7600
49
                .tag("rowset_id", rowset_id);
7601
49
    }
7602
10
    return ret;
7603
10
}
7604
7605
} // namespace doris::cloud